__init__.py 10.9 KB
Newer Older
1
# -*- coding: utf-8 -*-
2
# Copyright (C) 2014 Laboratoire d'Astrophysique de Marseille, AMU
3
# Copyright (C) 2013 Centre de données Astrophysiques de Marseille
4 5
# Copyright (C) 2013-2014 Institute of Astronomy
# Copyright (C) 2013-2014 Yannick Roehlly <yannick@iaora.eu>
6
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
7
# Author: Yannick Roehlly, Médéric Boquien & Denis Burgarella
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27

"""
Probability Density Function analysis module
============================================

This module builds the probability density functions (PDF) of the SED
parameters to compute their moments.

The models corresponding to all possible combinations of parameters are
computed and their fluxes in the same filters as the observations are
integrated. These fluxes are compared to the observed ones to compute the
χ² value of the fitting. This χ² give a probability that is associated with
the model values for the parameters.

At the end, for each parameter, the probability-weighted mean and standard
deviation are computed and the best fitting model (the one with the least
reduced χ²) is given for each observation.

"""

28
from collections import OrderedDict
29
import ctypes
30
import multiprocessing as mp
31 32 33 34 35
from multiprocessing.sharedctypes import RawArray
import time

import numpy as np

36
from ...utils import read_table
37
from .. import AnalysisModule
38
from .utils import save_results, analyse_chi2
39
from ...warehouse import SedWarehouse
40
from .workers import sed as worker_sed
41 42
from .workers import init_sed as init_worker_sed
from .workers import init_analysis as init_worker_analysis
43
from .workers import analysis as worker_analysis
44
from ...managers.observations import ObservationsManager
45
from ...managers.parameters import ParametersManager
46

47

48 49 50
class PdfAnalysis(AnalysisModule):
    """PDF analysis module"""

51
    parameter_list = OrderedDict([
52
        ("variables", (
53
            "cigale_string_list()",
54 55 56
            "List of the physical properties to estimate. Leave empty to "
            "analyse all the physical properties (not recommended when there "
            "are many models).",
57
            ["sfh.sfr", "sfh.sfr10Myrs", "sfh.sfr100Myrs"]
58 59
        )),
        ("save_best_sed", (
60
            "boolean()",
61 62 63
            "If true, save the best SED for each observation to a file.",
            False
        )),
64
        ("save_chi2", (
65
            "boolean()",
66
            "If true, for each observation and each analysed variable save "
67
            "the reduced chi2.",
68 69 70
            False
        )),
        ("save_pdf", (
71
            "boolean()",
72 73
            "If true, for each observation and each analysed variable save "
            "the probability density function.",
74 75
            False
        )),
76
        ("lim_flag", (
77
            "boolean()",
78 79 80
            "If true, for each object check whether upper limits are present "
            "and analyse them.",
            False
81 82
        )),
        ("mock_flag", (
83
            "boolean()",
84 85 86
            "If true, for each object we create a mock object "
            "and analyse them.",
            False
87 88 89 90 91 92 93 94
        )),
        ("redshift_decimals", (
            "integer()",
            "When redshifts are not given explicitly in the redshifting "
            "module, number of decimals to round the observed redshifts to "
            "compute the grid of models. To disable rounding give a negative "
            "value. Do not round if you use narrow-band filters.",
            2
95 96 97
        ))
    ])

98
    def process(self, conf):
99 100
        """Process with the psum analysis.

101 102 103 104 105
        The analysis is done in two steps which can both run on multiple
        processors to run faster. The first step is to compute all the fluxes
        associated with each model as well as ancillary data such as the SED
        information. The second step is to carry out the analysis of each
        object, considering all models at once.
106 107 108

        Parameters
        ----------
109 110
        conf: dictionary
            Contents of pcigale.ini in the form of a dictionary
111 112

        """
113
        np.seterr(invalid='ignore')
114

115 116
        print("Initialising the analysis module... ")

117
        # Rename the output directory if it exists
118
        self.prepare_dirs()
119

120
        # Initalise variables from input arguments.
121 122 123 124
        variables = conf['analysis_params']["variables"]
        variables_nolog = [variable[:-4] if variable.endswith('_log') else
                           variable for variable in variables]
        n_variables = len(variables)
125 126 127
        save = {key: conf['analysis_params']["save_{}".format(key)] for key in
                ["best_sed", "chi2", "pdf"]}
        lim_flag = conf['analysis_params']["lim_flag"]
128

129
        filters = [name for name in conf['bands'] if not
130
                   name.endswith('_err')]
131
        n_filters = len(filters)
132 133 134

        # Read the observation table and complete it by adding error where
        # none is provided and by adding the systematic deviation.
135 136
        obs = ObservationsManager(conf)
        n_obs = len(obs.table)
137

138
        z = np.array(conf['sed_modules_params']['redshifting']['redshift'])
139

140
        # The parameters manager allows us to retrieve the models parameters
141 142 143 144
        # from a 1D index. This is useful in that we do not have to create
        # a list of parameters as they are computed on-the-fly. It also has
        # nice goodies such as finding the index of the first parameter to
        # have changed between two indices or the number of models.
145
        params = ParametersManager(conf)
146 147
        n_params = params.size

148
        # Retrieve an arbitrary SED to obtain the list of output parameters
149
        warehouse = SedWarehouse()
150
        sed = warehouse.get_sed(conf['sed_modules'], params.from_index(0))
151 152 153
        info = list(sed.info.keys())
        info.sort()
        n_info = len(info)
154 155
        del warehouse, sed

156 157
        print("Computing the models fluxes...")

158 159 160 161 162 163 164
        # Arrays where we store the data related to the models. For memory
        # efficiency reasons, we use RawArrays that will be passed in argument
        # to the pool. Each worker will fill a part of the RawArrays. It is
        # important that there is no conflict and that two different workers do
        # not write on the same section.
        # We put the shape in a tuple along with the RawArray because workers
        # need to know the shape to create the numpy array from the RawArray.
165
        model_fluxes = (RawArray(ctypes.c_double, n_params * n_filters),
166
                        (n_filters, n_params))
167
        model_variables = (RawArray(ctypes.c_double, n_params * n_variables),
168
                           (n_variables, n_params))
169

170
        initargs = (params, filters, variables_nolog, model_fluxes,
171
                    model_variables, time.time(), mp.Value('i', 0))
172
        if conf['cores'] == 1:  # Do not create a new process
173
            init_worker_sed(*initargs)
174 175
            for idx in range(n_params):
                worker_sed(idx)
176
        else:  # Compute the models in parallel
177
            with mp.Pool(processes=conf['cores'], initializer=init_worker_sed,
178
                         initargs=initargs) as pool:
179
                pool.map(worker_sed, range(n_params))
180

181
        print("\nAnalysing models...")
182

183 184
        # We use RawArrays for the same reason as previously
        analysed_averages = (RawArray(ctypes.c_double, n_obs * n_variables),
Médéric Boquien's avatar
Médéric Boquien committed
185
                             (n_obs, n_variables))
186
        analysed_std = (RawArray(ctypes.c_double, n_obs * n_variables),
Médéric Boquien's avatar
Médéric Boquien committed
187
                        (n_obs, n_variables))
188
        best_fluxes = (RawArray(ctypes.c_double, n_obs * n_filters),
Médéric Boquien's avatar
Médéric Boquien committed
189
                       (n_obs, n_filters))
190 191 192 193 194
        best_parameters = (RawArray(ctypes.c_double, n_obs * n_info),
                           (n_obs, n_info))
        best_chi2 = (RawArray(ctypes.c_double, n_obs), (n_obs))
        best_chi2_red = (RawArray(ctypes.c_double, n_obs), (n_obs))

195
        initargs = (params, filters, variables, z, model_fluxes,
196 197 198 199
                    model_variables, time.time(), mp.Value('i', 0),
                    analysed_averages, analysed_std, best_fluxes,
                    best_parameters, best_chi2, best_chi2_red, save, lim_flag,
                    n_obs)
200
        if conf['cores'] == 1:  # Do not create a new process
201
            init_worker_analysis(*initargs)
202
            for idx, obs in enumerate(obs.table):
203
                worker_analysis(idx, obs)
204
        else:  # Analyse observations in parallel
205 206
            with mp.Pool(processes=conf['cores'],
                         initializer=init_worker_analysis,
207
                         initargs=initargs) as pool:
208
                pool.starmap(worker_analysis, enumerate(obs.table))
209

210
        analyse_chi2(best_chi2_red)
211

212 213
        print("\nSaving results...")

214
        save_results("results", obs.table['id'], variables, analysed_averages,
215 216
                     analysed_std, best_chi2, best_chi2_red, best_parameters,
                     best_fluxes, filters, info)
217

218
        if conf['analysis_params']['mock_flag'] is True:
219 220 221

            print("\nMock analysis...")

222 223 224 225
            # For the mock analysis we do not save the ancillary files
            for k in save:
                save[k] = False

226 227
            obs_fluxes = np.array([obs.table[name] for name in filters]).T
            obs_errors = np.array([obs.table[name + "_err"] for name in
Médéric Boquien's avatar
Médéric Boquien committed
228
                                   filters]).T
229
            mock_fluxes = obs_fluxes.copy()
230 231
            bestmod_fluxes = np.ctypeslib.as_array(best_fluxes[0])
            bestmod_fluxes = bestmod_fluxes.reshape(best_fluxes[1])
232
            wdata = np.where((obs_fluxes > 0.) & (obs_errors > 0.))
233 234
            mock_fluxes[wdata] = np.random.normal(bestmod_fluxes[wdata],
                                                  obs_errors[wdata])
Médéric Boquien's avatar
Médéric Boquien committed
235

236
            for idx, name in enumerate(filters):
237
                obs.table[name] = mock_fluxes[:, idx]
Médéric Boquien's avatar
Médéric Boquien committed
238

239
            initargs = (params, filters, variables, z, model_fluxes,
240 241 242 243
                        model_variables, time.time(), mp.Value('i', 0),
                        analysed_averages, analysed_std, best_fluxes,
                        best_parameters, best_chi2, best_chi2_red, save,
                        lim_flag, n_obs)
244
            if conf['cores'] == 1:  # Do not create a new process
245
                init_worker_analysis(*initargs)
246
                for idx, mock in enumerate(obs.table):
247 248
                    worker_analysis(idx, mock)
            else:  # Analyse observations in parallel
249 250
                with mp.Pool(processes=conf['cores'],
                             initializer=init_worker_analysis,
251
                             initargs=initargs) as pool:
252
                    pool.starmap(worker_analysis, enumerate(obs.table))
253

254
            print("\nSaving results...")
255

256
            save_results("results_mock", obs.table['id'], variables,
257 258 259
                         analysed_averages, analysed_std, best_chi2,
                         best_chi2_red, best_parameters, best_fluxes, filters,
                         info)
260 261

        print("Run completed!")
262

263

264 265
# AnalysisModule to be returned by get_module
Module = PdfAnalysis