workers.py 10 KB
Newer Older
1
2
3
# -*- coding: utf-8 -*-
# Copyright (C) 2013 Centre de données Astrophysiques de Marseille
# Copyright (C) 2013-2014 Institute of Astronomy
4
# Copyright (C) 2014 Laboratoire d'Astrophysique de Marseille, AMU
5
6
# Copyright (C) 2014 Yannick Roehlly <yannick@iaora.eu>
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
7
# Author: Yannick Roehlly, Médéric Boquien & Denis Burgarella
8

9
from copy import deepcopy
10

11
import numpy as np
12

13
from .utils import save_chi2, compute_corr_dz, compute_chi2, weighted_param
14
from ...warehouse import SedWarehouse
15

Médéric Boquien's avatar
Médéric Boquien committed
16

17
def init_sed(models, counter):
18
    """Initializer of the pool of processes to share variables between workers.
19
20
21

    Parameters
    ----------
22
23
    models: ModelsManagers
        Manages the storage of the computed models (fluxes and properties).
24
25
    counter: Counter class object
        Counter for the number of models computed
26
27

    """
28
    global gbl_warehouse, gbl_models, gbl_counter
29

30
    gbl_warehouse = SedWarehouse()
31

32
    gbl_models = models
33
    gbl_counter = counter
Médéric Boquien's avatar
Médéric Boquien committed
34

 Hector Salas's avatar
Hector Salas committed
35

36
def init_analysis(models, results, counter):
37
    """Initializer of the pool of processes to share variables between workers.
38
39
40

    Parameters
    ----------
41
42
43
44
    models: ModelsManagers
        Manages the storage of the computed models (fluxes and properties).
    results: ResultsManager
        Contains the estimates and errors on the properties.
45
46
    counter: Counter class object
        Counter for the number of objects analysed
47
48

    """
49
    global gbl_models, gbl_obs, gbl_results, gbl_counter
50

51
    gbl_models = models
52
    gbl_obs = models.obs
53
    gbl_results = results
54
    gbl_counter = counter
55
56


57
def init_bestfit(conf, params, observations, results, counter):
58
    """Initializer of the pool of processes to share variables between workers.
59

60
61
62
63
64
65
66
67
68
69
    Parameters
    ----------
    conf: dict
        Contains the pcigale.ini configuration.
    params: ParametersManager
        Manages the parameters from a 1D index.
    observations: astropy.Table
        Contains the observations including the filter names.
    results: ResultsManager
        Contains the estimates and errors on the properties.
70
71
    counter: Counter class object
        Counter for the number of objects analysed
72

73
    """
74
    global gbl_warehouse, gbl_conf, gbl_params, gbl_obs
75
    global gbl_results, gbl_counter
76

77
    gbl_warehouse = SedWarehouse()
78

79
80
81
82
    gbl_conf = conf
    gbl_params = params
    gbl_obs = observations
    gbl_results = results
83
    gbl_counter = counter
Médéric Boquien's avatar
Médéric Boquien committed
84

 Hector Salas's avatar
Hector Salas committed
85

86
87
88
def sed(idx, midx):
    """Worker process to retrieve a SED and affect the relevant data to an
    instance of ModelsManager.
89
90
91

    Parameters
    ----------
92
    idx: int
93
94
95
        Index of the model within the current block of models.
    midx: int
        Global index of the model.
96
97

    """
98
99
    sed = gbl_warehouse.get_sed(gbl_models.params.modules,
                                gbl_models.params.from_index(midx))
100

101
102
103
    # The redshift is the fastest varying variable but we want to store it
    # as the slowest one so that models at a given redshift are contiguous
    idx = (idx % gbl_models.nz) * gbl_models.nm + idx // gbl_models.nz
104
    if 'sfh.age' in sed.info and sed.info['sfh.age'] > sed.info['universe.age']:
105
        for band in gbl_models.flux:
106
            gbl_models.flux[band][idx] = np.nan
107
        for prop in gbl_models.extprop:
108
            gbl_models.extprop[prop][idx] = np.nan
109
        for prop in gbl_models.intprop:
110
            gbl_models.intprop[prop][idx] = np.nan
111
    else:
112
        for band in gbl_models.flux:
113
            gbl_models.flux[band][idx] = sed.compute_fnu(band)
114
        for prop in gbl_models.extprop:
115
            gbl_models.extprop[prop][idx] = sed.info[prop]
116
        for prop in gbl_models.intprop:
117
            gbl_models.intprop[prop][idx] = sed.info[prop]
118
    gbl_models.index[idx] = midx
119

120
    gbl_counter.inc()
121

Médéric Boquien's avatar
Médéric Boquien committed
122

123
def analysis(idx, obs):
124
125
    """Worker process to analyse the PDF and estimate parameters values and
    store them in an instance of ResultsManager.
126
127
128

    Parameters
    ----------
129
130
    idx: int
        Index of the observation. This is necessary to put the computed values
131
        at the right location in the ResultsManager.
132
133
134
135
    obs: row
        Input data for an individual object

    """
136
137
    np.seterr(invalid='ignore')

138
    if obs.redshift >= 0.:
139
140
        # We pick the the models with the closest redshift using a slice to
        # work on views of the arrays and not on copies to save on RAM.
141
142
        z = np.array(
            gbl_models.conf['sed_modules_params']['redshifting']['redshift'])
143
144
145
146
        length = gbl_models.nm
        zidx = np.abs(obs.redshift-z).argmin()
        wz = slice(zidx * length, (zidx + 1) * length, 1)
        corr_dz = compute_corr_dz(z[zidx], obs)
147
148
    else:  # We do not know the redshift so we use the full grid
        wz = slice(0, None, 1)
149
        corr_dz = 1.
150

151
    chi2, scaling = compute_chi2(gbl_models, obs, corr_dz, wz,
152
                                 gbl_models.conf['analysis_params']['lim_flag'])
153

154
    if np.any(chi2 < -np.log(np.finfo(np.float64).tiny) * 2.):
155
156
        # We use the exponential probability associated with the χ² as
        # likelihood function.
157
        likelihood = np.exp(-.5 * chi2)
158
159
160
161
        wlikely = np.where(np.isfinite(likelihood))
        # If all the models are valid, it is much more efficient to use a slice
        if likelihood.size == wlikely[0].size:
            wlikely = slice(None, None)
162
163
164
        likelihood = likelihood[wlikely]
        scaling_l = scaling[wlikely]

165
        gbl_results.bayes.weight[idx] = np.nansum(likelihood)
166
        likelihood *= 1. / gbl_results.bayes.weight[idx]
167

Médéric Boquien's avatar
Médéric Boquien committed
168
169
        # We compute the weighted average and standard deviation using the
        # likelihood as weight.
170
171
        for prop in gbl_results.bayes.intmean:
            if prop.endswith('_log'):
172
                values = gbl_models.intprop[prop[:-4]][wz]
173
174
                _ = np.log10
            else:
175
                values = gbl_models.intprop[prop][wz]
176
                _ = lambda x: x
177
            mean, std = weighted_param(_(values[wlikely]), likelihood)
178
179
            gbl_results.bayes.intmean[prop][idx] = mean
            gbl_results.bayes.interror[prop][idx] = std
180
            if gbl_models.conf['analysis_params']['save_chi2'] is True:
181
                save_chi2(obs, prop, gbl_models, chi2, _(values))
182

183
184
        for prop in gbl_results.bayes.extmean:
            if prop.endswith('_log'):
185
                values = gbl_models.extprop[prop[:-4]][wz]
186
                _ = np.log10
187
            else:
188
                values = gbl_models.extprop[prop][wz]
189
                _ = lambda x: x
190
191
            mean, std = weighted_param(_(values[wlikely] * scaling_l * corr_dz),
                                       likelihood)
192
193
            gbl_results.bayes.extmean[prop][idx] = mean
            gbl_results.bayes.exterror[prop][idx] = std
194
            if gbl_models.conf['analysis_params']['save_chi2'] is True:
195
                save_chi2(obs, prop, gbl_models, chi2,
196
                          _(values * scaling * corr_dz))
197

198
199
200
201
202
203
204
205
206
        for band in gbl_results.bayes.fluxmean:
            values = gbl_models.flux[band][wz]
            mean, std = weighted_param(values[wlikely] * scaling_l,
                                       likelihood)
            gbl_results.bayes.fluxmean[band][idx] = mean
            gbl_results.bayes.fluxerror[band][idx] = std
            if gbl_models.conf['analysis_params']['save_chi2'] is True:
                save_chi2(obs, band, gbl_models, chi2, values * scaling)

207
208
        best_idx_z = np.nanargmin(chi2)
        gbl_results.best.chi2[idx] = chi2[best_idx_z]
209
        gbl_results.best.scaling[idx] = scaling[best_idx_z]
210
        gbl_results.best.index[idx] = gbl_models.index[wz][best_idx_z]
211

212
    gbl_counter.inc()
 Hector Salas's avatar
Hector Salas committed
213

214
215
216
217
218
219
220
221
222
223
224
225
226
227
def bestfit(oidx, obs):
    """Worker process to compute and save the best fit.

    Parameters
    ----------
    oidx: int
        Index of the observation. This is necessary to put the computed values
        at the right location in the ResultsManager.
    obs: row
        Input data for an individual object

    """
    np.seterr(invalid='ignore')

228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
    if np.isfinite(gbl_results.best.index[oidx]):
        best_index = int(gbl_results.best.index[oidx])

        # We compute the model at the exact redshift not to have to correct for
        # the difference between the object and the grid redshifts.
        params = deepcopy(gbl_params.from_index(best_index))
        if obs.redshift >= 0.:
            model_z = params[gbl_params.modules.index('redshifting')]['redshift']
            params[gbl_params.modules.index('redshifting')]['redshift'] = obs.redshift
            # Correct fluxes for the fact that the scaling factor was computed
            # on the grid redshift. Because of the difference in redshift the
            # distance is different and must be reflected in the scaling
            corr_scaling = compute_corr_dz(model_z, obs) / \
                           compute_corr_dz(obs.redshift, obs)
        else:  # The model redshift is always exact in redhisfting mode
            corr_scaling = 1.

        sed = deepcopy(gbl_warehouse.get_sed(gbl_params.modules, params))

        # Handle the case where the distance does not correspond to the redshift.
        if obs.redshift >= 0.:
            corr_dz = (obs.distance / sed.info['universe.luminosity_distance']) ** 2
        else:
            corr_dz = 1.

        scaling = gbl_results.best.scaling[oidx] * corr_scaling

        for band in gbl_results.best.flux:
            gbl_results.best.flux[band][oidx] = sed.compute_fnu(band) * scaling

        # If the distance is user defined, the redshift-based luminosity distance
        # of the model is probably incorrect so we replace it
        sed.add_info('universe.luminosity_distance', obs.distance, force=True)
        for prop in gbl_results.best.intprop:
            gbl_results.best.intprop[prop][oidx] = sed.info[prop]

        for prop in gbl_results.best.extprop:
            gbl_results.best.extprop[prop][oidx] = sed.info[prop] * scaling \
                                                       * corr_dz

        if gbl_conf['analysis_params']["save_best_sed"]:
            sed.to_fits(f"out/{obs.id}", scaling * corr_dz)
270

271
    gbl_counter.inc()