# -*- coding: utf-8 -*-
# Copyright (C) 2013 Centre de données Astrophysiques de Marseille
# Copyright (C) 2013-2014 Institute of Astronomy
# Copyright (C) 2014 Laboratoire d'Astrophysique de Marseille, AMU
# Copyright (C) 2014 Yannick Roehlly <yannick@iaora.eu>
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
# Author: Yannick Roehlly, Médéric Boquien & Denis Burgarella

from copy import deepcopy

import numpy as np

from ..utils import nothread
from .utils import save_chi2, compute_corr_dz, compute_chi2, weighted_param
from ...warehouse import SedWarehouse


def init_sed(models, counter):
    """Initializer of the pool of processes to share variables between workers.

    Parameters
    ----------
    models: ModelsManager
        Manages the storage of the computed models (fluxes and properties).
    counter: Counter class object
        Counter for the number of models computed

    """
    global gbl_warehouse, gbl_models, gbl_counter

    # Limit the number of threads to 1 if we use MKL in order to limit the
    # oversubscription of the CPU/RAM.
    nothread()

    gbl_warehouse = SedWarehouse()

    gbl_models = models
    gbl_counter = counter


def init_analysis(models, results, counter):
    """Initializer of the pool of processes to share variables between workers.

    Parameters
    ----------
    models: ModelsManager
        Manages the storage of the computed models (fluxes and properties).
    results: ResultsManager
        Contains the estimates and errors on the properties.
    counter: Counter class object
        Counter for the number of objects analysed

    """
    global gbl_models, gbl_obs, gbl_results, gbl_counter

    gbl_models = models
    gbl_obs = models.obs
    gbl_results = results
    gbl_counter = counter


def init_bestfit(conf, params, observations, results, counter):
    """Initializer of the pool of processes to share variables between workers.

    Parameters
    ----------
    conf: dict
        Contains the pcigale.ini configuration.
    params: ParametersManager
        Manages the parameters from a 1D index.
    observations: astropy.Table
        Contains the observations including the filter names.
    results: ResultsManager
        Contains the estimates and errors on the properties.
    counter: Counter class object
        Counter for the number of objects analysed

    """
    global gbl_warehouse, gbl_conf, gbl_params, gbl_obs
    global gbl_results, gbl_counter

    gbl_warehouse = SedWarehouse()

    gbl_conf = conf
    gbl_params = params
    gbl_obs = observations
    gbl_results = results
    gbl_counter = counter


def sed(idx, midx):
    """Worker process to retrieve a SED and affect the relevant data to an
    instance of ModelsManager.

    Parameters
    ----------
    idx: int
        Index of the model within the current block of models.
    midx: int
        Global index of the model.

    """
    sed = gbl_warehouse.get_sed(gbl_models.params.modules,
                                gbl_models.params.from_index(midx))

    # The redshift is the fastest varying variable but we want to store it
    # as the slowest one so that models at a given redshift are contiguous
    idx = (idx % gbl_models.nz) * gbl_models.nm + idx // gbl_models.nz
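    # Illustrative note (hypothetical numbers): with nz = 2 redshifts and
    # nm = 3 models per redshift, incoming indices 0..5 map to
    # [0, 3, 1, 4, 2, 5]; model k at redshift j moves from position
    # k * nz + j to position j * nm + k, so each redshift block is contiguous.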
    # Models whose star formation history is older than the age of the
    # Universe at that redshift are unphysical; flag them with NaN.
    if 'sfh.age' in sed.info and sed.info['sfh.age'] > sed.info['universe.age']:
        for band in gbl_models.flux:
            gbl_models.flux[band][idx] = np.nan
        for prop in gbl_models.extprop:
            gbl_models.extprop[prop][idx] = np.nan
        for prop in gbl_models.intprop:
            gbl_models.intprop[prop][idx] = np.nan
    else:
        for band in gbl_models.flux:
            gbl_models.flux[band][idx] = sed.compute_fnu(band)
        for prop in gbl_models.extprop:
            gbl_models.extprop[prop][idx] = sed.info[prop]
        for prop in gbl_models.intprop:
            gbl_models.intprop[prop][idx] = sed.info[prop]
    gbl_models.index[idx] = midx

    gbl_counter.inc()


def analysis(idx, obs):
    """Worker process to analyse the PDF and estimate parameters values and
    store them in an instance of ResultsManager.

    Parameters
    ----------
    idx: int
        Index of the observation. This is necessary to put the computed values
        at the right location in the ResultsManager.
    obs: row
        Input data for an individual object

    """
    np.seterr(invalid='ignore')

    if obs.redshift >= 0.:
        # We pick the models with the closest redshift using a slice to
        # work on views of the arrays and not on copies to save on RAM.
        z = np.array(
            gbl_models.conf['sed_modules_params']['redshifting']['redshift'])
        length = gbl_models.nm
        zidx = np.abs(obs.redshift-z).argmin()
        wz = slice(zidx * length, (zidx + 1) * length, 1)
        corr_dz = compute_corr_dz(z[zidx], obs)
    else:  # We do not know the redshift so we use the full grid
        wz = slice(0, None, 1)
        corr_dz = 1.

    chi2, scaling = compute_chi2(gbl_models, obs, corr_dz, wz,
                                 gbl_models.conf['analysis_params']['lim_flag'])

    if np.any(chi2 < -np.log(np.finfo(np.float64).tiny) * 2.):
        # We use the exponential probability associated with the χ² as
        # likelihood function.
        likelihood = np.exp(-.5 * chi2)
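        # Note: np.finfo(np.float64).tiny is about 2.2e-308, so exp(-chi2 / 2)
        # underflows to 0 once chi2 exceeds roughly -2 * log(tiny) ≈ 1417. The
        # guard above therefore runs the estimation only when at least one
        # model keeps a non-vanishing likelihood.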
        wlikely = np.where(np.isfinite(likelihood))
        # If all the models are valid, it is much more efficient to use a slice
        if likelihood.size == wlikely[0].size:
            wlikely = slice(None, None)
        likelihood = likelihood[wlikely]
        scaling_l = scaling[wlikely]

        gbl_results.bayes.weight[idx] = np.nansum(likelihood)

        # We compute the weighted average and standard deviation using the
        # likelihood as weight.
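        # weighted_param (imported from .utils) presumably returns the
        # likelihood-weighted mean mu = sum(w * x) / sum(w) and the weighted
        # standard deviation sqrt(sum(w * (x - mu)**2) / sum(w)); see .utils
        # for the actual implementation.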
        for prop in gbl_results.bayes.intmean:
            if prop.endswith('_log'):
                values = gbl_models.intprop[prop[:-4]][wz]
                _ = np.log10
            else:
                values = gbl_models.intprop[prop][wz]
                _ = lambda x: x
            mean, std = weighted_param(_(values[wlikely]), likelihood)
            gbl_results.bayes.intmean[prop][idx] = mean
            gbl_results.bayes.interror[prop][idx] = std
            if gbl_models.conf['analysis_params']['save_chi2'] is True:
                save_chi2(obs, prop, gbl_models, chi2, values)

        for prop in gbl_results.bayes.extmean:
            if prop.endswith('_log'):
                values = gbl_models.extprop[prop[:-4]][wz]
                _ = np.log10
            else:
                values = gbl_models.extprop[prop][wz]
                _ = lambda x: x
            mean, std = weighted_param(_(values[wlikely] * scaling_l * corr_dz),
                                       likelihood)
            gbl_results.bayes.extmean[prop][idx] = mean
            gbl_results.bayes.exterror[prop][idx] = std
            if gbl_models.conf['analysis_params']['save_chi2'] is True:
                save_chi2(obs, prop, gbl_models, chi2,
                          values * scaling * corr_dz)

        for band in gbl_results.bayes.fluxmean:
            values = gbl_models.flux[band][wz]
            mean, std = weighted_param(values[wlikely] * scaling_l,
                                       likelihood)
            gbl_results.bayes.fluxmean[band][idx] = mean
            gbl_results.bayes.fluxerror[band][idx] = std
            if gbl_models.conf['analysis_params']['save_chi2'] is True:
                save_chi2(obs, band, gbl_models, chi2, values * scaling)

        best_idx_z = np.nanargmin(chi2)
        gbl_results.best.chi2[idx] = chi2[best_idx_z]
        gbl_results.best.scaling[idx] = scaling[best_idx_z]
        gbl_results.best.index[idx] = gbl_models.index[wz][best_idx_z]

    gbl_counter.inc()

def bestfit(oidx, obs):
    """Worker process to compute and save the best fit.

    Parameters
    ----------
    oidx: int
        Index of the observation. This is necessary to put the computed values
        at the right location in the ResultsManager.
    obs: row
        Input data for an individual object

    """
    np.seterr(invalid='ignore')

    if np.isfinite(gbl_results.best.index[oidx]):
        best_index = int(gbl_results.best.index[oidx])

        # We compute the model at the exact redshift so that we do not have to
        # correct for the difference between the object and the grid redshifts.
        params = deepcopy(gbl_params.from_index(best_index))
        if obs.redshift >= 0.:
            model_z = params[gbl_params.modules.index('redshifting')]['redshift']
            params[gbl_params.modules.index('redshifting')]['redshift'] = obs.redshift
            # Correct fluxes for the fact that the scaling factor was computed
            # on the grid redshift. Because of the difference in redshift the
            # distance is different and must be reflected in the scaling
            corr_scaling = compute_corr_dz(model_z, obs) / \
                           compute_corr_dz(obs.redshift, obs)
        else:  # The model redshift is always exact in redshifting mode
            corr_scaling = 1.

        sed = deepcopy(gbl_warehouse.get_sed(gbl_params.modules, params))

        # Handle the case where the distance does not correspond to the redshift.
        if obs.redshift >= 0.:
            corr_dz = (obs.distance / sed.info['universe.luminosity_distance']) ** 2
        else:
            corr_dz = 1.
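        # Illustrative note (hypothetical numbers): if the user-supplied
        # distance is twice the model's redshift-based luminosity distance,
        # corr_dz is 4, i.e. the object must be intrinsically 4 times more
        # luminous to produce the same observed fluxes.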

        scaling = gbl_results.best.scaling[oidx] * corr_scaling

        for band in gbl_results.best.flux:
            gbl_results.best.flux[band][oidx] = sed.compute_fnu(band) * scaling

        # If the distance is user defined, the redshift-based luminosity distance
        # of the model is probably incorrect so we replace it
        sed.add_info('universe.luminosity_distance', obs.distance, force=True)
        for prop in gbl_results.best.intprop:
            gbl_results.best.intprop[prop][oidx] = sed.info[prop]

        for prop in gbl_results.best.extprop:
            gbl_results.best.extprop[prop][oidx] = sed.info[prop] * scaling \
                                                       * corr_dz

        if gbl_conf['analysis_params']["save_best_sed"]:
            sed.to_fits(f"out/{obs.id}", scaling * corr_dz)

    gbl_counter.inc()
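

# Minimal usage sketch (not part of pcigale; names such as ncores, models,
# counter and model_indices are assumptions for illustration): each init_*
# function above is meant to be passed as a multiprocessing.Pool initializer
# so that the shared managers become module-level globals in every worker
# process, e.g.:
#
#     import multiprocessing as mp
#
#     with mp.Pool(processes=ncores, initializer=init_sed,
#                  initargs=(models, counter)) as pool:
#         pool.starmap(sed, enumerate(model_indices))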