workers.py 10 KB
Newer Older
1 2 3
# -*- coding: utf-8 -*-
# Copyright (C) 2013 Centre de données Astrophysiques de Marseille
# Copyright (C) 2013-2014 Institute of Astronomy
4
# Copyright (C) 2014 Laboratoire d'Astrophysique de Marseille, AMU
5 6
# Copyright (C) 2014 Yannick Roehlly <yannick@iaora.eu>
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
7
# Author: Yannick Roehlly, Médéric Boquien & Denis Burgarella
8

9
from copy import deepcopy
10

11
import numpy as np
12

13
from .utils import save_chi2, compute_corr_dz, compute_chi2, weighted_param
14
from ...warehouse import SedWarehouse
15

Médéric Boquien's avatar
Médéric Boquien committed
16

17
def init_sed(models, counter):
18
    """Initializer of the pool of processes to share variables between workers.
19 20 21

    Parameters
    ----------
22 23
    models: ModelsManagers
        Manages the storage of the computed models (fluxes and properties).
24 25
    counter: Counter class object
        Counter for the number of models computed
26 27

    """
28
    global gbl_warehouse, gbl_models, gbl_counter
29

30
    gbl_warehouse = SedWarehouse()
31

32
    gbl_models = models
33
    gbl_counter = counter
Médéric Boquien's avatar
Médéric Boquien committed
34

 Hector Salas's avatar
Hector Salas committed
35

36
def init_analysis(models, results, counter):
37
    """Initializer of the pool of processes to share variables between workers.
38 39 40

    Parameters
    ----------
41 42 43 44
    models: ModelsManagers
        Manages the storage of the computed models (fluxes and properties).
    results: ResultsManager
        Contains the estimates and errors on the properties.
45 46
    counter: Counter class object
        Counter for the number of objects analysed
47 48

    """
49
    global gbl_models, gbl_obs, gbl_results, gbl_counter
50

51
    gbl_models = models
52
    gbl_obs = models.obs
53
    gbl_results = results
54
    gbl_counter = counter
55 56


57
def init_bestfit(conf, params, observations, results, counter):
58
    """Initializer of the pool of processes to share variables between workers.
59

60 61 62 63 64 65 66 67 68 69
    Parameters
    ----------
    conf: dict
        Contains the pcigale.ini configuration.
    params: ParametersManager
        Manages the parameters from a 1D index.
    observations: astropy.Table
        Contains the observations including the filter names.
    results: ResultsManager
        Contains the estimates and errors on the properties.
70 71
    counter: Counter class object
        Counter for the number of objects analysed
72

73
    """
74
    global gbl_warehouse, gbl_conf, gbl_params, gbl_obs
75
    global gbl_results, gbl_counter
76

77
    gbl_warehouse = SedWarehouse()
78

79 80 81 82
    gbl_conf = conf
    gbl_params = params
    gbl_obs = observations
    gbl_results = results
83
    gbl_counter = counter
Médéric Boquien's avatar
Médéric Boquien committed
84

 Hector Salas's avatar
Hector Salas committed
85

86 87 88
def sed(idx, midx):
    """Worker process to retrieve a SED and affect the relevant data to an
    instance of ModelsManager.
89 90 91

    Parameters
    ----------
92
    idx: int
93 94 95
        Index of the model within the current block of models.
    midx: int
        Global index of the model.
96 97

    """
98 99
    sed = gbl_warehouse.get_sed(gbl_models.params.modules,
                                gbl_models.params.from_index(midx))
100

101 102 103
    # The redshift is the fastest varying variable but we want to store it
    # as the slowest one so that models at a given redshift are contiguous
    idx = (idx % gbl_models.nz) * gbl_models.nm + idx // gbl_models.nz
104
    if 'sfh.age' in sed.info and sed.info['sfh.age'] > sed.info['universe.age']:
105
        for band in gbl_models.flux:
106
            gbl_models.flux[band][idx] = np.nan
107
        for prop in gbl_models.extprop:
108
            gbl_models.extprop[prop][idx] = np.nan
109
        for prop in gbl_models.intprop:
110
            gbl_models.intprop[prop][idx] = np.nan
111
    else:
112
        for band in gbl_models.flux:
113
            gbl_models.flux[band][idx] = sed.compute_fnu(band)
114
        for prop in gbl_models.extprop:
115
            gbl_models.extprop[prop][idx] = sed.info[prop]
116
        for prop in gbl_models.intprop:
117
            gbl_models.intprop[prop][idx] = sed.info[prop]
118
    gbl_models.index[idx] = midx
119

120
    gbl_counter.inc()
121

Médéric Boquien's avatar
Médéric Boquien committed
122

123
def analysis(idx, obs):
124 125
    """Worker process to analyse the PDF and estimate parameters values and
    store them in an instance of ResultsManager.
126 127 128

    Parameters
    ----------
129 130
    idx: int
        Index of the observation. This is necessary to put the computed values
131
        at the right location in the ResultsManager.
132 133 134 135
    obs: row
        Input data for an individual object

    """
136 137
    np.seterr(invalid='ignore')

138
    if obs.redshift >= 0.:
139 140
        # We pick the the models with the closest redshift using a slice to
        # work on views of the arrays and not on copies to save on RAM.
141 142
        z = np.array(
            gbl_models.conf['sed_modules_params']['redshifting']['redshift'])
143 144 145 146
        length = gbl_models.nm
        zidx = np.abs(obs.redshift-z).argmin()
        wz = slice(zidx * length, (zidx + 1) * length, 1)
        corr_dz = compute_corr_dz(z[zidx], obs)
147 148
    else:  # We do not know the redshift so we use the full grid
        wz = slice(0, None, 1)
149
        corr_dz = 1.
150

151
    chi2, scaling = compute_chi2(gbl_models, obs, corr_dz, wz,
152
                                 gbl_models.conf['analysis_params']['lim_flag'])
153

154
    if np.any(chi2 < -np.log(np.finfo(np.float64).tiny) * 2.):
155 156
        # We use the exponential probability associated with the χ² as
        # likelihood function.
157
        likelihood = np.exp(-.5 * chi2)
158 159 160 161
        wlikely = np.where(np.isfinite(likelihood))
        # If all the models are valid, it is much more efficient to use a slice
        if likelihood.size == wlikely[0].size:
            wlikely = slice(None, None)
162 163 164
        likelihood = likelihood[wlikely]
        scaling_l = scaling[wlikely]

165
        gbl_results.bayes.weight[idx] = np.nansum(likelihood)
166
        likelihood *= 1. / gbl_results.bayes.weight[idx]
167

Médéric Boquien's avatar
Médéric Boquien committed
168 169
        # We compute the weighted average and standard deviation using the
        # likelihood as weight.
170 171
        for prop in gbl_results.bayes.intmean:
            if prop.endswith('_log'):
172
                values = gbl_models.intprop[prop[:-4]][wz]
173 174
                _ = np.log10
            else:
175
                values = gbl_models.intprop[prop][wz]
176
                _ = lambda x: x
177
            mean, std = weighted_param(_(values[wlikely]), likelihood)
178 179
            gbl_results.bayes.intmean[prop][idx] = mean
            gbl_results.bayes.interror[prop][idx] = std
180
            if gbl_models.conf['analysis_params']['save_chi2'] is True:
181
                save_chi2(obs, prop, gbl_models, chi2, _(values))
182

183 184
        for prop in gbl_results.bayes.extmean:
            if prop.endswith('_log'):
185
                values = gbl_models.extprop[prop[:-4]][wz]
186
                _ = np.log10
187
            else:
188
                values = gbl_models.extprop[prop][wz]
189
                _ = lambda x: x
190 191
            mean, std = weighted_param(_(values[wlikely] * scaling_l * corr_dz),
                                       likelihood)
192 193
            gbl_results.bayes.extmean[prop][idx] = mean
            gbl_results.bayes.exterror[prop][idx] = std
194
            if gbl_models.conf['analysis_params']['save_chi2'] is True:
195
                save_chi2(obs, prop, gbl_models, chi2,
196
                          _(values * scaling * corr_dz))
197

198 199 200 201 202 203 204 205 206
        for band in gbl_results.bayes.fluxmean:
            values = gbl_models.flux[band][wz]
            mean, std = weighted_param(values[wlikely] * scaling_l,
                                       likelihood)
            gbl_results.bayes.fluxmean[band][idx] = mean
            gbl_results.bayes.fluxerror[band][idx] = std
            if gbl_models.conf['analysis_params']['save_chi2'] is True:
                save_chi2(obs, band, gbl_models, chi2, values * scaling)

207 208
        best_idx_z = np.nanargmin(chi2)
        gbl_results.best.chi2[idx] = chi2[best_idx_z]
209
        gbl_results.best.scaling[idx] = scaling[best_idx_z]
210
        gbl_results.best.index[idx] = gbl_models.index[wz][best_idx_z]
211

212
    gbl_counter.inc()
 Hector Salas's avatar
Hector Salas committed
213

214 215 216 217 218 219 220 221 222 223 224 225 226 227
def bestfit(oidx, obs):
    """Worker process to compute and save the best fit.

    Parameters
    ----------
    oidx: int
        Index of the observation. This is necessary to put the computed values
        at the right location in the ResultsManager.
    obs: row
        Input data for an individual object

    """
    np.seterr(invalid='ignore')

228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
    if np.isfinite(gbl_results.best.index[oidx]):
        best_index = int(gbl_results.best.index[oidx])

        # We compute the model at the exact redshift not to have to correct for
        # the difference between the object and the grid redshifts.
        params = deepcopy(gbl_params.from_index(best_index))
        if obs.redshift >= 0.:
            model_z = params[gbl_params.modules.index('redshifting')]['redshift']
            params[gbl_params.modules.index('redshifting')]['redshift'] = obs.redshift
            # Correct fluxes for the fact that the scaling factor was computed
            # on the grid redshift. Because of the difference in redshift the
            # distance is different and must be reflected in the scaling
            corr_scaling = compute_corr_dz(model_z, obs) / \
                           compute_corr_dz(obs.redshift, obs)
        else:  # The model redshift is always exact in redhisfting mode
            corr_scaling = 1.

        sed = deepcopy(gbl_warehouse.get_sed(gbl_params.modules, params))

        # Handle the case where the distance does not correspond to the redshift.
        if obs.redshift >= 0.:
            corr_dz = (obs.distance / sed.info['universe.luminosity_distance']) ** 2
        else:
            corr_dz = 1.

        scaling = gbl_results.best.scaling[oidx] * corr_scaling

        for band in gbl_results.best.flux:
            gbl_results.best.flux[band][oidx] = sed.compute_fnu(band) * scaling

        # If the distance is user defined, the redshift-based luminosity distance
        # of the model is probably incorrect so we replace it
        sed.add_info('universe.luminosity_distance', obs.distance, force=True)
        for prop in gbl_results.best.intprop:
            gbl_results.best.intprop[prop][oidx] = sed.info[prop]

        for prop in gbl_results.best.extprop:
            gbl_results.best.extprop[prop][oidx] = sed.info[prop] * scaling \
                                                       * corr_dz

        if gbl_conf['analysis_params']["save_best_sed"]:
            sed.to_fits(f"out/{obs.id}", scaling * corr_dz)
270

271
    gbl_counter.inc()