# -*- coding: utf-8 -*-
# Copyright (C) 2013 Centre de données Astrophysiques de Marseille
# Copyright (C) 2013-2014 Institute of Astronomy
# Copyright (C) 2014 Yannick Roehlly <yannick@iaora.eu>
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
# Author: Yannick Roehlly & Médéric Boquien

from functools import lru_cache
import os

from astropy import log
from astropy.cosmology import WMAP7 as cosmo
from astropy.table import Table, Column
import numpy as np
from scipy import optimize
from scipy.special import erf

log.setLevel('ERROR')


def save_chi2(obs, variable, models, chi2, values):
    """Save the χ² and the associated physical property values of the models
    in the current block to a memory-mapped file on disk.

    Parameters
    ----------
    obs: dict-like
        Observation (e.g., a table row); its 'id' is used to build the name
        of the output file.
    variable: string
        Name of the physical property associated with the χ².
    models: object
        Grid of models; `models.params.size` gives the total number of models
        and `models.block` the slice covered by the current block.
    chi2: array
        χ² of the models in the current block.
    values: array
        Values of the physical property for the models in the current block.

    """
    fname = 'out/{}_{}_chi2.npy'.format(obs['id'], variable)
    if os.path.exists(fname):
        data = np.memmap(fname, dtype=np.float64, mode='r+',
                         shape=(2, models.params.size))
    else:
        data = np.memmap(fname, dtype=np.float64, mode='w+',
                         shape=(2, models.params.size))
    data[0, models.block] = chi2
    data[1, models.block] = values
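
# Illustrative read-back sketch (the file name below is hypothetical; this is
# not part of the pipeline): the raw memory map written above can be reopened
# without loading it entirely into memory, e.g.
#
#   data = np.memmap('out/42_sfr_chi2.npy', dtype=np.float64, mode='r')
#   chi2, values = data.reshape(2, -1)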


@lru_cache(maxsize=None)
def compute_corr_dz(model_z, obs_z):
    """The mass-dependent physical properties are computed assuming the
    redshift of the model. However because we round the observed redshifts to
    two decimals, there can be a difference of 0.005 in redshift between the
    models and the actual observation. At low redshift, this can cause a
    discrepancy in the mass-dependent physical properties: ~0.35 dex at z=0.010
    vs 0.015 for instance. Therefore we correct these physical quantities by
    multiplying them by corr_dz.

    Parameters
    ----------
    model_z: float
        Redshift of the model.
    obs_z: float
        Redshift of the observed object.

    Returns
    -------
    corr_dz: float
        Correction factor to apply to the mass-dependent physical properties.

    """
    if model_z == obs_z:
        return 1.
    if model_z > 0.:
        return (cosmo.luminosity_distance(obs_z).value /
                cosmo.luminosity_distance(model_z).value)**2.
    return (cosmo.luminosity_distance(obs_z).value * 1e5)**2.
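
# Illustrative example (values chosen to match the docstring above): for a
# model at z=0.010 and an object observed at z=0.015, the mass-dependent
# properties would be rescaled as
#
#   corr = compute_corr_dz(0.010, 0.015)   # ~2.3, i.e. the ~0.35 dex above
#   stellar_mass *= corr                   # `stellar_mass` is hypothetical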


def dchi2_over_ds2(s, obs_fluxes, obs_errors, mod_fluxes):
    """Function used to estimate the normalization factor in the SED fitting
    process when upper limits are included in the dataset to fit (from Eq. A11
    in Sawicki M. 2012, PASA, 124, 1008).

    Parameters
    ----------
    s: float
        Normalization factor, the variable with respect to which the χ² is
        minimized.
    obs_fluxes: array
        Observed fluxes for each filter.
    obs_errors: array
        Observed errors for each filter; upper limits are flagged by negative
        errors.
    mod_fluxes: array
        Model fluxes for each filter.

    Returns
    -------
    func: float
        Value of Eq. A11 in Sawicki M. 2012, PASA, 124, 1008, whose root
        gives the best normalization factor.

    """
    # This function is only called when upper limits are handled (lim_flag is
    # True in the caller).

    # The mask "wdata" selects the filter(s) for which measured fluxes are
    # given, i.e., where obs_errors >= 0. The mask "wlim" selects the
    # filter(s) for which upper limits are given, i.e., where obs_errors < 0.

    wlim = np.where(np.isfinite(obs_errors) & (obs_errors < 0.))
    wdata = np.where(obs_errors >= 0.)

    mod_fluxes_data = mod_fluxes[wdata]
    mod_fluxes_lim = mod_fluxes[wlim]

    obs_fluxes_data = obs_fluxes[wdata]
    obs_fluxes_lim = obs_fluxes[wlim]

    obs_errors_data = obs_errors[wdata]
    obs_errors_lim = -obs_errors[wlim]

    dchi2_over_ds_data = np.sum(
        (obs_fluxes_data - s*mod_fluxes_data) *
        mod_fluxes_data / (obs_errors_data*obs_errors_data))

    dchi2_over_ds_lim = np.sqrt(2./np.pi) * np.sum(
        mod_fluxes_lim *
        np.exp(-np.square((obs_fluxes_lim - s*mod_fluxes_lim) /
                          (np.sqrt(2) * obs_errors_lim))) /
        (obs_errors_lim *
         (1. + erf((obs_fluxes_lim - s*mod_fluxes_lim) /
                   (np.sqrt(2) * obs_errors_lim)))))

    func = dchi2_over_ds_data - dchi2_over_ds_lim

    return func
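
# Usage sketch (`s_guess`, `obs_fluxes`, `obs_errors` and `mod_fluxes` are
# placeholders): the best normalization factor for a single model is the root
# of this function, e.g.
#
#   s_best = optimize.root(dchi2_over_ds2, s_guess,
#                          args=(obs_fluxes, obs_errors, mod_fluxes)).x
#
# This is how compute_chi2 below refines the analytic scaling when upper
# limits are present.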


def _compute_scaling(model_fluxes, obs_fluxes, obs_errors):
    """Compute the scaling factor to be applied to the model fluxes to best fit
    the observations. Note that we loop over the bands to avoid the creation
    of an array of the same size as the model_fluxes array. Because we loop
    over the bands and not over the models, the impact on performance should
    be small.

    Parameters
    ----------
    model_fluxes: array
        Fluxes of the models
    obs_fluxes: array
        Observed fluxes
    obs_errors: array
        Observed errors

    Returns
    -------
    scaling: array
        Scaling factors minimising the χ²
    """
    num = np.zeros(model_fluxes.shape[1])
    denom = np.zeros(model_fluxes.shape[1])
    for i in range(obs_fluxes.size):
        if np.isfinite(obs_fluxes[i]) and obs_errors[i] > 0.:
            num += model_fluxes[i, :] * (obs_fluxes[i] / (obs_errors[i] *
                                                          obs_errors[i]))
            denom += np.square(model_fluxes[i, :] * (1./obs_errors[i]))

    return num/denom
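
# Note (illustrative, made-up numbers): for detected fluxes the χ²-minimising
# scaling has the closed form s = Σ(f_obs·f_mod/σ²) / Σ(f_mod²/σ²), which is
# what the loop above accumulates band by band in `num` and `denom`, e.g.
#
#   model_fluxes = np.array([[1., 2.],
#                            [2., 4.]])    # 2 bands × 2 models
#   obs_fluxes = np.array([2., 4.])
#   obs_errors = np.array([.1, .2])
#   _compute_scaling(model_fluxes, obs_fluxes, obs_errors)  # ~[2., 1.]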


def compute_chi2(model_fluxes, obs_fluxes, obs_errors, lim_flag):
    """Compute the χ² of observed fluxes with respect to the grid of models. We
    take into account upper limits if need be. Note that we look over the bands
    to avoid the creation of an array of the same size as the model_fluxes
    array. Because we loop on the bands and not on the models, the impact on
    the performance should be small.

    Parameters
    ----------
    model_fluxes: array
        2D grid containing the fluxes of the models
    obs_fluxes: array
        Fluxes of the observed object
    obs_errors: array
        Uncertainties on the fluxes of the observed object
    lim_flag: boolean
        Boolean indicating whether upper limits should be treated (True) or
        discarded (False)

    Returns
    -------
    chi2: array
        χ² for all the models in the grid
    scaling: array
        scaling of the models to obtain the minimum χ²
    """
    limits = lim_flag and np.any(obs_errors <= 0.)

    scaling = _compute_scaling(model_fluxes, obs_fluxes, obs_errors)

    # Some observations may not have flux values in some filter(s), but
    # they can have upper limit(s).
    if limits:
        for imod in range(scaling.size):
            scaling[imod] = optimize.root(dchi2_over_ds2, scaling[imod],
                                          args=(obs_fluxes, obs_errors,
                                                model_fluxes[:, imod])).x

    # χ² of the comparison of each model to each observation.
    chi2 = np.zeros(model_fluxes.shape[1])
    for i in range(obs_fluxes.size):
        if np.isfinite(obs_fluxes[i]) and obs_errors[i] > 0.:
            chi2 += np.square((obs_fluxes[i] - model_fluxes[i, :]*scaling) *
                              (1./obs_errors[i]))

    # Some observations may not have flux values in some filter(s), but
    # they can have upper limit(s).
    if limits:
        for i, obs_error in enumerate(obs_errors):
            if obs_error < 0.:
                chi2 -= 2. * np.log(
                    np.sqrt(np.pi/2.) * (-obs_error) *
                    (1. + erf((obs_fluxes[i] - model_fluxes[i, :]*scaling) /
                              (np.sqrt(2) * (-obs_error)))))

    return chi2, scaling
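
# Usage sketch (made-up numbers): a 3-band observation whose third band is an
# upper limit, flagged by a negative error, fitted against a grid of two
# models:
#
#   model_fluxes = np.array([[1., 2.],
#                            [2., 4.],
#                            [3., 6.]])        # 3 bands × 2 models
#   obs_fluxes = np.array([2., 4., 5.])
#   obs_errors = np.array([.1, .2, -.5])       # negative error = upper limit
#   chi2, scaling = compute_chi2(model_fluxes, obs_fluxes, obs_errors, True)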


def weighted_param(param, weights):
    """Compute the weighted mean and standard deviation of an array of data.

    Parameters
    ----------
    param: array
        Values of the parameters for the entire grid of models
    weights: array
        Weights by which to weight the parameter values

    Returns
    -------
    mean: float
        Weighted mean of the parameter values
    std: float
        Weighted standard deviation of the parameter values

    """

    mean = np.average(param, weights=weights)
    std = np.sqrt(np.average((param-mean)**2, weights=weights))

    return (mean, std)
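
# Usage sketch (`chi2` and `sfr` are hypothetical arrays over the model grid;
# likelihood-based weights such as exp(-χ²/2) are a common choice but are not
# imposed by this function):
#
#   weights = np.exp(-chi2 / 2.)
#   sfr_mean, sfr_std = weighted_param(sfr, weights)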