utils.py 12.6 KB
Newer Older
1
2
# -*- coding: utf-8 -*-
# Copyright (C) 2013 Centre de données Astrophysiques de Marseille
3
# Copyright (C) 2013-2014 Institute of Astronomy
4
5
# Copyright (C) 2014 Yannick Roehlly <yannick@iaora.eu>
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
6
# Author: Yannick Roehlly & Médéric Boquien
7

8
from astropy import log
9
from astropy.table import Table, Column
10
import numpy as np
11
from scipy import optimize
12
from scipy.special import erf
13

14
15
from ..utils import OUT_DIR

Médéric Boquien's avatar
Médéric Boquien committed
16
17
18
log.setLevel('ERROR')


19
def save_best_sed(obsid, sed, norm):
    """Save the best SED to a VO table.

    Parameters
    ----------
    obsid: string
        Name of the object. Used to prepend the output file name
    sed: SED object
        Best SED
    norm: float
        Normalisation factor to scale the SED to the observations

    """
    # The normalisation factor is passed as the "mass" parameter so the
    # stored model is scaled to the observations.
    sed.to_votable(OUT_DIR + "{}_best_model.xml".format(obsid), mass=norm)
33
34


35
36
def save_pdf(obsid, analysed_variables, model_variables, likelihood, wlikely):
    """Compute and save the PDF to a FITS file

    We estimate the probability density functions (PDF) of the parameters from
    a likelihood-weighted histogram.

    Parameters
    ----------
    obsid: string
        Name of the object. Used to prepend the output file name
    analysed_variables: list
        Analysed variables names
    model_variables: 2D array
        Values of the model variables
    likelihood: 1D array
        Likelihood of the "likely" models
    wlikely: 1D array
        Indices of the likely models to select them from the model_variables
        array

    """
    # We check how many unique parameter values are analysed and if
    # less than Npdf (= 100), the PDF is initially built assuming a
    # number of bins equal to the number of unique values for a given
    # parameter
    Npdf = 100
    for i, var_name in enumerate(analysed_variables):
        min_hist = np.min(model_variables[wlikely[0], i])
        max_hist = np.max(model_variables[wlikely[0], i])
        Nhist = min(Npdf, len(np.unique(model_variables[:, i])))

        if min_hist == max_hist:
            # Degenerate case: the parameter takes a single value among the
            # likely models; represent it as a flat two-point PDF.
            pdf_grid = np.array([min_hist, max_hist])
            pdf_prob = np.array([1., 1.])
        else:
            pdf_prob, pdf_grid = np.histogram(model_variables[wlikely[0], i],
                                              Nhist,
                                              (min_hist, max_hist),
                                              weights=likelihood, density=True)
            # Resample the histogram onto a regular Npdf-point grid so all
            # saved PDFs share the same resolution.
            pdf_x = (pdf_grid[1:]+pdf_grid[:-1]) / 2.

            pdf_grid = np.linspace(min_hist, max_hist, Npdf)
            pdf_prob = np.interp(pdf_grid, pdf_x, pdf_prob)

        # NOTE(review): pdf_prob is assigned in both branches above, so this
        # None check looks unreachable — kept as a defensive guard.
        if pdf_prob is None:
            # TODO: use logging
            print("Can not compute PDF for observation <{}> and "
                  "variable <{}>.".format(obsid, var_name))
        else:
            table = Table((
                Column(pdf_grid, name=var_name),
                Column(pdf_prob, name="probability density")
            ))
            table.write(OUT_DIR + "{}_{}_pdf.fits".format(obsid, var_name))


def save_chi2(obsid, analysed_variables, model_variables, reduced_chi2):
    """Save the best reduced χ² versus the analysed variables

    Parameters
    ----------
    obsid: string
        Name of the object. Used to prepend the output file name
    analysed_variables: list
        Analysed variable names
    model_variables: 2D array
        Analysed variables values for all models
    reduced_chi2: array
        Reduced χ² of all models

    """
    # One FITS file per analysed variable, pairing each model's variable
    # value with its reduced χ².
    for var_index, var_name in enumerate(analysed_variables):
        table = Table((
            Column(model_variables[:, var_index],
                   name=var_name),
            Column(reduced_chi2, name="chi2")))
        table.write(OUT_DIR + "{}_{}_chi2.fits".format(obsid, var_name))


115
def save_table_analysis(filename, obsid, analysed_variables, analysed_averages,
                        analysed_std):
    """Save the estimated values derived from the analysis of the PDF

    Parameters
    ----------
    filename: name of the file to save
        Name of the output file
    obsid: table column
        Names of the objects
    analysed_variables: list
        Analysed variable names
    analysed_averages: RawArray
        Analysed variables values estimates
    analysed_std: RawArray
        Analysed variables errors estimates

    """
    # The shared RawArrays arrive as (buffer, shape) pairs; rebuild the 2D
    # numpy views before writing them out.
    means = np.ctypeslib.as_array(analysed_averages[0]).reshape(
        analysed_averages[1])
    stds = np.ctypeslib.as_array(analysed_std[0]).reshape(analysed_std[1])

    # One column of estimates and one of errors per analysed variable.
    result_table = Table()
    result_table.add_column(Column(obsid.data, name="observation_id"))
    for idx, variable in enumerate(analysed_variables):
        result_table.add_column(Column(means[:, idx], name=variable))
        result_table.add_column(Column(stds[:, idx], name=variable + "_err"))

    result_table.write(OUT_DIR + filename, format='ascii.fixed_width',
                       delimiter=None)
152
153


Médéric Boquien's avatar
Médéric Boquien committed
154
155
def save_table_best(filename, obsid, chi2, chi2_red, variables, fluxes,
                    filters, info_keys):
    """Save the values corresponding to the best fit

    Parameters
    ----------
    filename: name of the file to save
        Name of the output file
    obsid: table column
        Names of the objects
    chi2: RawArray
        Best χ² for each object
    chi2_red: RawArray
        Best reduced χ² for each object
    variables: RawArray
        All variables corresponding to a SED
    fluxes: RawArray
        Fluxes in all bands for each object
    filters: list
        Filters used to compute the fluxes
    info_keys: list
        Parameters names

    """
    # Rebuild numpy views from the shared (buffer, shape) RawArray pairs.
    flux_arr = np.ctypeslib.as_array(fluxes[0]).reshape(fluxes[1])
    var_arr = np.ctypeslib.as_array(variables[0]).reshape(variables[1])
    chi2_arr = np.ctypeslib.as_array(chi2[0])
    chi2_red_arr = np.ctypeslib.as_array(chi2_red[0])

    best_model_table = Table()
    best_model_table.add_column(Column(obsid.data, name="observation_id"))
    best_model_table.add_column(Column(chi2_arr, name="chi_square"))
    best_model_table.add_column(Column(chi2_red_arr,
                                       name="reduced_chi_square"))

    # One column per best-model parameter, then one per filter flux.
    for idx, key in enumerate(info_keys):
        best_model_table.add_column(Column(var_arr[:, idx], name=key))

    for idx, band in enumerate(filters):
        best_model_table.add_column(Column(flux_arr[:, idx], name=band,
                                           unit='mJy'))

    best_model_table.write(OUT_DIR + filename, format='ascii.fixed_width',
                           delimiter=None)
203
204


205
def dchi2_over_ds2(s, obs_fluxes, obs_errors, mod_fluxes):
    """Function used to estimate the normalization factor in the SED fitting
    process when upper limits are included in the dataset to fit (from Eq. A11
    in Sawicki M. 2012, PASA, 124, 1008).

    Parameters
    ----------
    s: Float
        Contains value onto which we perform minimization = normalization
        factor
    obs_fluxes: RawArray
        Contains observed fluxes for each filter.
    obs_errors: RawArray
        Contains observed errors for each filter.
    mod_fluxes: RawArray
        Contains modeled fluxes for each filter.

    Returns
    -------
    func: Float
        Eq. A11 in Sawicki M. 2012, PASA, 124, 1008).

    """
    # This function is only reached when lim_flag is True upstream.

    # Bands with a measured flux have obs_errors >= 0; bands with an upper
    # limit are flagged with a (finite) negative error.
    lim_mask = np.isfinite(obs_errors) & (obs_errors < 0.)
    data_mask = obs_errors >= 0.

    flux_d = obs_fluxes[data_mask]
    err_d = obs_errors[data_mask]
    mod_d = mod_fluxes[data_mask]

    flux_l = obs_fluxes[lim_mask]
    # The stored error of an upper limit is negative; flip the sign to get
    # the usable (positive) uncertainty.
    err_l = -obs_errors[lim_mask]
    mod_l = mod_fluxes[lim_mask]

    # Contribution of the detected fluxes: the standard dχ²/ds term.
    term_data = np.sum((flux_d - s * mod_d) * mod_d / (err_d * err_d))

    # Contribution of the upper limits: the error-function term of Eq. A11.
    # The argument (F_lim - s*F_mod) / (sqrt(2)*sigma) appears both in the
    # Gaussian and in the erf, so compute it once.
    arg = (flux_l - s * mod_l) / (np.sqrt(2) * err_l)
    term_lim = np.sqrt(2. / np.pi) * np.sum(
        mod_l * np.exp(-np.square(arg)) / (err_l * (1. + erf(arg))))

    return term_data - term_lim
270

Médéric Boquien's avatar
Médéric Boquien committed
271

272
273
274
275
276
277
278
279
280
281
282
283
284
285
def analyse_chi2(chi2):
    """Function to analyse the best chi^2 and find out what fraction of
    objects seem to be overconstrained.

    Parameters
    ----------
    chi2: RawArray
        Contains the reduced chi^2

    """
    chi2_red = np.ctypeslib.as_array(chi2[0])
    # If low values of reduced chi^2, it means that the data are overfitted
    # Errors might be under-estimated or not enough valid data.
    # The message announces percentages, so multiply the fractions by 100
    # (previously the raw fractions were printed with a "%" sign).
    print("\n{}% of the objects have chi^2_red~0 and {}% chi^2_red<0.5"
          .format(np.round(100. * (chi2_red < 1e-12).sum() / chi2_red.size, 1),
                  np.round(100. * (chi2_red < 0.5).sum() / chi2_red.size, 1)))
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316


def compute_chi2(model_fluxes, obs_fluxes, obs_errors, lim_flag):
    """Fit a grid of models to the observations using a χ² minimisation. The
    models are scaled using an analytic formula to minimise the χ². The
    algorithm also handles missing data and the presence of upper limits.

    Parameters
    ----------
    model_fluxes: array
        2D grid containing the fluxes of the models. Modified in place: on
        return each row has been multiplied by its best-fit scaling factor.
    obs_fluxes: array
        Fluxes of the observed object
    obs_errors: array
        Uncertainties on the fluxes of the observed object
    lim_flag: boolean
        Boolean indicating whether upper limits should be treated (True) or
        discarded (False)

    Returns
    -------
    chi2: array
        χ² for all the models in the grid
    scaling: array
        scaling of the models to obtain the minimum χ²

    """
    # Some observations may not have fluxes in some filters, but they can
    # have upper limits, indicated with obs_errors≤0. To treat them as such,
    # lim_flag has to be set to True.
    tolerance = 1e-12
    limits = lim_flag and np.any(np.isfinite(obs_errors) &
                                 (obs_errors < tolerance))

    # Scaling factor to be applied to the model fluxes to minimise the χ².
    # This is the analytic solution of dχ²/ds = 0 for pure detections.
    scaling = (
        np.sum(model_fluxes * obs_fluxes / (obs_errors * obs_errors), axis=1) /
        np.sum(model_fluxes * model_fluxes / (obs_errors * obs_errors), axis=1)
    )
    if limits:
        # With upper limits there is no closed form: refine each scaling
        # factor numerically starting from the analytic estimate.
        for imod in range(len(model_fluxes)):
            scaling[imod] = optimize.root(dchi2_over_ds2, scaling[imod],
                                          args=(obs_fluxes, obs_errors,
                                                model_fluxes[imod, :])).x

    # Scale the models and compute the χ² on the entire grid. Note that this
    # multiplication is in place: the caller's model_fluxes is modified.
    model_fluxes *= scaling[:, np.newaxis]

    # Mask to select the bands with measured fluxes
    mask_data = np.logical_and(obs_fluxes > tolerance,
                               obs_errors > tolerance)
    chi2 = np.sum(np.square(
        (obs_fluxes[mask_data] - model_fluxes[:, mask_data]) /
        obs_errors[mask_data]), axis=1)
    if limits:
        # This mask selects the filter(s) for which upper limits are given,
        # i.e., when obs_errors < 0. Add their contribution to the χ²
        # (cf. Sawicki 2012).
        mask_lim = np.logical_and(np.isfinite(obs_errors),
                                  obs_errors < tolerance)
        chi2 += -2. * np.sum(
            np.log(
                np.sqrt(np.pi / 2.) * (-obs_errors[mask_lim]) * (
                    1. + erf(
                        (obs_fluxes[mask_lim] - model_fluxes[:, mask_lim]) /
                        (np.sqrt(2) * (-obs_errors[mask_lim]))))), axis=1)

    return chi2, scaling
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380


def weighted_param(param, weights):
    """Compute the weighted mean and standard deviation of an array of data.

    Parameters
    ----------
    param: array
        Values of the parameters for the entire grid of models
    weights: array
        Weights by which to weight the parameter values

    Returns
    -------
    mean: float
        Weighted mean of the parameter values
    std: float
        Weighted standard deviation of the parameter values

    """
    wmean = np.average(param, weights=weights)
    # Weighted variance about the weighted mean, then take the square root.
    variance = np.average((param - wmean) ** 2, weights=weights)
    return (wmean, np.sqrt(variance))