utils.py 8.6 KB
Newer Older
1
2
# -*- coding: utf-8 -*-
# Copyright (C) 2013 Centre de données Astrophysiques de Marseille
3
# Copyright (C) 2013-2014 Institute of Astronomy
4
5
# Copyright (C) 2014 Yannick Roehlly <yannick@iaora.eu>
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
6
# Author: Yannick Roehlly & Médéric Boquien
7

8
from astropy import log
9
from astropy.table import Table, Column
10
import numpy as np
11
from scipy.stats import scoreatpercentile
12

13
14
from ..utils import OUT_DIR

Médéric Boquien's avatar
Médéric Boquien committed
15
16
log.setLevel('ERROR')

17
18
# Number of points in the PDF
PDF_NB_POINTS = 1000
19

Médéric Boquien's avatar
Médéric Boquien committed
20

21
def save_best_sed(obsid, sed, norm):
22
23
24
25
26
27
    """Save the best SED to a VO table.

    Parameters
    ----------
    obsid: string
        Name of the object. Used to prepend the output file name
28
29
    sed: SED object
        Best SED
30
31
32
33
    norm: float
        Normalisation factor to scale the scale to the observations

    """
34
    sed.to_votable(OUT_DIR + "{}_best_model.xml".format(obsid), mass=norm)
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52


def save_pdf(obsid, analysed_variables, model_variables, likelihood):
    """Save the PDF to a FITS file

    We estimate the probability density functions (PDF) of the parameters using
    a weighted kernel density estimation. This part should definitely be
    improved as we simulate the weight by adding as many value as their
    probability * 100.

    Parameters
    ----------
    obsid: string
        Name of the object. Used to prepend the output file name
    analysed_variables: list
        Analysed variables names
    model_variables: 2D array
        Analysed variables values for all models
53
    likelihood: 2D array
54
55
56
57
        Likelihood for all models

    """
    for var_index, var_name in enumerate(analysed_variables):
58
59
        pdf_grid = model_variables[:, var_index]
        pdf_prob = likelihood[:, var_index]
60
61
62
63
64
65
66
67
68
69
70
71
72
        if pdf_prob is None:
            # TODO: use logging
            print("Can not compute PDF for observation <{}> and "
                  "variable <{}>.".format(obsid, var_name))
        else:
            table = Table((
                Column(pdf_grid, name=var_name),
                Column(pdf_prob, name="probability density")
            ))
            table.write(OUT_DIR + "{}_{}_pdf.fits".format(obsid, var_name))


def save_chi2(obsid, analysed_variables, model_variables, reduced_chi2):
73
    """Save the best reduced Dz versus the analysed variables
74
75
76
77
78
79
80
81
82
83

    Parameters
    ----------
    obsid: string
        Name of the object. Used to prepend the output file name
    analysed_variables: list
        Analysed variable names
    model_variables: 2D array
        Analysed variables values for all models
    reduced_chi2:
84
        Reduced Dz
85
86
87
88
89
90
91
92
93
94

    """
    for var_index, var_name in enumerate(analysed_variables):
        table = Table((
            Column(model_variables[:, var_index],
                   name=var_name),
            Column(reduced_chi2, name="chi2")))
        table.write(OUT_DIR + "{}_{}_chi2.fits".format(obsid, var_name))


95
def save_table_analysis(filename, obsid, analysed_variables, analysed_averages,
96
97
98
99
100
                        analysed_std):
    """Save the estimated values derived from the analysis of the PDF

    Parameters
    ----------
101
102
    filename: name of the file to save
        Name of the output file
103
104
105
106
    obsid: table column
        Names of the objects
    analysed_variables: list
        Analysed variable names
107
    analysed_averages: RawArray
108
        Analysed variables values estimates
109
    analysed_std: RawArray
110
111
112
        Analysed variables errors estimates

    """
113
114
115
116
117
118
    np_analysed_averages = np.ctypeslib.as_array(analysed_averages[0])
    np_analysed_averages = np_analysed_averages.reshape(analysed_averages[1])

    np_analysed_std = np.ctypeslib.as_array(analysed_std[0])
    np_analysed_std = np_analysed_std.reshape(analysed_std[1])

119
120
121
122
    result_table = Table()
    result_table.add_column(Column(obsid.data, name="observation_id"))
    for index, variable in enumerate(analysed_variables):
        result_table.add_column(Column(
123
            np_analysed_averages[:, index],
124
125
126
            name=variable
        ))
        result_table.add_column(Column(
127
            np_analysed_std[:, index],
128
129
            name=variable+"_err"
        ))
130
131
    result_table.write(OUT_DIR + filename, format='ascii.fixed_width',
                       delimiter=None)
132
133


Médéric Boquien's avatar
Médéric Boquien committed
134
135
def save_table_best(filename, obsid, chi2, chi2_red, variables, fluxes,
                    filters, info_keys):
136
137
138
139
    """Save the values corresponding to the best fit

    Parameters
    ----------
140
141
    filename: name of the file to save
        Name of the output file
142
143
    obsid: table column
        Names of the objects
144
145
146
147
148
    chi2: RawArray
        Best χ² for each object
    chi2_red: RawArray
        Best reduced χ² for each object
    variables: RawArray
149
        All variables corresponding to a SED
150
    fluxes: RawArray
151
        Fluxes in all bands for each object
152
    filters: list
153
        Filters used to compute the fluxes
154
155
    info_keys: list
        Parameters names
156
157

    """
158
159
160
161
162
163
164
165
166
167
    np_fluxes = np.ctypeslib.as_array(fluxes[0])
    np_fluxes = np_fluxes.reshape(fluxes[1])

    np_variables = np.ctypeslib.as_array(variables[0])
    np_variables = np_variables.reshape(variables[1])

    np_chi2 = np.ctypeslib.as_array(chi2[0])

    np_chi2_red = np.ctypeslib.as_array(chi2_red[0])

168
169
    best_model_table = Table()
    best_model_table.add_column(Column(obsid.data, name="observation_id"))
170
171
    best_model_table.add_column(Column(np_chi2, name="chi_square"))
    best_model_table.add_column(Column(np_chi2_red, name="reduced_chi_square"))
172

173
    for index, name in enumerate(info_keys):
174
        column = Column(np_variables[:, index], name=name)
175
176
177
        best_model_table.add_column(column)

    for index, name in enumerate(filters):
178
        column = Column(np_fluxes[:, index], name=name, unit='mJy')
179
180
        best_model_table.add_column(column)

Médéric Boquien's avatar
Médéric Boquien committed
181
    best_model_table.write(OUT_DIR + filename, format='ascii.fixed_width',
182
                           delimiter=None)
183
184


185
def dchi2_over_ds2(s):
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
    """Function used to estimate the normalization factor in the SED fitting
    process when upper limits are included in the dataset to fit (from Eq. A11
    in Sawicki M. 2012, PASA, 124, 1008).

    Parameters
    ----------
    s: Float
        Contains value onto which we perform minimization = normalization
        factor
    obs_fluxes: RawArray
        Contains observed fluxes for each filter.
    obs_errors: RawArray
        Contains observed errors for each filter.
    model_fluxes: RawArray
        Contains modeled fluxes for each filter.
    lim_flag: Boolean
        Tell whether we use upper limits (True) or not (False).

   Returns
    -------
    func: Float
        Eq. A11 in Sawicki M. 2012, PASA, 124, 1008).

    """
    # We enter into this function if lim_flag = True.

    # The mask "data" selects the filter(s) for which measured fluxes are given
    # i.e., when obs_fluxes is >=0. and obs_errors >=0.
    # The mask "lim" selects the filter(s) for which upper limits are given
    # i.e., when obs_fluxes is >=0. and obs_errors = 9990 <= obs_errors < 0.

Médéric Boquien's avatar
Médéric Boquien committed
217
218
    wlim = np.where((gbl_obs_errors >= -9990.) & (gbl_obs_errors < 0.))
    wdata = np.where(gbl_obs_errors >= 0.)
219

220
221
    mod_fluxes_data = gbl_mod_fluxes[wdata]
    mod_fluxes_lim = gbl_mod_fluxes[wlim]
222

223
224
    obs_fluxes_data = gbl_obs_fluxes[wdata]
    obs_fluxes_lim = gbl_obs_fluxes[wlim]
225

226
227
    obs_errors_data = gbl_obs_errors[wdata]
    obs_errors_lim = -gbl_obs_errors[wlim]
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249

    dchi2_over_ds_data = np.sum(
        (obs_fluxes_data-s*mod_fluxes_data) *
        mod_fluxes_data/(obs_errors_data*obs_errors_data))

    dchi2_over_ds_lim = np.sqrt(2./np.pi)*np.sum(
        mod_fluxes_lim*np.exp(
            -np.square(
                (obs_fluxes_lim-s*mod_fluxes_lim)/(np.sqrt(2)*obs_errors_lim)
                      )
                             )/(
            obs_errors_lim*(
                1.+erf(
                   (obs_fluxes_lim-s*mod_fluxes_lim)/(np.sqrt(2)*obs_errors_lim)
                      )
                           )
                               )
                                                )

    func = dchi2_over_ds_data - dchi2_over_ds_lim

    return func
250

Médéric Boquien's avatar
Médéric Boquien committed
251

252
253
254
255
256
257
258
259
260
261
262
263
264
265
def analyse_chi2(chi2):
    """Function to analyse the best chi^2 and find out whether what fraction of
    objects seem to be overconstrainted.

    Parameters
    ----------
    chi2: RawArray
        Contains the reduced chi^2

    """
    chi2_red = np.ctypeslib.as_array(chi2[0])
    # If low values of reduced chi^2, it means that the data are overfitted
    # Errors might be under-estimated or not enough valid data.
    print("\n{}% of the objects have chi^2_red~0 and {}% chi^2_red<0.5"
Médéric Boquien's avatar
Médéric Boquien committed
266
267
          .format(np.round((chi2_red < 1e-12).sum()/chi2_red.size, 1),
                  np.round((chi2_red < 0.5).sum()/chi2_red.size, 1)))