# -*- coding: utf-8 -*-
# Copyright (C) 2013 Centre de données Astrophysiques de Marseille
# Copyright (C) 2013-2014 Institute of Astronomy
# Copyright (C) 2014 Yannick Roehlly <yannick@iaora.eu>
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
# Author: Yannick Roehlly & Médéric Boquien

from astropy import log
from astropy.table import Table, Column
import numpy as np
from scipy import optimize
from scipy.special import erf

from ..utils import OUT_DIR

log.setLevel('ERROR')

def save_best_sed(obsid, sed, norm):
    """Write the best-fitting SED to a VO-table file.

    Parameters
    ----------
    obsid: string
        Identifier of the object; prepended to the output file name
    sed: SED object
        Best-fitting SED
    norm: float
        Normalisation factor applied to scale the model to the observations

    """
    out_file = OUT_DIR + "{}_best_model.xml".format(obsid)
    sed.to_votable(out_file, mass=norm)


def _save_pdf(obsid, name, model_variable, likelihood):
    """Compute and save the PDF to a FITS file.

    The probability density function (PDF) of the parameter is estimated
    from a likelihood-weighted histogram of the model values, then
    resampled onto a regular grid.

    Parameters
    ----------
    obsid: string
        Name of the object. Used to prepend the output file name
    name: string
        Analysed variable name
    model_variable: array
        Values of the model variable
    likelihood: 1D array
        Likelihood of the "likely" models

    """
    # Final number of grid points of the PDF. If fewer than Npdf unique
    # parameter values are analysed, the histogram is first built with one
    # bin per unique value and later interpolated onto the final grid.
    Npdf = 100
    vmin = np.min(model_variable)
    vmax = np.max(model_variable)
    Nhist = min(Npdf, np.unique(model_variable).size)

    if vmin == vmax:
        # Degenerate case: a single parameter value over the whole grid.
        pdf_grid = np.array([vmin, vmax])
        pdf_prob = np.array([1., 1.])
    else:
        pdf_prob, pdf_grid = np.histogram(model_variable, Nhist,
                                          (vmin, vmax),
                                          weights=likelihood, density=True)
        # Evaluate the histogram at the bin centres before resampling.
        pdf_x = (pdf_grid[1:] + pdf_grid[:-1]) / 2.

        pdf_grid = np.linspace(vmin, vmax, Npdf)
        pdf_prob = np.interp(pdf_grid, pdf_x, pdf_prob)

    if pdf_prob is None:
        print("Can not compute PDF for observation <{}> and variable <{}>."
              "".format(obsid, name))
    else:
        table = Table((
            Column(pdf_grid, name=name),
            Column(pdf_prob, name="probability density")
        ))
        table.write(OUT_DIR + "{}_{}_pdf.fits".format(obsid, name))


def save_pdf(obsid, names, mass_proportional, model_variables, scaling,
             likelihood, wlikely):
    """Compute and save the PDF of the analysed variables.

    Parameters
    ----------
    obsid: string
        Name of the object. Used to prepend the output file name
    names: list of strings
        Analysed variables names
    mass_proportional: container of strings
        Names of the variables that scale with the mass
    model_variables: array
        Values of the model variables
    scaling: array
        Scaling factors of the models
    likelihood: 1D array
        Likelihood of the "likely" models
    wlikely: array
        Index (or mask) selecting the "likely" models — presumably the
        output of np.where; verify against caller

    """
    for i, name in enumerate(names):
        # Restrict to the likely models; rescale mass-proportional
        # variables before an optional conversion to log.
        values = model_variables[:, i][wlikely]
        if name.endswith('_log'):
            if name[:-4] in mass_proportional:
                values = values * scaling[wlikely]
            model_variable = np.log10(values)
        else:
            if name in mass_proportional:
                values = values * scaling[wlikely]
            model_variable = values

        _save_pdf(obsid, name, model_variable, likelihood)


def _save_chi2(obsid, name, model_variable, chi2):
    """Save the best reduced χ² versus an analysed variable.

    Parameters
    ----------
    obsid: string
        Name of the object. Used to prepend the output file name
    name: string
        Analysed variable name
    model_variable: array
        Values of the model variable
    chi2:
        Reduced χ²

    """
    columns = (Column(model_variable, name=name),
               Column(chi2, name="chi2"))
    Table(columns).write(OUT_DIR + "{}_{}_chi2.fits".format(obsid, name))


def save_chi2(obsid, names, mass_proportional, model_variables, scaling, chi2):
    """Save the best reduced χ² versus analysed variables

    Parameters
    ----------
    obsid: string
        Name of the object. Used to prepend the output file name
    names: list of strings
        Analysed variables names
    mass_proportional: container of strings
        Names of the variables that scale with the mass
    model_variables: array
        Values of the model variables
    scaling: array
        Scaling factors of the models
    chi2:
        Reduced χ²
    """
    for i, name in enumerate(names):
        if name.endswith('_log'):
            if name[:-4] in mass_proportional:
                model_variable = np.log10(model_variables[:, i] * scaling)
            else:
                model_variable = np.log10(model_variables[:, i])
        else:
            if name in mass_proportional:
                model_variable = model_variables[:, i] * scaling
            else:
                # Bug fix: select the column for this variable. Previously
                # the whole 2D model_variables array was passed through,
                # corrupting the saved chi² table for non-log,
                # non-mass-proportional variables.
                model_variable = model_variables[:, i]

        _save_chi2(obsid, name, model_variable, chi2)


def save_results(filename, obsid, bayes_variables, bayes_mean, bayes_std, chi2,
                 chi2_red, best, fluxes, filters, info_keys):
    """Save the estimated values derived from the analysis of the PDF and the
    parameters associated with the best fit. Both a simple text file and a
    FITS file are generated.

    Parameters
    ----------
    filename: string
        Name of the output file without the extension
    obsid: table column
        Names of the objects
    bayes_variables: list
        Analysed variable names
    bayes_mean: RawArray
        Analysed variables values estimates
    bayes_std: RawArray
        Analysed variables errors estimates
    chi2: RawArray
        Best χ² for each object
    chi2_red: RawArray
        Best reduced χ² for each object
    best: RawArray
        All variables corresponding to the best SED
    fluxes: RawArray
        Fluxes in all bands for the best SED
    filters: list
        Filters used to compute the fluxes
    info_keys: list
        Parameters names

    """
    def as_2d(shared):
        # Turn a (RawArray, shape) pair into a numpy view of that shape.
        return np.ctypeslib.as_array(shared[0]).reshape(shared[1])

    np_bayes_mean = as_2d(bayes_mean)
    np_bayes_std = as_2d(bayes_std)
    np_fluxes = as_2d(fluxes)
    np_best = as_2d(best)

    # The χ² arrays are one-dimensional: no reshape needed.
    np_chi2 = np.ctypeslib.as_array(chi2[0])
    np_chi2_red = np.ctypeslib.as_array(chi2_red[0])

    table = Table()

    table.add_column(Column(obsid.data, name="id"))

    for idx, name in enumerate(bayes_variables):
        table.add_column(Column(np_bayes_mean[:, idx], name="bayes."+name))
        table.add_column(Column(np_bayes_std[:, idx],
                         name="bayes."+name+"_err"))

    table.add_column(Column(np_chi2, name="best.chi_square"))
    table.add_column(Column(np_chi2_red, name="best.reduced_chi_square"))

    for idx, name in enumerate(info_keys):
        table.add_column(Column(np_best[:, idx], name="best."+name))

    for idx, name in enumerate(filters):
        table.add_column(Column(np_fluxes[:, idx], name="best."+name,
                                unit='mJy'))

    table.write(OUT_DIR+filename+".txt", format='ascii.fixed_width',
                delimiter=None)
    table.write(OUT_DIR+filename+".fits", format='fits')


def dchi2_over_ds2(s, obs_fluxes, obs_errors, mod_fluxes):
    """Function used to estimate the normalization factor in the SED fitting
    process when upper limits are included in the dataset to fit (from Eq. A11
    in Sawicki M. 2012, PASP, 124, 1008).

    Parameters
    ----------
    s: Float
        Contains value onto which we perform minimization = normalization
        factor
    obs_fluxes: array
        Contains observed fluxes for each filter.
    obs_errors: array
        Contains observed errors for each filter.
    mod_fluxes: array
        Contains modeled fluxes for each filter.

    Returns
    -------
    func: Float
        Value of Eq. A11 in Sawicki M. 2012, PASP, 124, 1008.

    """
    # The mask "data" selects the filter(s) for which measured fluxes are
    # given, i.e., when obs_errors >= 0. The mask "lim" selects the filter(s)
    # for which upper limits are given, i.e., when obs_errors < 0.
    wlim = np.where(np.isfinite(obs_errors) & (obs_errors < 0.))
    wdata = np.where(obs_errors >= 0.)

    mod_fluxes_data = mod_fluxes[wdata]
    mod_fluxes_lim = mod_fluxes[wlim]

    obs_fluxes_data = obs_fluxes[wdata]
    obs_fluxes_lim = obs_fluxes[wlim]

    obs_errors_data = obs_errors[wdata]
    # Upper limits are stored as negative errors; work with |error|.
    obs_errors_lim = -obs_errors[wlim]

    # Contribution of the properly detected fluxes.
    dchi2_over_ds_data = np.sum(
        (obs_fluxes_data-s*mod_fluxes_data) *
        mod_fluxes_data/(obs_errors_data*obs_errors_data))

    # Contribution of the upper limits. The same argument appears both in
    # the Gaussian and in the erf term of Eq. A11; compute it once.
    arg = (obs_fluxes_lim - s*mod_fluxes_lim) / (np.sqrt(2.)*obs_errors_lim)
    dchi2_over_ds_lim = np.sqrt(2./np.pi) * np.sum(
        mod_fluxes_lim * np.exp(-np.square(arg)) /
        (obs_errors_lim * (1. + erf(arg))))

    func = dchi2_over_ds_data - dchi2_over_ds_lim

    return func


def analyse_chi2(chi2):
    """Function to analyse the best chi^2 and find out what fraction of
    objects seem to be overconstrained.

    Parameters
    ----------
    chi2: RawArray
        Contains the best reduced chi^2

    """
    chi2_red = np.ctypeslib.as_array(chi2[0])
    # If low values of reduced chi^2, it means that the data are overfitted.
    # Errors might be under-estimated or not enough valid data.
    # Bug fix: the message reports percentages but the fractions were
    # previously printed without the conversion factor of 100.
    print("\n{}% of the objects have chi^2_red~0 and {}% chi^2_red<0.5"
          .format(np.round(100. * (chi2_red < 1e-12).sum()/chi2_red.size, 1),
                  np.round(100. * (chi2_red < 0.5).sum()/chi2_red.size, 1)))


def _compute_scaling(model_fluxes, obs_fluxes, obs_errors):
    """Compute the scaling factor to be applied to the model fluxes to best fit
    the observations. Note that we look over the bands to avoid the creation of
    an array of the same size as the model_fluxes array. Because we loop on the
    bands and not on the models, the impact on the performance should be small.

    Parameters
    ----------
    model_fluxes: array
        Fluxes of the models
    obs_fluxes: array
        Observed fluxes
    obs_errors: array
        Observed errors

    Returns
    -------
    scaling: array
        Scaling factors minimising the χ²
    """
    num = np.zeros(model_fluxes.shape[0])
    denom = np.zeros(model_fluxes.shape[0])
    for i in range(obs_fluxes.size):
        if np.isfinite(obs_fluxes[i]):
351 352
            num += model_fluxes[:, i] * (obs_fluxes[i] / (obs_errors[i] *
                                                          obs_errors[i]))
353
            denom += np.square(model_fluxes[:, i] / obs_errors[i])
354 355 356

    return num/denom


def compute_chi2(model_fluxes, obs_fluxes, obs_errors, lim_flag):
    """Compute the χ² of observed fluxes with respect to the grid of models. We
    take into account upper limits if need be. Note that we loop over the bands
    to avoid the creation of an array of the same size as the model_fluxes
    array. Because we loop on the bands and not on the models, the impact on
    the performance should be small.

    Parameters
    ----------
    model_fluxes: array
        2D grid containing the fluxes of the models
    obs_fluxes: array
        Fluxes of the observed object
    obs_errors: array
        Uncertainties on the fluxes of the observed object
    lim_flag: boolean
        Boolean indicating whether upper limits should be treated (True) or
        discarded (False)

    Returns
    -------
    chi2: array
        χ² for all the models in the grid
    scaling: array
        scaling of the models to obtain the minimum χ²
    """
    # Upper limits are stored as non-positive errors.
    limits = lim_flag and np.any(obs_errors <= 0.)

    scaling = _compute_scaling(model_fluxes, obs_fluxes, obs_errors)

    # Some observations may not have flux values in some filter(s), but
    # they can have upper limit(s). In that case, refine each scaling
    # factor by solving dχ²/ds = 0 (Sawicki 2012, Eq. A11) for its model.
    if limits:
        for imod in range(len(model_fluxes)):
            scaling[imod] = optimize.root(dchi2_over_ds2, scaling[imod],
                                          args=(obs_fluxes, obs_errors,
                                                model_fluxes[imod, :])).x

    # χ² of the comparison of each model to each observation.
    chi2 = np.zeros(model_fluxes.shape[0])
    for i in range(obs_fluxes.size):
        if np.isfinite(obs_fluxes[i]) and obs_errors[i] > 0.:
            chi2 += np.square(
                (obs_fluxes[i] - model_fluxes[:, i] * scaling) / obs_errors[i])

    # Some observations may not have flux values in some filter(s), but
    # they can have upper limit(s): add their contribution to the χ².
    if limits:
        mask_lim = (obs_errors <= 0.)
        chi2 += -2. * np.sum(
            np.log(
                np.sqrt(np.pi/2.)*(-obs_errors[mask_lim])*(
                    1.+erf(
                        (obs_fluxes[mask_lim]-model_fluxes[:, mask_lim] *
                         scaling[:, np.newaxis]) /
                        (np.sqrt(2)*(-obs_errors[mask_lim]))))), axis=1)

    return chi2, scaling


def weighted_param(param, weights):
    """Compute the weighted mean and standard deviation of an array of data.

    Parameters
    ----------
    param: array
        Values of the parameters for the entire grid of models
    weights: array
        Weights by which to weight the parameter values

    Returns
    -------
    mean: float
        Weighted mean of the parameter values
    std: float
        Weighted standard deviation of the parameter values

    """
    mean = np.average(param, weights=weights)
    variance = np.average(np.square(param - mean), weights=weights)

    return (mean, np.sqrt(variance))