utils.py 7.84 KB
Newer Older
1
2
# -*- coding: utf-8 -*-
# Copyright (C) 2013 Centre de données Astrophysiques de Marseille
3
# Copyright (C) 2013-2014 Institute of Astronomy
4
5
# Copyright (C) 2014 Yannick Roehlly <yannick@iaora.eu>
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
6
# Author: Yannick Roehlly & Médéric Boquien
7

8
9
from datetime import datetime
import os
10
from astropy.table import Table, Column
11
import numpy as np
12
13
from scipy.stats import gaussian_kde
from scipy.linalg import LinAlgError
14
from ...warehouse import SedWarehouse
15
import pcigale.analysis_modules.myglobals as gbl
16
17
18
19
20
21
22
23
24

# Directory where the output files are stored
OUT_DIR = "out/"
# Number of points in the PDF
PDF_NB_POINTS = 1000
# Name of the file containing the analysis results
RESULT_FILE = "analysis_results.fits"
# Name of the file containing the best models information
BEST_MODEL_FILE = "best_models.fits"
25
26


27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def gen_pdf(values, probabilities, grid):
    """Generate a probability density function

    For a list of values and associated probabilities, this function
    generates a probability density using a weighted gaussian kernel
    density estimation.

    This part should definitely be improved as it is done in the simplest
    way: each value is repeated (probability * 100) times and the standard
    scipy gaussian KDE is used.

    Parameters
    ----------
    values : array like of floats
        The values of the variable.
    probabilities : array like of floats
        The probability associated with each value
    grid : array like of float
        The list of values to which the probability will be evaluated.

    Returns
    -------
    The list of probabilities evaluated at each value of the grid. If there
    is only one input value with a probability superior to 0, the scipy KDE
    algorithm will fail and None is returned.

    """

    # We use masked arrays because in the analysis module the arrays are
    # already masked and the mask is important.
    values = np.ma.array(values, dtype=float)
    probabilities = np.ma.array(probabilities, dtype=float)

    probabilities = np.ma.array(np.around(100. * probabilities),
                                dtype=int, copy=True)

    combined_values = []

    # We must convert the values masked array to list and test each value
    # against None because 0. is a valid value. For the probabilities,
    # we don't have this problem because if the probability is 0 we won't add
    # the value.
    for val_idx, val in enumerate(values.tolist()):
        if val is not None and probabilities[val_idx]:
            combined_values += [val] * probabilities[val_idx]

    try:
        result = gaussian_kde(combined_values)(grid)
75
    except (LinAlgError, ValueError):
76
77
78
        result = None

    return result
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190


def save_best_sed(obsid, creation_modules, params, norm):
    """Save the best SED to a VO table.

    Parameters
    ----------
    obsid: string
        Name of the object. Used to prepend the output file name
    creation_modules: list
        List of modules used to generate the SED
    params: list
        List of parameters to generate the SED
    norm: float
        Normalisation factor to scale the scale to the observations

    """
    with SedWarehouse(cache_type="memory") as sed_warehouse:
        sed = sed_warehouse.get_sed(creation_modules, params)
        sed.to_votable(OUT_DIR + "{}_best_model.xml".format(obsid), mass=norm)


def save_pdf(obsid, analysed_variables, model_variables, likelihood):
    """Save the PDF to a FITS file

    We estimate the probability density functions (PDF) of the parameters using
    a weighted kernel density estimation. This part should definitely be
    improved as we simulate the weight by adding as many value as their
    probability * 100.

    Parameters
    ----------
    obsid: string
        Name of the object. Used to prepend the output file name
    analysed_variables: list
        Analysed variables names
    model_variables: 2D array
        Analysed variables values for all models
    likelihood: array
        Likelihood for all models

    """
    for var_index, var_name in enumerate(analysed_variables):
        values = model_variables[:, var_index]
        pdf_grid = np.linspace(values.min(), values.max(), PDF_NB_POINTS)
        pdf_prob = gen_pdf(values, likelihood, pdf_grid)

        if pdf_prob is None:
            # TODO: use logging
            print("Can not compute PDF for observation <{}> and "
                  "variable <{}>.".format(obsid, var_name))
        else:
            table = Table((
                Column(pdf_grid, name=var_name),
                Column(pdf_prob, name="probability density")
            ))
            table.write(OUT_DIR + "{}_{}_pdf.fits".format(obsid, var_name))


def save_chi2(obsid, analysed_variables, model_variables, reduced_chi2):
    """Save the best reduced χ² versus the analysed variables

    Parameters
    ----------
    obsid: string
        Name of the object. Used to prepend the output file name
    analysed_variables: list
        Analysed variable names
    model_variables: 2D array
        Analysed variables values for all models
    reduced_chi2:
        Reduced χ²

    """
    for var_index, var_name in enumerate(analysed_variables):
        table = Table((
            Column(model_variables[:, var_index],
                   name=var_name),
            Column(reduced_chi2, name="chi2")))
        table.write(OUT_DIR + "{}_{}_chi2.fits".format(obsid, var_name))


def save_table_analysis(obsid, analysed_variables, analysed_averages,
                        analysed_std):
    """Save the estimated values derived from the analysis of the PDF

    Parameters
    ----------
    obsid: table column
        Names of the objects
    analysed_variables: list
        Analysed variable names
    analysed_averages: array
        Analysed variables values estimates
    analysed_std: array
        Analysed variables errors estimates

    """
    result_table = Table()
    result_table.add_column(Column(obsid.data, name="observation_id"))
    for index, variable in enumerate(analysed_variables):
        result_table.add_column(Column(
            analysed_averages[:, index],
            name=variable
        ))
        result_table.add_column(Column(
            analysed_std[:, index],
            name=variable+"_err"
        ))
    result_table.write(OUT_DIR + RESULT_FILE)


191
def save_table_best(obsid, chi2, chi2_red, norm, variables, fluxes, filters):
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
    """Save the values corresponding to the best fit

    Parameters
    ----------
    obsid: table column
        Names of the objects
    chi2: array
        Best χ² for each object
    chi2_red: array
        Best reduced χ² for each object
    norm: array
        Normalisation factor for each object
    variables: list
        All variables corresponding to a SED
    fluxes: 2D array
        Fluxes in all bands for each object
    filters: list
        Filters used to compute the fluxes

    """
    best_model_table = Table()
    best_model_table.add_column(Column(obsid.data, name="observation_id"))
    best_model_table.add_column(Column(chi2, name="chi_square"))
    best_model_table.add_column(Column(chi2_red, name="reduced_chi_square"))

217
    for index, name in enumerate(gbl.info_keys):
218
        column = Column([variable[index] for variable in variables], name=name)
219
        if name in gbl.mass_proportional_info:
220
221
222
223
224
225
226
227
            column *= norm
        best_model_table.add_column(column)

    for index, name in enumerate(filters):
        column = Column(fluxes[:, index] * norm, name=name, unit='mJy')
        best_model_table.add_column(column)

    best_model_table.write(OUT_DIR + BEST_MODEL_FILE)
228
229
230
231
232
233
234
235
236
237
238


def backup_dir(directory):
    if os.path.exists(directory):
        new_name = datetime.now().strftime("%Y%m%d%H%M") + "_" + directory
        os.rename(directory, new_name)
        print("The existing {} directory was renamed to {}".format(
            OUT_DIR,
            new_name
        ))
    os.mkdir(OUT_DIR)