# -*- coding: utf-8 -*-
"""
Copyright (C) 2012, 2013 Centre de données Astrophysiques de Marseille
Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt

@author: Yannick Roehlly <yannick.roehlly@oamp.fr>

This file implements the statistical analysis as performed by the calcX2_psum
programme of the Fortran Cigale code.

The models corresponding to all possible combinations of parameters are
computed, and the integrated fluxes in the same filters as the observations
are used to compute the χ² of the fitting. This χ² gives a probability that
is associated with the model values for the parameters. At the end, for each
parameter, the (probability) weighted mean and standard deviation are
computed and the best fitting model (the one with the least reduced χ²) is
given.

"""

20
21
import os
import sys
Yannick Roehlly's avatar
Yannick Roehlly committed
22
import atpy
23
import json
24
import numpy as np
25
from copy import deepcopy
26
from scipy import stats
Yannick Roehlly's avatar
Yannick Roehlly committed
27
from progressbar import ProgressBar
Yannick Roehlly's avatar
Yannick Roehlly committed
28
from matplotlib import pyplot as plt
29
from . import common
Yannick Roehlly's avatar
Yannick Roehlly committed
30
from ..warehouse import SedWarehouse
31
from ..sed import utils
32
from ..sed.modules.common import get_module
Yannick Roehlly's avatar
Yannick Roehlly committed
33
from ..data import Database
34
35
36
37


# Tolerance threshold under which any flux or error is considered as 0.
TOLERANCE = 1.e-12
# Name of the fits file containing the results.
RESULT_FILE = 'psum_results.fits'
# Directory where the output files are stored.
OUT_DIR = 'out/'
# Wavelength limits (rest-frame, in nm) when plotting the best SED; the
# plotting code redshifts these bounds before masking the spectrum.
PLOT_L_MIN = 91
PLOT_L_MAX = 1e6
45
46
47
48
49
50
51
52


class Module(common.AnalysisModule):
    """psum analysis

    For each observation, every model SED is compared to the observed fluxes
    through a reduced χ² and an associated probability; the probability-
    weighted mean and standard deviation of each analysed variable are then
    computed, and the best fitting model is saved/plotted on demand.

    TODO: Description of the PSUM method.
    """

    # Description of the parameters accepted by this analysis module:
    # each entry maps a parameter name to (type, description, default).
    parameter_list = {
        "analysed_variables": (
            "array of strings",
            "List of the variables (in the SEDs info dictionaries) for which "
            "the statistical analysis will be done.",
            ["sfr", "average_sfr"]
        ),
        "save_best_sed": (
            "boolean",
            "If true, save the best SED for each observation to a file.",
            False
        ),
        "plot_best_sed": (
            "boolean",
            "If true, for each observation save a plot of the best SED "
            "and the observed fluxes.",
            False
        ),
        "plot_chi2_distribution": (
            "boolean",
            "If true, for each observation and each analysed variable "
            "plot the value vs reduced chi-square distribution.",
            False
        ),
        "save_pdf": (
            "boolean",
            "If true, for each observation and each analysed variable "
            "save the probability density function.",
            False
        ),
        "plot_pdf": (
            "boolean",
            "If true, for each observation and each analysed variable "
            "plot the probability density function.",
            False
        ),
        "pdf_max_bin_number": (
            "integer",
            "Maximum number of bins used to compute the probability density "
            "function. This is only used when saving or printing the PDF. "
            "If there are less values, the probability is given for each "
            "one.",
            50
        ),
        "storage_type": (
            "string",
            "Type of storage used to cache the generate SED.",
            "memory"
        )
    }

    def process(self, data_file, column_list, sed_modules,
                sed_modules_params, redshift_module_name,
                redshift_configuration, parameters):
        """Process with the psum analysis.

        The analysis is done in two nested loops: over each observation and
        over each theoretical SED. We loop first over the SEDs to limit the
        number of times the SEDs are created.

        Parameters
        ----------
        data_file: string
            Name of the file containing the observations to fit.
        column_list: list of strings
            Name of the columns from the data file to use for the analysis.
        sed_modules: list of strings
            List of the module names (in the right order) to use for creating
            the SEDs.
        sed_modules_params: list of dictionaries
            List of the parameter dictionaries for each module.
        redshift_module_name : string
            Name of the module used to redshift the SED.
        redshift_configuration : dictionary
            Configuration dictionary for the module used to redshift the SED.
        parameters: dictionary
            Dictionary containing the parameters.

        """
        # Create the output directory and stop if it already exists.
        try:
            os.mkdir(OUT_DIR)
        except OSError:
            print("pcigale can't create the {} directory, maybe "
                  "it yet exists.".format(OUT_DIR))
            sys.exit()

        # Open the warehouse caching the computed SEDs.
        # TODO Why is parameters["storage_type"] an array?
        sed_warehouse = SedWarehouse(
            cache_type=parameters["storage_type"][0])

        # Get the parameters
        analysed_variables = parameters["analysed_variables"]
        save_best_sed = parameters["save_best_sed"]
        plot_best_sed = parameters["plot_best_sed"]
        plot_chi2_distribution = parameters["plot_chi2_distribution"]
        save_pdf = parameters["save_pdf"]
        plot_pdf = parameters["plot_pdf"]
        pdf_max_bin_number = int(parameters["pdf_max_bin_number"])

        # One (value, error) result list per analysed variable, plus the
        # galaxy mass (the model normalisation factor).
        results = {'galaxy_mass': [], 'galaxy_mass_err': []}
        for variable in analysed_variables:
            results[variable] = []
            results[variable + '_err'] = []

        # We get the transmission table and effective wavelength for each
        # used filter.
        filter_list = [name for name in column_list
                       if not name.endswith('_err')]
        transmission = {}
        effective_wavelength = {}
        base = Database()
        for name in filter_list:
            filt = base.get_filter(name)
            transmission[name] = filt.trans_table
            effective_wavelength[name] = filt.effective_wavelength
        base.close()

        # We get the redshift module.
        redshift_module = get_module(redshift_module_name)
        redshift_module.parameters = redshift_configuration

        # Read the observation table and complete it by adding error where
        # none is provided and by adding the systematic deviation.
        obs_table = atpy.Table(data_file, verbose=False)
        for name in filter_list:
            name_err = name + '_err'
            if name_err not in column_list:
                # The error column is not used: (re-)create it filled with
                # zeros so that adjust_errors() replaces it with defaults.
                if name_err not in obs_table.columns:
                    obs_table.add_column(name_err,
                                         np.zeros(obs_table.data.shape),
                                         dtype=float)
                else:
                    obs_table[name_err] = np.zeros(obs_table.data.shape)

            obs_table[name_err] = adjust_errors(obs_table[name],
                                                obs_table[name_err])

        # As we loop first on the models (to limit the number of times a
        # model is computed) we need to store the computation results to make
        # the analysis for each observation in a second time. For this, we
        # use a three axis numpy array: axis 1 is the model (based on the
        # index of the sed_module_params list), axis 2 is the observation
        # (based on the data table row index) and axis 3 is the considered
        # variable (based on the analysed variables list, with reduced_chi2,
        # probability and galaxy_mass at the beginning).
        comp_table = np.zeros((len(sed_modules_params),
                               obs_table.data.shape[0],
                               len(analysed_variables) + 3), dtype=float)
        comp_table[:, :, :] = np.nan

        # We loop over all the possible theoretical SEDs
        progress_bar = ProgressBar(maxval=len(sed_modules_params)).start()
        # NOTE(review): the loop variable 'parameters' shadows the method
        # argument of the same name; after this loop 'parameters' no longer
        # refers to the configuration dictionary.
        for model_index, parameters in enumerate(sed_modules_params):
            sed = sed_warehouse.get_sed(sed_modules, parameters)

            # Compute the reduced chi-square, the galaxy mass (normalisation
            # factor) and probability for each observed SED. Add these and
            # the values for the analysed variables to the comp_table.
            for obs_index in range(obs_table.data.shape[0]):
                obs_redshift = obs_table['redshift'][obs_index]
                obs_fluxes = [obs_table[name][obs_index]
                              for name in filter_list]
                obs_errors = [obs_table[name + '_err'][obs_index]
                              for name in filter_list]

                # We copy the SED before redshifting it.
                red_sed = deepcopy(sed)
                redshift_module.parameters["redshift"] = obs_redshift
                redshift_module.process(red_sed)

                # Theoretical fluxes integrated in each observed filter.
                theor_fluxes = [red_sed.compute_fnu(transmission[name],
                                                    effective_wavelength[name],
                                                    obs_redshift)
                                for name in filter_list]

                reduced_chi2, galaxy_mass, probability = compute_chi2(
                    theor_fluxes, obs_fluxes, obs_errors)
                comp_table[model_index, obs_index, 0] = reduced_chi2
                comp_table[model_index, obs_index, 1] = probability
                comp_table[model_index, obs_index, 2] = galaxy_mass

                for index, variable in enumerate(analysed_variables):
                    # Mass-proportional variables are scaled by the fitted
                    # galaxy mass (the normalisation factor).
                    if variable in sed.mass_proportional_info:
                        comp_table[model_index, obs_index, index + 3] = \
                            galaxy_mass * sed.info[variable]
                    else:
                        comp_table[model_index, obs_index, index + 3] = \
                            sed.info[variable]

            progress_bar.update(model_index + 1)

        progress_bar.finish()

        # Loop over the observations to find the best fitting model and
        # compute the parameter statistics.
        for obs_index, obs_name in enumerate(obs_table['id']):
            # We will save the part of the computation table corresponding to
            # the model as a FITS file.
            fits_table = atpy.Table()
            fits_table.table_name = "Analysis computation table"
            fits_table.add_column("reduced_chi-square",
                                  comp_table[:, obs_index, 0])
            fits_table.add_column("probability",
                                  comp_table[:, obs_index, 1])

            # Find the model corresponding to the least reduced chi-square;
            # if there is more than one model with the minimal chi-square
            # value, only the first is returned.
            best_index = comp_table[:, obs_index, 0].argmin()
            best_chi2 = comp_table[best_index, obs_index, 0]
            best_norm_factor = comp_table[best_index, obs_index, 2]
            best_params = sed_modules_params[best_index]
            best_sed = sed_warehouse.get_sed(sed_modules, best_params)

            # We need to pass the SED through the redshift/IGM module before
            # plotting it.
            obs_redshift = obs_table['redshift'][obs_index]
            redshift_module.parameters["redshift"] = obs_redshift
            redshift_module.process(best_sed)

            # Save best SED
            # TODO: For now, we only save the lambda vs fnu table. Once
            # we develop a way to serialise the SED, we should save the
            # complete SED object.
            if save_best_sed:
                best_sed_lambda_fnu = best_sed.lambda_fnu(
                    redshift=obs_table['redshift'][obs_index])
                best_sed_table = atpy.Table()
                best_sed_table.table_name = "Best SED"
                best_sed_table.add_column("wavelength",
                                          best_sed_lambda_fnu[0],
                                          "nm")
                best_sed_table.add_column("fnu",
                                          best_norm_factor
                                          * best_sed_lambda_fnu[1],
                                          "mJy")
                best_sed_table.write(OUT_DIR + obs_name + 'bestSED.fits',
                                     verbose=False)
                # Write the SED modules parameters to a file
                # NOTE(review): on Python 3 json.dumps() cannot serialise a
                # zip object (it would need list(zip(...))); confirm the
                # targeted Python version.
                with open(OUT_DIR + obs_name + "bestSED.params", "w") as f:
                    f.write(json.dumps(zip(sed_modules, best_params),
                                       indent=2,
                                       separators=(',', ': ')))

            # Plot the best SED
            if plot_best_sed:
                best_sed_lambda_fnu = best_sed.lambda_fnu(
                    redshift=obs_table['redshift'][obs_index])
                figure = plt.figure()
                ax = figure.add_subplot(111)
                plot_x, plot_y = best_sed_lambda_fnu
                # Restrict the plot to the (redshifted) wavelength limits.
                plot_mask = (
                    (plot_x >= utils.redshift_wavelength(PLOT_L_MIN,
                                                         obs_redshift)) &
                    (plot_x <= utils.redshift_wavelength(PLOT_L_MAX,
                                                         obs_redshift))
                )
                ax.loglog(plot_x[plot_mask],
                          best_norm_factor * plot_y[plot_mask],
                          '-b')
                ax.loglog([effective_wavelength[name] for name in filter_list],
                          [obs_table[name][obs_index] for name in filter_list],
                          'or')
                ax.set_xlabel('Wavelength [nm]')
                ax.set_ylabel('Flux [mJy]')
                ax.set_title(obs_name +
                             ' best fitting SED - reduced chi2:' +
                             str(best_chi2))
                figure.savefig(OUT_DIR + obs_name + '_bestSED.pdf')

            # Compute the statistics for the desired variables. First, we
            # build the probability distribution function (PDF) for the
            # variable, then we use it to compute the expected value and
            # the standard deviation.
            for index, variable in enumerate(['galaxy_mass'] +
                                             analysed_variables):
                # The variable index in the last axis of comp_table is
                # index + 2 as chi2 and probability sit at the beginning.
                idx = index + 2

                values = comp_table[:, obs_index, idx]
                probabilities = comp_table[:, obs_index, 1]

                fits_table.add_column(variable, values)

                # We compute the Probability Density Function by binning the
                # data into evenly populated bins. To estimate the binning
                # quality, we also provide the standard deviation of the
                # variable and probability values in the bin.
                pdf_values = []
                pdf_probs = []
                pdf_value_sigma = []
                pdf_prob_sigma = []

                # The maximum number of bins is the least between
                # pdf_max_bin_number and the number of distinct values
                # for the analysed variable.
                pdf_bin_number = min(pdf_max_bin_number,
                                     len(np.unique(values)))

                pdf_bin_boundaries, pdf_bins = bin_evenly(values,
                                                          pdf_bin_number)
                pdf_bin_sizes = [
                    pdf_bin_boundaries[i + 1] - pdf_bin_boundaries[i]
                    for i in range(pdf_bin_number)
                ]

                # Bin numbers returned by bin_evenly() start at 1.
                for bin in range(1, pdf_bin_number + 1):
                    # The bin probability is the average of the probabilities
                    # in the bin.
                    pdf_probs.append(
                        np.average(probabilities[pdf_bins == bin]))
                    pdf_prob_sigma.append(
                        np.std(probabilities[pdf_bins == bin])
                    )
                    pdf_values.append(
                        (pdf_bin_boundaries[bin] +
                         pdf_bin_boundaries[bin - 1]) / 2
                    )
                    pdf_value_sigma.append(
                        np.std(values[pdf_bins == bin])
                    )

                pdf_probs = np.array(pdf_probs)
                pdf_prob_sigma = np.array(pdf_prob_sigma)
                pdf_values = np.array(pdf_values)
                pdf_value_sigma = np.array(pdf_value_sigma)

                # We normalise the PDF to 1 over all the parameter values.
                pdf_probs = pdf_probs / np.sum(pdf_bin_sizes * pdf_probs)

                # From the PDF, compute the expected value and standard
                # deviation.
                expected_value = np.sum(pdf_values * pdf_bin_sizes * pdf_probs)

                sigma = np.sqrt(
                    np.sum((pdf_values - expected_value) ** 2 *
                           pdf_bin_sizes *
                           pdf_probs)
                )
                results[variable].append(expected_value)
                results[variable + '_err'].append(sigma)

                # We plot all the (value, reduced_chi2) tuples.
                if plot_chi2_distribution:
                    figure = plt.figure()
                    ax = figure.add_subplot(111)
                    ax.plot(values,
                            comp_table[:, obs_index, 0],
                            '.')
                    ax.set_xlabel('value')
                    ax.set_ylabel('reduced chi square')
                    ax.set_title(variable)
                    figure.savefig(OUT_DIR +
                                   obs_name + '_' + variable + '_chi2plot.pdf')

                if save_pdf:
                    pdf_table = atpy.Table()
                    pdf_table.table_name = "Probability Density Function"
                    pdf_table.add_column("bin_start",
                                         pdf_bin_boundaries[:-1])
                    pdf_table.add_column("bin_end",
                                         pdf_bin_boundaries[1:])
                    pdf_table.add_column("value", pdf_values)
                    pdf_table.add_column("value_sigma", pdf_value_sigma)
                    pdf_table.add_column("probability", pdf_probs)
                    pdf_table.add_column("probability_sigma", pdf_prob_sigma)
                    pdf_table.write(OUT_DIR + obs_name + "_" + variable +
                                    "_pdf.fits", verbose=False)

                if plot_pdf:
                    figure = plt.figure()
                    ax = figure.add_subplot(111)
                    ax.bar(
                        pdf_bin_boundaries[:-1],
                        pdf_probs,
                        pdf_bin_sizes
                    )
                    # Mark the expected value and the +/- 1 sigma interval.
                    ax.axvline(expected_value, color="r")
                    ax.axvline(expected_value - sigma,
                               color="r",
                               linestyle="-.")
                    ax.axvline(expected_value + sigma,
                               color="r",
                               linestyle="-.")
                    ax.set_title(obs_name + ' ' + variable + ' PDF')
                    ax.set_xlabel(variable)
                    ax.set_ylabel('Probability')
                    figure.savefig(OUT_DIR + obs_name + "_" + variable +
                                   "_pdf.pdf")

        # Write the computation table FITS
        # NOTE(review): this write sits outside the observation loop, so only
        # the LAST observation's computation table is saved; it probably
        # belongs inside the loop above — confirm intent.
        fits_table.write(OUT_DIR + obs_name + '_comptable.fits', verbose=False)

        # Write the results to the fits file
        result_table = atpy.Table()
        result_table.table_name = "Analysis results"
        result_table.add_column('id', obs_table['id'])
        for variable in (['galaxy_mass'] + analysed_variables):
            result_table.add_column(variable, results[variable])
            result_table.add_column(variable + '_err',
                                    results[variable + '_err'])
        result_table.write(OUT_DIR + RESULT_FILE, verbose=False)

        sed_warehouse.close()

463
464
465
466
467
468
469
470

def adjust_errors(flux, error, default_error=0.1, systematic_deviation=0.1):
    """Adjust the errors replacing the 0 values by the default error and
    adding the systematic deviation.

    The systematic deviation changes the error to:
    sqrt( error² + (flux * deviation)² )

    Parameters
    ----------
    flux : array of floats
        Fluxes.
    error : array of floats
        Observational error in the same unit as the fluxes.
    default_error : float
        Fraction of the flux used as the error when the provided error is
        under the tolerance threshold.
    systematic_deviation : float
        Systematic deviation added to the error.

    Returns
    -------
    error : array of floats
        The corrected errors.

    Raises
    ------
    ValueError
        If the flux and error arrays do not have the same length.

    """
    # The arrays must have the same lengths.
    if len(flux) != len(error):
        raise ValueError("The flux and error arrays must have the same "
                         "length.")

    flux = np.asarray(flux)

    # We copy the error array not to modify the original one. The cast to
    # float prevents silent truncation when an integer array is given.
    error = np.copy(error).astype(float)

    # Replace errors below the tolerance by the default one, i.e. a fixed
    # fraction of the flux. (The previous code multiplied the near-zero
    # error itself by default_error, which left it at ~0 instead of
    # replacing it as documented.)
    mask = error < TOLERANCE
    error[mask] = default_error * flux[mask]

    # Add the systematic error.
    error = np.sqrt(np.square(error) + np.square(flux * systematic_deviation))

    return error


def compute_chi2(model_fluxes, obs_fluxes, obs_errors):
    """Compute chi square value and normalisation factor for the comparison
    of a model fluxes to observational ones.

    Parameters
    ----------
    model_fluxes : array of floats
        Model fluxes.
    obs_fluxes : array of floats
        Observation fluxes for the same filters as the model ones and
        in the same unit.
    obs_errors : array of floats
        Errors on the observation fluxes. The errors must be Gaussian for
        the chi-square to be meaningful.

    Returns
    -------
    chi2_reduced : float
        Reduced chi square value for the comparison, capped at 99.
    normalisation_factor : float
        Normalisation factor that must be applied to the model to fit the
        observation.
    probability: float
        Probability associated with the chi-square.

    Raises
    ------
    ValueError
        When the three input arrays do not share the same length.

    """
    # The three arrays must have the same length.
    if not (len(model_fluxes) == len(obs_fluxes) == len(obs_errors)):
        raise ValueError("The model fluxes, observation fluxes and "
                         "observation errors arrays must have the "
                         "same length.")

    # Work on copies so that the caller's arrays are left untouched.
    model_fluxes = np.copy(model_fluxes)
    obs_fluxes = np.copy(obs_fluxes)
    obs_errors = np.copy(obs_errors)

    # If no observed flux is over the tolerance threshold, or if any error,
    # for valid fluxes, is under the threshold, the observation is flagged
    # as not fitting at all. (The second test is only evaluated when at
    # least one flux is valid, so the mask is never empty there.)
    if (max(obs_fluxes) < TOLERANCE or
            min(obs_errors[obs_fluxes > TOLERANCE]) < TOLERANCE):
        return 99, 1, 0

    # Only the filters whose observation error is over the tolerance
    # threshold take part in the computation.
    usable = obs_errors > TOLERANCE
    model_fluxes = model_fluxes[usable]
    obs_fluxes = obs_fluxes[usable]
    obs_errors = obs_errors[usable]

    # We consider that we fit the observation to the SED, independently of
    # the number of parameters used to build it, so the number of degrees
    # of freedom depends only on the number of fluxes.
    degrees_of_freedom = len(model_fluxes) - 1

    if degrees_of_freedom == 0:
        # FIXME: with a single filter the chi-square is meaningless; scale
        # the model to the observation and report a perfect fit.
        return 0, np.sum(obs_fluxes) / np.sum(model_fluxes), 1

    # Least-squares normalisation of the model on the observation, each
    # filter being weighted by its inverse squared error.
    normalisation_factor = (np.sum(obs_fluxes * model_fluxes /
                                   (obs_errors * obs_errors)) /
                            np.sum(model_fluxes * model_fluxes /
                                   (obs_errors * obs_errors)))

    chi2 = np.sum(np.square((obs_fluxes -
                             normalisation_factor * model_fluxes) /
                            obs_errors))

    # The returned reduced chi-square is capped at 99; the exponential
    # probability is computed from the un-capped chi-square.
    return (min(chi2 / degrees_of_freedom, 99),
            normalisation_factor,
            np.exp(-chi2 / 2))


591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
def bin_evenly(values, max_bins):
    """Divide some values in evenly populated bins.

    Given a list of values and a desired number of bins, this method computes
    the bin boundaries so that each bin holds (roughly) the same number of
    elements, then digitises the value list with these boundaries.

    Parameters
    ----------
    values : list of floats
        List of values to be binned.
    max_bins : integer
        Maximum number of bins. When there are fewer values than asked bins,
        the bin count is clipped to the number of values so that each value
        may end up in its own bin.

    Returns
    -------
    boundaries : array of floats
        The values of the boundaries of the bins.
    bins_digits : numpy array of integers
        Array of the same length as the value list giving, for each value,
        the bin number (between 1 and the number of bins) it belongs to.

    """
    # Never use more bins than there are values.
    nb_bins = min(max_bins, len(values))

    # The bin boundaries are the nb_bins + 1 quantiles of the values.
    boundaries = stats.mstats.mquantiles(values,
                                         np.linspace(0, 1, nb_bins + 1))

    # np.digitize would put a value equal to the last boundary outside the
    # last bin, so we digitise against a copy whose upper boundary is raised
    # to make the maximum fall in the last bin.
    shifted = np.copy(boundaries)
    shifted[-1] += 1
    bin_digits = np.digitize(values, shifted)

    return (boundaries, bin_digits)