Commit 46274080 authored by Médéric Boquien's avatar Médéric Boquien

Cleaned up version of the upper limit handling code written by Denis. Some further optimisations should be easy.

Cleaned up version of the upper limit handling code written by Denis. Some further optimisations should be easy.
parent f8832560
# -*- coding: utf-8 -*-
# Copyright (C) 2012, 2013 Centre de données Astrophysiques de Marseille
# Copyright (C) 2014 Laboratoire d'Astrophysique de Marseille, AMU
# Copyright (C) 2012, 2014 Centre de données Astrophysiques de Marseille
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
# Author: Yannick Roehlly
# Author: Yannick Roehlly & Denis Burgarella
from importlib import import_module
from collections import OrderedDict
......@@ -149,7 +150,7 @@ def get_module(module_name):
raise
def adjust_errors(flux, error, tolerance, default_error=0.1,
def adjust_errors(flux, error, tolerance, lim_flag, default_error=0.1,
systematic_deviation=0.1):
"""Adjust the errors replacing the 0 values by the default error and
adding the systematic deviation.
......@@ -165,6 +166,8 @@ def adjust_errors(flux, error, tolerance, default_error=0.1,
Observational error in the same unit as the fluxes.
tolerance: float
Tolerance threshold under flux error is considered as 0.
lim_flag: boolean
Do we process upper limits (True) or treat them as no-data (False)?
default_error: float
Default error factor used when the provided error in under the
tolerance threshold.
......@@ -177,7 +180,6 @@ def adjust_errors(flux, error, tolerance, default_error=0.1,
The corrected errors.
"""
# The arrays must have the same lengths.
if len(flux) != len(error):
raise ValueError("The flux and error arrays must have the same "
......@@ -186,16 +188,34 @@ def adjust_errors(flux, error, tolerance, default_error=0.1,
# We copy the error array not to modify the original one.
error = np.copy(error)
# We define:
# 1) upper limit == (lim_flag==True) and
# [(flux > tolerance) and (-9990. < error < tolerance)]
# 2) no data == (flux < -9990.) and (error < -9990.)
# Note that the upper-limit test must be performed before the no-data test
# because if lim_flag is False, we process upper limits as no-data.
#
# Replace errors below tolerance by the default one.
error[error < tolerance] = (default_error * flux[error < tolerance])
mask_noerror = np.logical_and(flux>tolerance, error<-9990.)
error[mask_noerror] = (default_error * flux[mask_noerror])
# Add the systematic error.
error = np.sqrt(np.square(error) + np.square(flux * systematic_deviation))
mask_limflag = np.logical_and.reduce(
(flux>tolerance, error>=-9990., error<tolerance))
# Replace upper limits by no data if lim_flag==False
if not lim_flag:
flux[mask_limflag] = -9999.
error[mask_limflag] = -9999.
mask_ok = np.logical_and(flux>tolerance, error>tolerance)
# Add the systematic error.
error[mask_ok] = np.sqrt(np.square(error[mask_ok]) +
np.square(flux[mask_ok] * systematic_deviation))
return error
def complete_obs_table(obs_table, used_columns, filter_list, tolerance,
def complete_obs_table(obs_table, used_columns, filter_list, tolerance, lim_flag,
default_error=0.1, systematic_deviation=0.1):
"""Complete the observation table
......@@ -215,6 +235,8 @@ def complete_obs_table(obs_table, used_columns, filter_list, tolerance,
The list of filters used in the analysis.
tolerance: float
Tolerance threshold under flux error is considered as 0.
lim_flag: boolean
Do we process upper limits (True) or treat them as no-data (False)?
default_error: float
Default error factor used when the provided error in under the
tolerance threshold.
......@@ -252,6 +274,7 @@ def complete_obs_table(obs_table, used_columns, filter_list, tolerance,
obs_table[name_err] = adjust_errors(obs_table[name],
obs_table[name_err],
tolerance,
lim_flag,
default_error,
systematic_deviation)
return obs_table
......
# -*- coding: utf-8 -*-
# Copyright (C) 2014 Laboratoire d'Astrophysique de Marseille, AMU
# Copyright (C) 2013 Centre de données Astrophysiques de Marseille
# Copyright (C) 2013-2014 Institute of Astronomy
# Copyright (C) 2013-2014 Yannick Roehlly <yannick@iaora.eu>
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
# Author: Yannick Roehlly & Médéric Boquien
# Author: Yannick Roehlly, Médéric Boquien & Denis Burgarella
"""
Probability Density Function analysis module
......@@ -76,6 +77,12 @@ class PdfAnalysis(AnalysisModule):
"the probability density function.",
False
)),
("lim_flag", (
"boolean",
"If true, for each object check whether upper limits are present "
"and analyse them.",
False
)),
("storage_type", (
"string",
"Type of storage used to cache the generate SED.",
......@@ -121,6 +128,7 @@ class PdfAnalysis(AnalysisModule):
n_variables = len(analysed_variables)
save = {key:config["save_{}".format(key)].lower() == "true"
for key in ["best_sed", "chi2", "pdf"]}
lim_flag = config["lim_flag"].lower() == "true"
# Get the needed filters in the pcigale database. We use an ordered
# dictionary because we need the keys to always be returned in the
......@@ -135,7 +143,7 @@ class PdfAnalysis(AnalysisModule):
# Read the observation table and complete it by adding error where
# none is provided and by adding the systematic deviation.
obs_table = complete_obs_table(read_table(data_file), column_list,
filters, TOLERANCE)
filters, TOLERANCE, lim_flag)
n_obs = len(obs_table)
w_redshifting = creation_modules.index('redshifting')
......@@ -208,7 +216,7 @@ class PdfAnalysis(AnalysisModule):
model_fluxes, model_variables, time.time(),
mp.Value('i', 0), analysed_averages, analysed_std,
best_fluxes, best_parameters, best_chi2, best_chi2_red,
save, n_obs)
save, lim_flag, n_obs)
if cores == 1: # Do not create a new process
init_worker_analysis(*initargs)
for idx, obs in enumerate(obs_table):
......
# -*- coding: utf-8 -*-
# Copyright (C) 2013 Centre de données Astrophysiques de Marseille
# Copyright (C) 2013-2014 Institute of Astronomy
# Copyright (C) 2014 Laboratoire d'Astrophysique de Marseille, AMU
# Copyright (C) 2014 Yannick Roehlly <yannick@iaora.eu>
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
# Author: Yannick Roehlly & Médéric Boquien
# Author: Yannick Roehlly, Médéric Boquien & Denis Burgarella
import time
import numpy as np
from scipy import optimize
from scipy.special import erf
from .utils import save_best_sed, save_pdf, save_chi2
from ...warehouse import SedWarehouse
# Probability threshold: models with a lower probability are excluded from
# the moments computation.
# Probability threshold: models with a lower probability are excluded from the
# moments computation.
MIN_PROBABILITY = 1e-20
def init_sed(params, filters, analysed, redshifts, fluxes, variables,
t_begin, n_computed):
"""Initializer of the pool of processes. It is mostly used to convert
......@@ -70,7 +72,7 @@ def init_sed(params, filters, analysed, redshifts, fluxes, variables,
def init_analysis(params, filters, analysed, redshifts, fluxes, variables,
t_begin, n_computed, analysed_averages, analysed_std,
best_fluxes, best_parameters, best_chi2, best_chi2_red, save,
n_obs):
lim_flag, n_obs):
"""Initializer of the pool of processes. It is mostly used to convert
RawArrays into numpy arrays. The latter are defined as global variables to
be accessible from the workers.
......@@ -117,6 +119,7 @@ def init_analysis(params, filters, analysed, redshifts, fluxes, variables,
global gbl_redshifts, gbl_w_redshifts, gbl_analysed_averages
global gbl_analysed_std, gbl_best_fluxes, gbl_best_parameters
global gbl_best_chi2, gbl_best_chi2_red, gbl_save, gbl_n_obs
global gbl_lim_flag
gbl_analysed_averages = np.ctypeslib.as_array(analysed_averages[0])
gbl_analysed_averages = gbl_analysed_averages.reshape(analysed_averages[1])
......@@ -139,8 +142,11 @@ def init_analysis(params, filters, analysed, redshifts, fluxes, variables,
for redshift in gbl_redshifts}
gbl_save = save
gbl_lim_flag = lim_flag
gbl_n_obs = n_obs
def sed(idx):
"""Worker process to retrieve a SED and affect the relevant data to shared
RawArrays.
......@@ -198,10 +204,17 @@ def analysis(idx, obs):
Input data for an individual object
"""
# Tolerance threshold under which any flux or error is considered as 0.
tolerance = 1e-12
global gbl_mod_fluxes, gbl_obs_fluxes, gbl_obs_errors
# We pick up the closest redshift assuming we have limited the number of
# decimals (usually set to 2 decimals).
w = np.where(gbl_w_redshifts[gbl_redshifts[np.abs(obs['redshift'] -
gbl_redshifts).argmin()]])
# We only keep model with fluxes >= -90. If not => no data
model_fluxes = np.ma.masked_less(gbl_model_fluxes[w[0], :], -90.)
model_variables = gbl_model_variables[w[0], :]
......@@ -209,11 +222,27 @@ def analysis(idx, obs):
obs_errors = np.array([obs[name + "_err"] for name in gbl_filters])
# Some observations may not have flux value in some filters, in
# that case the user is asked to put -9999 as value. We mask these
# values. Note, we must mask obs_fluxes after obs_errors.
# that case the user is asked to put -9999. as value. We mask these
# values. Note, we must mask obs_fluxes AFTER obs_errors.
obs_errors = np.ma.masked_where(obs_fluxes < -9990., obs_errors)
obs_fluxes = np.ma.masked_less(obs_fluxes, -9990.)
# Some observations may not have flux values in some filter(s), but
# they can have upper limit(s). To process upper limits, the user
# is asked to put the upper limit as flux value and an error value with
# (obs_errors>=-9990. and obs_errors<0.).
# Next, the user has two options:
# 1) s/he puts True in the boolean lim_flag
# and the limits are processed as upper limits below.
# 2) s/he puts False in the boolean lim_flag
# and the limits are processed as no-data below.
lim_flag = gbl_lim_flag*np.logical_and(obs_errors >= -9990.,
obs_errors < tolerance)
gbl_obs_fluxes = obs_fluxes
gbl_obs_errors = obs_errors
# Normalisation factor to be applied to a model fluxes to best fit
# an observation fluxes. Normalised flux of the models. χ² and
# likelihood of the fitting. Reduced χ² (divided by the number of
......@@ -222,12 +251,41 @@ def analysis(idx, obs):
np.sum(model_fluxes * obs_fluxes / (obs_errors * obs_errors), axis=1) /
np.sum(model_fluxes * model_fluxes / (obs_errors * obs_errors), axis=1)
)
if lim_flag.any() is True:
norm_init = norm_facts
for imod in range(len(model_fluxes)):
gbl_mod_fluxes = model_fluxes[imod, :]
norm_facts[imod] = optimize.newton(dchi2_over_ds2, norm_init[imod],
tol=1e-16)
norm_model_fluxes = model_fluxes * norm_facts[:, np.newaxis]
# χ² of the comparison of each model to each observation.
chi2_ = np.sum(
np.square((obs_fluxes - norm_model_fluxes) / obs_errors),
axis=1)
mask_data = np.logical_and(obs_fluxes > tolerance, obs_errors > tolerance)
if lim_flag.any() is True:
# This mask selects the filter(s) for which measured fluxes are given
# i.e., when (obs_flux is >=0. and obs_errors>=0.) and lim_flag=True
mask_data = (obs_errors >= tolerance)
# This mask selects the filter(s) for which upper limits are given
# i.e., when (obs_flux is >=0. (and obs_errors>=-9990., obs_errors<0.))
# and lim_flag=True
mask_lim = np.logical_and(obs_errors >= -9990., obs_errors < tolerance)
chi2_data = np.sum(np.square(
(obs_fluxes[mask_data]-norm_model_fluxes[:, mask_data]) /
obs_errors[mask_data]), axis=1)
chi2_lim = -2. * np.sum(
np.log(
np.sqrt(np.pi/2.)*(-obs_errors[mask_lim])*(
1.+erf(
(obs_fluxes[mask_lim]-norm_model_fluxes[:, mask_lim]) /
(np.sqrt(2)*(-obs_errors[mask_lim]))))), axis=1)
chi2_ = chi2_data + chi2_lim
else:
chi2_ = np.sum(np.square(
(obs_fluxes[mask_data] - norm_model_fluxes[:, mask_data]) /
obs_errors[mask_data]), axis=1)
# We use the exponential probability associated with the χ² as
# likelihood function.
......@@ -309,3 +367,70 @@ def analysis(idx, obs):
format(n_computed, gbl_n_obs, np.around(t_elapsed, decimals=1),
np.around(n_computed/t_elapsed, decimals=1)),
end="\r")
def dchi2_over_ds2(s):
    """Derivative of the χ² with respect to the normalisation factor.

    Implements Eq. A11 of Sawicki (2012, PASP, 124, 1008), used in the SED
    fitting process when upper limits are included in the dataset. The root
    of this function is the normalisation factor that minimises the χ².

    Parameters
    ----------
    s: float
        Normalisation factor onto which we perform the minimisation.

    Returns
    -------
    float
        Value of dχ²/ds at ``s`` (Eq. A11 of Sawicki 2012).

    Notes
    -----
    The observation and model fluxes are read from the module-level globals
    ``gbl_obs_fluxes``, ``gbl_obs_errors`` and ``gbl_mod_fluxes`` (set by
    the worker initialiser) because this function is passed to
    ``scipy.optimize.newton``, which only supplies the scalar ``s``.

    """
    # We enter this function only when lim_flag is True. Filters with a
    # measured flux have obs_errors >= 0.; filters with an upper limit are
    # flagged with -9990. <= obs_errors < 0. (the limit is stored as the
    # flux and minus the 1-σ error as the error).
    wlim = np.where((gbl_obs_errors >= -9990.) & (gbl_obs_errors < 0.))
    wdata = np.where(gbl_obs_errors >= 0.)

    mod_fluxes_data = gbl_mod_fluxes[wdata]
    mod_fluxes_lim = gbl_mod_fluxes[wlim]

    obs_fluxes_data = gbl_obs_fluxes[wdata]
    obs_fluxes_lim = gbl_obs_fluxes[wlim]

    obs_errors_data = gbl_obs_errors[wdata]
    # Upper-limit errors are stored negative; flip the sign to get σ > 0.
    obs_errors_lim = -gbl_obs_errors[wlim]

    # Contribution of the detections (standard weighted least squares term).
    dchi2_over_ds_data = np.sum(
        (obs_fluxes_data - s * mod_fluxes_data) *
        mod_fluxes_data / (obs_errors_data * obs_errors_data))

    # Contribution of the upper limits (error-function term of Eq. A11).
    # The argument of both exp() and erf() is the same; compute it once.
    arg = ((obs_fluxes_lim - s * mod_fluxes_lim) /
           (np.sqrt(2.) * obs_errors_lim))
    dchi2_over_ds_lim = np.sqrt(2. / np.pi) * np.sum(
        mod_fluxes_lim * np.exp(-np.square(arg)) /
        (obs_errors_lim * (1. + erf(arg))))

    return dchi2_over_ds_data - dchi2_over_ds_lim
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment