Commit e57cb6e6 authored by Médéric Boquien

Computing the parameters and their uncertainties through the histogram of the PDF makes the code more complex and can introduce biases in some cases. This patch moves the computation of the histograms to the function that saves them to disk. In parallel, the estimated values of the parameters and the corresponding uncertainties are simply computed from the weighted mean and standard deviation of the models that are at least 0.1% as likely as the best model to reproduce the observations.
parent 1fccf2f9
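
The core of the change boils down to selecting the models that are at least 0.1% as likely as the best one and taking the likelihood-weighted mean and standard deviation of each parameter over that subset. A minimal standalone sketch of the idea (illustrative NumPy only, with made-up array names and values, not the actual pcigale code):

import numpy as np

# Hypothetical example data: one likelihood per model and the values of a
# single analysed parameter for each model.
likelihood = np.array([1.0, 0.6, 0.3, 1e-5])
param = np.array([10., 12., 15., 100.])

# Keep only the models at least 0.1% as likely as the best one.
wlikely = np.where(likelihood >= 1e-3 * likelihood.max())
likelihood = likelihood[wlikely]

# Likelihood-weighted mean and standard deviation, as in weighted_param().
mean = np.average(param[wlikely], weights=likelihood)
std = np.sqrt(np.average((param[wlikely] - mean)**2, weights=likelihood))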
@@ -32,13 +32,11 @@ def save_best_sed(obsid, sed, norm):
sed.to_votable(OUT_DIR + "{}_best_model.xml".format(obsid), mass=norm)
def save_pdf(obsid, analysed_variables, model_variables, likelihood):
"""Save the PDF to a FITS file
def save_pdf(obsid, analysed_variables, model_variables, likelihood, wlikely):
"""Compute and save the PDF to a FITS file
We estimate the probability density functions (PDF) of the parameters using
a weighted kernel density estimation. This part should definitely be
improved as we simulate the weight by adding as many value as their
probability * 100.
We estimate the probability density functions (PDF) of the parameters from
a likelihood-weighted histogram.
Parameters
----------
@@ -47,14 +45,38 @@ def save_pdf(obsid, analysed_variables, model_variables, likelihood):
analysed_variables: list
Analysed variables names
model_variables: 2D array
Analysed variables values for all models
likelihood: 2D array
Likelihood for all models
Values of the model variables
likelihood: 1D array
Likelihood of the "likely" models
wlikely: 1D array
Indices of the likely models to select them from the model_variables
array
"""
for var_index, var_name in enumerate(analysed_variables):
pdf_grid = model_variables[:, var_index]
pdf_prob = likelihood[:, var_index]
# We check how many unique parameter values are analysed and if
# less than Npdf (= 100), the PDF is initially built assuming a
# number of bins equal to the number of unique values for a given
# parameter
Npdf = 100
for i, var_name in enumerate(analysed_variables):
min_hist = np.min(model_variables[wlikely[0], i])
max_hist = np.max(model_variables[wlikely[0], i])
Nhist = min(Npdf, len(np.unique(model_variables[:, i])))
if min_hist == max_hist:
pdf_grid = np.array([min_hist, max_hist])
pdf_prob = np.array([1., 1.])
else:
pdf_prob, pdf_grid = np.histogram(model_variables[wlikely[0], i],
Nhist,
(min_hist, max_hist),
weights=likelihood, density=True)
pdf_x = (pdf_grid[1:]+pdf_grid[:-1]) / 2.
pdf_grid = np.linspace(min_hist, max_hist, Npdf)
pdf_prob = np.interp(pdf_grid, pdf_x, pdf_prob)
if pdf_prob is None:
# TODO: use logging
print("Can not compute PDF for observation <{}> and "
@@ -330,3 +352,28 @@ def compute_chi2(model_fluxes, obs_fluxes, obs_errors, lim_flag):
(np.sqrt(2)*(-obs_errors[mask_lim]))))), axis=1)
return chi2, scaling
def weighted_param(param, weights):
"""Compute the weighted mean and standard deviation of an array of data.
Parameters
----------
param: array
Values of the parameters for the entire grid of models
weights: array
Weights by which to weight the parameter values
Returns
-------
mean: float
Weighted mean of the parameter values
std: float
Weighted standard deviation of the parameter values
"""
mean = np.average(param, weights=weights)
std = np.sqrt(np.average((param-mean)**2, weights=weights))
return (mean, std)
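
As a quick sanity check, weighted_param() reduces to a plain weighted average and its second moment; for instance, with hypothetical numbers:

import numpy as np
param = np.array([1., 2., 4.])
weights = np.array([0.2, 0.5, 0.3])
mean = np.average(param, weights=weights)                      # 2.4
std = np.sqrt(np.average((param - mean)**2, weights=weights))  # ~1.11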
@@ -12,7 +12,8 @@ import numpy as np
from scipy.special import erf
import scipy.stats as st
from .utils import save_best_sed, save_pdf, save_chi2, compute_chi2
from .utils import (save_best_sed, save_pdf, save_chi2, compute_chi2,
weighted_param)
from ...warehouse import SedWarehouse
# Probability threshold: models with a lower probability are excluded from the
@@ -283,46 +284,12 @@ def analysis(idx, obs):
analysed_averages = np.empty(len(gbl_analysed_variables))
analysed_std = np.empty_like(analysed_averages)
# We check how many unique parameter values are analysed and if less
# than Npdf (= 100), the PDF is initally built assuming a number of
# bins equal to the number of unique values for a given parameter
# (e.g., sfr, age, attenuation.uv_bump_amplitude,
# dust.luminosity, attenuation.FUV, etc.).
Npdf = 100
var = np.empty((Npdf, len(analysed_averages)))
pdf = np.empty((Npdf, len(analysed_averages)))
min_hist = np.min(model_variables, axis=0)
max_hist = np.max(model_variables, axis=0)
for i, val in enumerate(analysed_averages):
Nhist = min(Npdf, len(np.unique(model_variables[:, i])))
if min_hist[i] == max_hist[i]:
analysed_averages[i] = model_variables[0, i]
analysed_std[i] = 0.
var[:, i] = max_hist[i]
pdf[:, i] = 1.
else:
pdf_prob, pdf_grid = np.histogram(model_variables[wlikely[0], i],
Nhist,
(min_hist[i], max_hist[i]),
weights=likelihood, density=True)
pdf_x = (pdf_grid[1:]+pdf_grid[:-1])/2
pdf_y = pdf_x * pdf_prob
analysed_averages[i] = np.sum(pdf_y) / np.sum(pdf_prob)
analysed_std[i] = np.sqrt(
np.sum(
np.square(pdf_x-analysed_averages[i]) * pdf_prob
) / np.sum(pdf_prob)
)
analysed_std[i] = max(0.05*analysed_averages[i],
analysed_std[i])
var[:, i] = np.linspace(min_hist[i], max_hist[i], Npdf)
pdf[:, i] = np.interp(var[:, i], pdf_x, pdf_prob)
# TODO Merge with above computation after checking it is fine with a MA
for i in range(len(analysed_averages)):
mean, std = weighted_param(model_variables[wlikely[0], i],
likelihood)
analysed_averages[i] = mean
analysed_std[i] = max(0.05 * mean, std)
gbl_analysed_averages[idx, :] = analysed_averages
gbl_analysed_std[idx, :] = analysed_std
@@ -342,7 +309,8 @@ def analysis(idx, obs):
save_chi2(obs['id'], gbl_analysed_variables, model_variables,
chi2 / (obs_fluxes.size - 1.))
if gbl_save['pdf']:
save_pdf(obs['id'], gbl_analysed_variables, var, pdf)
save_pdf(obs['id'], gbl_analysed_variables, model_variables,
likelihood, wlikely)
with gbl_n_computed.get_lock():
gbl_n_computed.value += 1