Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
cigale
CIGALE
Commits
c17db61e
Commit
c17db61e
authored
Feb 18, 2014
by
Yannick Roehlly
Browse files
First version of the pdf_analysis module
Maybe we should change the name.
parent
f23e702a
Changes
2
Hide whitespace changes
Inline
Side-by-side
pcigale/analysis_modules/pdf_analysis/__init__.py
0 → 100644
View file @
c17db61e
# -*- coding: utf-8 -*-
# Copyright (C) 2013 Centre de données Astrophysiques de Marseille
# Copyright (C) 2013-2014 Yannick Roehlly
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
# Author: Yannick Roehlly
"""
Probability Density Function analysis module
============================================
This module builds the probability density functions (PDF) of the SED
parameters to compute their moments.
The models corresponding to all possible combinations of parameters are
computed and their fluxes in the same filters as the observations are
integrated. These fluxes are compared to the observed ones to compute the
χ² value of the fitting. This χ² give a probability that is associated with
the model values for the parameters.
At the end, for each parameter, the probability-weighted mean and standard
deviation are computed and the best fitting model (the one with the least
reduced χ²) is given for each observation.
"""
import
os
import
numpy
as
np
from
numpy
import
newaxis
from
collections
import
OrderedDict
from
datetime
import
datetime
from
progressbar
import
ProgressBar
from
astropy.table
import
Table
,
Column
from
...utils
import
read_table
from
..
import
AnalysisModule
,
complete_obs_table
from
...creation_modules
import
get_module
as
get_creation_module
from
.utils
import
gen_compute_fluxes_at_redshift
from
...warehouse
import
SedWarehouse
from
...data
import
Database
# Tolerance threshold under which any flux or error is considered as 0.
TOLERANCE
=
1.e-12
# Probability threshold: models with a lower probability are excluded from
# the moments computation.
MIN_PROBABILITY
=
1.e-20
# Name of the file containing the analysis results
RESULT_FILE
=
"analysis_results.fits"
# Name of the file containing the best models information
BEST_MODEL_FILE
=
"best_models.fits"
# Directory where the output files are stored
OUT_DIR
=
"out/"
# Wavelength limits (restframe) when plotting the best SED.
PLOT_L_MIN
=
91
PLOT_L_MAX
=
1e6
class
PdfAnalysis
(
AnalysisModule
):
"""PDF analysis module"""
parameter_list
=
OrderedDict
([
(
"analysed_variables"
,
(
"array of strings"
,
"List of the variables (in the SEDs info dictionaries) for which "
"the statistical analysis will be done."
,
[
"sfr"
,
"average_sfr"
]
)),
(
"use_observation_redshift"
,
(
"boolean"
,
"If true, the redshift of each observation will be taken from the "
"redshift column in the observation table (default) and you must "
"not use a redshifting module. Set to false if you want to use a "
"redshifting module to test various redshifts."
,
True
)),
(
"save_best_sed"
,
(
"boolean"
,
"If true, save the best SED for each observation to a file."
,
False
)),
(
"plot_best_sed"
,
(
"boolean"
,
"If true, for each observation save a plot of the best SED "
"and the observed fluxes."
,
False
)),
(
"plot_chi2_distribution"
,
(
"boolean"
,
"If true, for each observation and each analysed variable "
"plot the value vs reduced chi-square distribution."
,
False
)),
(
"save_pdf"
,
(
"boolean"
,
"If true, for each observation and each analysed variable "
"save the probability density function."
,
False
)),
(
"plot_pdf"
,
(
"boolean"
,
"If true, for each observation and each analysed variable "
"plot the probability density function."
,
False
)),
(
"pdf_max_bin_number"
,
(
"integer"
,
"Maximum number of bins used to compute the probability density "
"function. This is only used when saving or printing the PDF. "
"If there are less values, the probability is given for each "
"one."
,
50
)),
(
"storage_type"
,
(
"string"
,
"Type of storage used to cache the generate SED."
,
"memory"
))
])
def
process
(
self
,
data_file
,
column_list
,
creation_modules
,
creation_modules_params
,
parameters
):
"""Process with the psum analysis.
The analysis is done in two nested loops: over each observation and
over each theoretical SEDs. We first loop over the SEDs to limit the
number of time the SEDs are created.
Parameters
----------
data_file: string
Name of the file containing the observations to fit.
column_list: list of strings
Name of the columns from the data file to use for the analysis.
creation_modules: list of strings
List of the module names (in the right order) to use for creating
the SEDs.
creation_modules_params: list of dictionaries
List of the parameter dictionaries for each module.
parameters: dictionary
Dictionary containing the parameters.
"""
# Rename the output directory if it exists
if
os
.
path
.
exists
(
OUT_DIR
):
new_name
=
datetime
.
now
().
strftime
(
"%Y%m%d%H%M"
)
+
"_"
+
OUT_DIR
os
.
rename
(
OUT_DIR
,
new_name
)
print
(
"The existing {} directory was renamed to {}"
.
format
(
OUT_DIR
,
new_name
))
os
.
mkdir
(
OUT_DIR
)
# Get the parameters
analysed_variables
=
parameters
[
"analysed_variables"
]
use_observation_redshift
=
(
parameters
[
"use_observation_redshift"
]
.
lower
()
==
"true"
)
save_best_sed
=
(
parameters
[
"save_best_sed"
].
lower
()
==
"true"
)
plot_best_sed
=
(
parameters
[
"plot_best_sed"
].
lower
()
==
"true"
)
plot_chi2_distribution
=
(
parameters
[
"plot_chi2_distribution"
].
lower
()
==
"true"
)
save_pdf
=
(
parameters
[
"save_pdf"
].
lower
()
==
"true"
)
plot_pdf
=
(
parameters
[
"plot_pdf"
].
lower
()
==
"true"
)
pdf_max_bin_number
=
int
(
parameters
[
"pdf_max_bin_number"
])
# Get the needed filters in the pcigale database. We use an ordered
# dictionary because we need the keys to always be returned in the
# same order.
with
Database
()
as
base
:
filters
=
OrderedDict
([(
name
,
base
.
get_filter
(
name
))
for
name
in
column_list
if
not
name
.
endswith
(
'_err'
)])
# If needed, get the redshifting and IGM attenuation modules (we set
# redshift=0 but the redshift value is adapted later).
redshifting_module
=
None
igm_module
=
None
if
use_observation_redshift
:
redshifting_module
=
get_creation_module
(
"redshifting"
,
redshift
=
0
)
igm_module
=
get_creation_module
(
"igm_attenuation"
)
# Read the observation table and complete it by adding error where
# none is provided and by adding the systematic deviation.
obs_table
=
complete_obs_table
(
read_table
(
data_file
),
column_list
,
filters
,
TOLERANCE
)
##################################################################
# Model computation #
##################################################################
print
(
"Computing the models fluxes..."
)
# First, we compute for all the possible theoretical models (one for
# each parameter set in sed_module_parameters) the fluxes in all the
# filters at the redshift of each observed galaxy. These fluxes are
# stored in:
# model_fluxes:
# - axis 0: model index
# - axis 1: observation index
# - axis 2: filter index
# We use a numpy masked array to mask the fluxes of models that would
# be older than the age of the Universe at the considered redshift
# The values for the analysed variables are stored in:
# model_variables:
# - axis 0: the model index in sed_module_params
# - axis 1: the variable index in analysed_variables
model_fluxes
=
np
.
ma
.
zeros
((
len
(
creation_modules_params
),
len
(
obs_table
),
len
(
filters
)))
model_variables
=
np
.
ma
.
zeros
((
len
(
creation_modules_params
),
len
(
analysed_variables
)))
# We keep the information (i.e. the content of the sed.info
# dictionary) for each model.
model_info
=
[]
progress_bar
=
ProgressBar
(
maxval
=
len
(
creation_modules_params
)).
start
()
# The SED warehouse is used to retrieve SED corresponding to some
# modules and parameters.
with
SedWarehouse
(
cache_type
=
parameters
[
"storage_type"
])
as
\
sed_warehouse
:
for
model_index
,
parameters
in
enumerate
(
creation_modules_params
):
sed
=
sed_warehouse
.
get_sed
(
creation_modules
,
parameters
)
# Cached function to compute the SED fluxes at a redshift
gen_fluxes
=
gen_compute_fluxes_at_redshift
(
sed
,
filters
.
values
(),
redshifting_module
,
igm_module
)
for
obs_index
,
redshift
in
enumerate
(
obs_table
[
"redshift"
]):
model_fluxes
[
model_index
,
obs_index
]
=
gen_fluxes
(
redshift
)
model_variables
[
model_index
]
=
np
.
array
(
[
sed
.
info
[
name
]
for
name
in
analysed_variables
]
)
model_info
.
append
(
sed
.
info
.
values
())
progress_bar
.
update
(
model_index
+
1
)
# Mask the invalid fluxes
model_fluxes
=
np
.
ma
.
masked_less
(
model_fluxes
,
-
99
)
progress_bar
.
finish
()
##################################################################
# Observations to models comparison #
##################################################################
print
(
"Comparing the observations to the models..."
)
# To accelerate the computations, we put observations fluxes and
# errors in multi-dimensional numpy array:
# - axis 0: the observation index
# - axis 1: the filter index
obs_fluxes
=
np
.
array
([
obs_table
[
name
]
for
name
in
filters
]
).
transpose
()
obs_errors
=
np
.
array
([
obs_table
[
name
+
"_err"
]
for
name
in
filters
]
).
transpose
()
# Some observations may not have flux value in some filters, in that
# case the user is asked to put -9999 as value. We mask these values.
# Note, we must mask obs_fluxes after obs_errors.
obs_errors
=
np
.
ma
.
masked_where
(
obs_fluxes
<
-
9990.
,
obs_errors
)
obs_fluxes
=
np
.
ma
.
masked_less
(
obs_fluxes
,
-
9990.
)
# Normalisation factor to be applied to a model fluxes to best fit an
# observation fluxes. Normalised flux of the models. χ² and
# likelihood of the fitting. Reduced χ² (divided by the number of
# filters to do the fit).
# axis 0: model index
# axis 1: observation index
normalisation_factors
=
(
np
.
sum
(
model_fluxes
*
obs_fluxes
[
newaxis
,
:,
:]
/
(
obs_errors
*
obs_errors
)[
newaxis
,
:,
:],
axis
=
2
)
/
np
.
sum
(
model_fluxes
*
model_fluxes
/
(
obs_errors
*
obs_errors
)[
newaxis
,
:,
:],
axis
=
2
)
)
norm_model_fluxes
=
model_fluxes
*
normalisation_factors
[:,
:,
newaxis
]
# χ² of the comparison of each model to each observation. The
# chi_squares array is:
# axis 0: model index
# axis 1: observation index
chi_squares
=
np
.
sum
(
np
.
square
(
(
obs_fluxes
[
newaxis
,
:,
:]
-
norm_model_fluxes
)
/
obs_errors
[
newaxis
,
:,
:]
),
axis
=
2
)
# We define the reduced χ² as the χ² divided by the number of fluxes
# used for the fitting.
reduced_chi_squares
=
chi_squares
/
obs_fluxes
.
count
(
1
)[
newaxis
,
:]
# We use the exponential probability associated with the χ² as
# likelihood function. The likelihood array is:
# axis 0: model index
# axis 1: observation index
likelihood
=
np
.
exp
(
-
chi_squares
/
2
)
# For the analysis, we consider that the computed models explain each
# observation. We normalise the likelihood function to have a
# total likelihood of 1 for each observation.
likelihood
/=
np
.
sum
(
likelihood
,
axis
=
0
)[
newaxis
,
:]
# We don't want to take into account the models with a probability
# less that the threshold.
likelihood
=
np
.
ma
.
masked_less
(
likelihood
,
MIN_PROBABILITY
)
# We re-normalise the likelihood.
likelihood
/=
np
.
sum
(
likelihood
,
axis
=
0
)[
newaxis
,
:]
# Some model variables depend on the galaxy mass (the normalisation
# factor) so we expand the model_variables array to have a new axis
# corresponding to the observation and multiply (when needed) the
# value of the variables by the normalisation factor of the fitting
# for each observation.
# The new array will have be:
# axis 0: model index
# axis 1: observation index
# axis 2: variable index
model_variables
=
model_variables
[:,
newaxis
,
:].
repeat
(
len
(
obs_table
),
axis
=
1
)
# We take the mass-dependent variable list from the last computed sed.
for
index
,
variable
in
enumerate
(
analysed_variables
):
if
variable
in
sed
.
mass_proportional_info
:
model_variables
[:,
:,
index
]
*=
normalisation_factors
# We also add the galaxy mass to the analysed variables
analysed_variables
.
insert
(
0
,
"galaxy_mass"
)
model_variables
=
np
.
dstack
((
normalisation_factors
,
model_variables
))
##################################################################
# Variable analysis #
##################################################################
# We compute the weighted average and standard deviation using the
# likelihood as weight. We first build the weight array by expanding
# the likelihood along a new axis corresponding to the analysed
# variable.
weights
=
likelihood
[:,
:,
newaxis
].
repeat
(
len
(
analysed_variables
),
axis
=
2
)
# Analysed variables average and standard devisation arrays.
# axis 0: observation index
# axis 1: variable index
analysed_averages
=
np
.
ma
.
average
(
model_variables
,
axis
=
0
,
weights
=
weights
)
analysed_variances
=
np
.
ma
.
average
(
(
model_variables
-
analysed_averages
[
newaxis
,
:,
:])
**
2
,
axis
=
0
,
weights
=
weights
)
analysed_std
=
np
.
ma
.
sqrt
(
analysed_variances
)
# Create and save the result table.
result_table
=
Table
()
result_table
.
add_column
(
Column
(
obs_table
[
"id"
].
data
,
name
=
"observation_id"
))
for
index
,
variable
in
enumerate
(
analysed_variables
):
result_table
.
add_column
(
Column
(
analysed_averages
[:,
index
],
name
=
variable
))
result_table
.
add_column
(
Column
(
analysed_std
[:,
index
],
name
=
variable
+
"_err"
))
result_table
.
write
(
OUT_DIR
+
RESULT_FILE
)
##################################################################
# Best models #
##################################################################
# We define the best fitting model for each observation as the one
# with the least χ².
best_model_index
=
list
(
reduced_chi_squares
.
argmin
(
axis
=
0
))
# We take the list of information added to the SEDs from the last
# computed one.
model_info_names
=
sed
.
info
.
keys
()
best_model_table
=
Table
()
best_model_table
.
add_column
(
Column
(
obs_table
[
"id"
].
data
,
name
=
"observation_id"
))
best_model_table
.
add_column
(
Column
(
chi_squares
[
best_model_index
,
range
(
len
(
best_model_index
))],
name
=
"chi_square"
))
best_model_table
.
add_column
(
Column
(
reduced_chi_squares
[
best_model_index
,
range
(
len
(
best_model_index
))],
name
=
"reduced_chi_square"
))
best_model_table
.
add_column
(
Column
(
normalisation_factors
[
best_model_index
,
range
(
len
(
best_model_index
))],
name
=
"galaxy_mass"
,
unit
=
"Msun"
))
for
index
,
name
in
enumerate
(
model_info_names
):
best_model_table
.
add_column
(
Column
(
[
model_info
[
model_idx
][
index
]
for
model_idx
in
best_model_index
],
name
=
name
))
best_model_table
.
write
(
OUT_DIR
+
BEST_MODEL_FILE
)
# AnalysisModule to be returned by get_module
Module
=
PdfAnalysis
pcigale/analysis_modules/pdf_analysis/utils.py
0 → 100644
View file @
c17db61e
# -*- coding: utf-8 -*-
# Copyright (C) 2013 Centre de données Astrophysiques de Marseille
# Copyright (C) 2014 Yannick Roehlly <yannick@iaora.eu>
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
# Author: Yannick Roehlly
import
numpy
as
np
from
copy
import
deepcopy
from
...sed.cosmology
import
cosmology
from
...creation_modules
import
get_module
as
get_creation_module
def
gen_compute_fluxes_at_redshift
(
sed
,
filters
,
redshifting_module
,
igm_module
):
""""Generate function to compute the fluxes of a SED at a given redshift
Given a SED, a list of filters and a redshift module name, this generator
returns a function computing the fluxes of the SED in all the filters at
a given redshift. If the SED is older than the Universe at the given
redshift, the fluxes returned by this function will be all -99.
If the redshift module name is None, the SED is not redshifted by the
returned function. This means that it will always return the same fluxes,
whatever its redshift parameter. This can be used for computing
photometric redshifts.
Parameters
----------
sed : pcigale.sed
The pcigale SED object.
filters : list of pcigale.data.filters
List of pcigale filters objects.
redshifting_module : picgale.creation_modules.Module
A pcigale SED creation module to redshift the SED. It must accept a
redshift parameter (or be None).
igm_module : picgale.creation_modules.Module
A pcigale SED creation module to add the IGM attenuation to the SED.
Is not used if the redshifting module is None.
Return
------
gen_fluxes : function
Function computing the fluxes of the SED in all filters at a given
redshift.
"""
# The returned function is memoized.
cache
=
{}
def
gen_fluxes
(
redshift
):
"""Compute the flux of the SED in various filters.
Parameters
----------
redshift : float
Returns
-------
array fo floats
"""
# If the function is generated without a redshift module, it always
# computes the fluxes at the SED redshift (the SED may be already
# redshifted).
if
not
redshifting_module
:
redshift
=
sed
.
redshift
if
redshift
not
in
cache
:
# Age of the Universe at redshift.
# astropy 0.3 cosmology functions return quantities
try
:
age_at_redshift
=
cosmology
.
age
(
redshift
).
value
*
1000
except
AttributeError
:
age_at_redshift
=
cosmology
.
age
(
redshift
)
*
1000
if
sed
.
info
[
"age"
]
>
age_at_redshift
:
cache
[
redshift
]
=
-
99
*
np
.
ones
(
len
(
filters
))
else
:
if
redshifting_module
:
red_sed
=
deepcopy
(
sed
)
redshifting_module
.
parameters
[
"redshift"
]
=
redshift
redshifting_module
.
process
(
red_sed
)
if
igm_module
:
igm_module
.
process
(
red_sed
)
else
:
red_sed
=
sed
cache
[
redshift
]
=
np
.
array
(
[
red_sed
.
compute_fnu
(
filt_
.
trans_table
,
filt_
.
effective_wavelength
)
for
filt_
in
filters
])
return
cache
[
redshift
]
return
gen_fluxes
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment