Commit 58dbf020 authored by Médéric Boquien's avatar Médéric Boquien

Fix merge conflict from the properties branch.

parents ad367c62 e9b4b8d3
......@@ -95,7 +95,6 @@ class PdfAnalysis(AnalysisModule):
))
])
def _compute_models(self, conf, obs, params, iblock):
models = ModelsManager(conf, obs, params, iblock)
......@@ -181,16 +180,16 @@ class PdfAnalysis(AnalysisModule):
# Rename the output directory if it exists
self.prepare_dirs()
# Store the grid of parameters in a manager to facilitate the
# computation of the models
params = ParametersManager(conf)
# Store the observations in a manager which sanitises the data, checks
# all the required fluxes are present, adding errors if needed,
# discarding invalid fluxes, etc.
obs = ObservationsManager(conf)
obs = ObservationsManager(conf, params)
obs.save('observations')
# Store the grid of parameters in a manager to facilitate the
# computation of the models
params = ParametersManager(conf)
results = self._compute(conf, obs, params)
results.best.analyse_chi2()
......
......@@ -15,11 +15,12 @@ from scipy.special import erf
log.setLevel('ERROR')
def save_chi2(obs, variable, models, chi2, values):
"""Save the chi² and the associated physical properties
"""
fname = 'out/{}_{}_chi2-block-{}.npy'.format(obs['id'], variable,
fname = 'out/{}_{}_chi2-block-{}.npy'.format(obs.id, variable.replace('/', '\/'),
models.iblock)
data = np.memmap(fname, dtype=np.float64, mode='w+',
shape=(2, chi2.size))
......@@ -53,7 +54,7 @@ def compute_corr_dz(model_z, obs_z):
return (cosmo.luminosity_distance(obs_z).value * 1e5)**2.
def dchi2_over_ds2(s, obs_fluxes, obs_errors, mod_fluxes):
def dchi2_over_ds2(s, obs_values, obs_errors, mod_values):
"""Function used to estimate the normalization factor in the SED fitting
process when upper limits are included in the dataset to fit (from Eq. A11
in Sawicki M. 2012, PASA, 124, 1008).
......@@ -63,12 +64,13 @@ def dchi2_over_ds2(s, obs_fluxes, obs_errors, mod_fluxes):
s: Float
Contains value onto which we perform minimization = normalization
factor
obs_fluxes: RawArray
Contains observed fluxes for each filter.
obs_value: RawArray
Contains observed fluxes for each filter and observed extensive
properties.
obs_errors: RawArray
Contains observed errors for each filter.
model_fluxes: RawArray
Contains modeled fluxes for each filter.
Contains observed errors for each filter and extensive properties.
model_values: RawArray
Contains modeled fluxes for each filter and extensive properties.
lim_flag: Boolean
Tell whether we use upper limits (True) or not (False).
......@@ -88,28 +90,28 @@ def dchi2_over_ds2(s, obs_fluxes, obs_errors, mod_fluxes):
wlim = np.where(np.isfinite(obs_errors) & (obs_errors < 0.))
wdata = np.where(obs_errors >= 0.)
mod_fluxes_data = mod_fluxes[wdata]
mod_fluxes_lim = mod_fluxes[wlim]
mod_value_data = mod_value[wdata]
mod_value_lim = mod_value[wlim]
obs_fluxes_data = obs_fluxes[wdata]
obs_fluxes_lim = obs_fluxes[wlim]
obs_value_data = obs_value[wdata]
obs_value_lim = obs_value[wlim]
obs_errors_data = obs_errors[wdata]
obs_errors_lim = -obs_errors[wlim]
dchi2_over_ds_data = np.sum(
(obs_fluxes_data-s*mod_fluxes_data) *
mod_fluxes_data/(obs_errors_data*obs_errors_data))
(obs_value_data-s*mod_value_data) *
mod_value_data/(obs_errors_data*obs_errors_data))
dchi2_over_ds_lim = np.sqrt(2./np.pi)*np.sum(
mod_fluxes_lim*np.exp(
mod_value_lim*np.exp(
-np.square(
(obs_fluxes_lim-s*mod_fluxes_lim)/(np.sqrt(2)*obs_errors_lim)
(obs_value_lim-s*mod_value_lim)/(np.sqrt(2)*obs_errors_lim)
)
)/(
obs_errors_lim*(
1.+erf(
(obs_fluxes_lim-s*mod_fluxes_lim)/(np.sqrt(2)*obs_errors_lim)
(obs_fluxes_lim-s*mod_value_lim)/(np.sqrt(2)*obs_errors_lim)
)
)
)
......@@ -120,7 +122,7 @@ def dchi2_over_ds2(s, obs_fluxes, obs_errors, mod_fluxes):
return func
def _compute_scaling(model_fluxes, obs_fluxes, obs_errors):
def _compute_scaling(model_fluxes, model_propsmass, observation):
"""Compute the scaling factor to be applied to the model fluxes to best fit
the observations. Note that we look over the bands to avoid the creation of
an array of the same size as the model_fluxes array. Because we loop on the
......@@ -130,28 +132,42 @@ def _compute_scaling(model_fluxes, obs_fluxes, obs_errors):
----------
model_fluxes: array
Fluxes of the models
obs_fluxes: array
Observed fluxes
obs_errors: array
Observed errors
model_propsmass: array
Extensive properties of the models to be fitted
observation: Class
Class instance containing the fluxes, intensive properties, extensive
properties and their errors, for a single observation.
Returns
-------
scaling: array
Scaling factors minimising the χ²
"""
obs_fluxes = observation.fluxes
obs_fluxes_err = observation.fluxes_err
obs_propsmass = observation.extprops
obs_propsmass_err = observation.extprops_err
num = np.zeros(model_fluxes.shape[1])
denom = np.zeros(model_fluxes.shape[1])
for i in range(obs_fluxes.size):
if np.isfinite(obs_fluxes[i]) and obs_errors[i] > 0.:
num += model_fluxes[i, :] * (obs_fluxes[i] / (obs_errors[i] *
obs_errors[i]))
denom += np.square(model_fluxes[i, :] * (1./obs_errors[i]))
if np.isfinite(obs_fluxes[i]) and obs_fluxes_err[i] > 0.:
num += model_fluxes[i, :] * (obs_fluxes[i] / (obs_fluxes_err[i] *
obs_fluxes_err[i]))
denom += np.square(model_fluxes[i, :] * (1./obs_fluxes_err[i]))
for i in range(obs_propsmass.size):
if np.isfinite(obs_propsmass[i]) and obs_propsmass_err[i] > 0.:
num += model_propsmass[i, :] * (obs_propsmass[i] /
(obs_propsmass_err[i] *
obs_propsmass_err[i]))
denom += np.square(model_propsmass[i, :] *
(1./obs_propsmass_err[i]))
return num/denom
def compute_chi2(model_fluxes, obs_fluxes, obs_errors, lim_flag):
def compute_chi2(model_fluxes, model_props, model_propsmass, observation,
lim_flag):
"""Compute the χ² of observed fluxes with respect to the grid of models. We
take into account upper limits if need be. Note that we look over the bands
to avoid the creation of an array of the same size as the model_fluxes
......@@ -162,10 +178,13 @@ def compute_chi2(model_fluxes, obs_fluxes, obs_errors, lim_flag):
----------
model_fluxes: array
2D grid containing the fluxes of the models
obs_fluxes: array
Fluxes of the observed object
obs_errors: array
Uncertainties on the fluxes of the observed object
model_props: array
2D grid containing the intensive properties of the models
model_propsmass: array
2D grid containing the extensive properties of the models
observation: Class
Class instance containing the fluxes, intensive properties, extensive
properties and their errors, for a single observation.
lim_flag: boolean
Boolean indicating whether upper limits should be treated (True) or
discarded (False)
......@@ -177,34 +196,51 @@ def compute_chi2(model_fluxes, obs_fluxes, obs_errors, lim_flag):
scaling: array
scaling of the models to obtain the minimum χ²
"""
limits = lim_flag and np.any(obs_errors <= 0.)
limits = lim_flag and np.any(observation.fluxes_err +
observation.extprops_err <= 0.)
scaling = _compute_scaling(model_fluxes, model_propsmass, observation)
scaling = _compute_scaling(model_fluxes, obs_fluxes, obs_errors)
obs_fluxes = observation.fluxes
obs_fluxes_err = observation.fluxes_err
obs_props = observation.intprops
obs_props_err = observation.intprops_err
obs_propsmass = observation.extprops
obs_propsmass_err = observation.extprops_err
# Some observations may not have flux values in some filter(s), but
# they can have upper limit(s).
if limits == True:
obs_values = np.concatenate((obs_fluxes, obs_propsmass))
obs_values_err = np.concatenate((obs_fluxes_err, obs_propsmass_err))
model_values = np.concatenate((model_fluxes, model_propsmass))
for imod in range(scaling.size):
scaling[imod] = optimize.root(dchi2_over_ds2, scaling[imod],
args=(obs_fluxes, obs_errors,
model_fluxes[:, imod])).x
args=(obs_values, obs_values_err,
model_values[:, imod])).x
# χ² of the comparison of each model to each observation.
chi2 = np.zeros(model_fluxes.shape[1])
for i in range(obs_fluxes.size):
if np.isfinite(obs_fluxes[i]) and obs_errors[i] > 0.:
chi2 += np.square(
(obs_fluxes[i] - model_fluxes[i, :] * scaling) * (1./obs_errors[i]))
# Some observations may not have flux values in some filter(s), but
if np.isfinite(obs_fluxes[i]) and obs_fluxes_err[i] > 0.:
chi2 += np.square((obs_fluxes[i] - model_fluxes[i, :] * scaling) *
(1./obs_fluxes_err[i]))
for i in range(obs_propsmass.size):
if np.isfinite(obs_propsmass[i]):
chi2 += np.square((obs_propsmass[i] - model_propsmass[i, :] *
scaling) * (1./obs_propsmass_err[i]))
for i in range(obs_props.size):
if np.isfinite(obs_props[i]):
chi2 += np.square((obs_props[i] - model_props[i, :]) *
(1./obs_props_err[i]))
# they can have upper limit(s).
if limits == True:
for i, obs_error in enumerate(obs_errors):
for i, obs_error in enumerate(obs_fluxes_err):
if obs_error < 0.:
chi2 -= 2. * np.log(.5 *
(1. + erf(((obs_fluxes[i] -
model_fluxes[i, :] * scaling) /
(-np.sqrt(2.)*obs_errors[i])))))
model_fluxes[i, :] * scaling) /
(-np.sqrt(2.)*obs_fluxes_err[i])))))
return chi2, scaling
......
......@@ -46,6 +46,7 @@ def init_sed(models, t0, ncomputed):
gbl_t0 = t0
gbl_ncomputed = ncomputed
def init_analysis(models, results, t0, ncomputed):
"""Initializer of the pool of processes to share variables between workers.
......@@ -61,9 +62,10 @@ def init_analysis(models, results, t0, ncomputed):
Number of computed models. Shared among workers.
"""
global gbl_models, gbl_results, gbl_t0, gbl_ncomputed
global gbl_models, gbl_obs, gbl_results, gbl_t0, gbl_ncomputed
gbl_models = models
gbl_obs = models.obs
gbl_results = results
gbl_t0 = t0
gbl_ncomputed = ncomputed
......@@ -103,6 +105,7 @@ def init_bestfit(conf, params, observations, results, t0, ncomputed):
gbl_t0 = t0
gbl_ncomputed = ncomputed
def sed(idx, midx):
"""Worker process to retrieve a SED and affect the relevant data to an
instance of ModelsManager.
......@@ -126,12 +129,18 @@ def sed(idx, midx):
if 'sfh.age' in sed.info and sed.info['sfh.age'] > sed.info['universe.age']:
gbl_models.fluxes[:, idx] = np.full(len(gbl_obs.bands), np.nan)
gbl_models.properties[:, idx] = np.full(len(gbl_properties), np.nan)
gbl_models.intprops[:, idx] = np.full(len(gbl_obs.intprops), np.nan)
gbl_models.extprops[:, idx] = np.full(len(gbl_obs.extprops), np.nan)
else:
gbl_models.fluxes[:, idx] = [sed.compute_fnu(filter_)
for filter_ in gbl_obs.bands]
gbl_models.properties[:, idx] = [sed.info[name]
for name in gbl_properties]
gbl_models.intprops[:, idx] = [sed.info[name]
for name in gbl_obs.intprops]
gbl_models.extprops[:, idx] = [sed.info[name]
for name in gbl_obs.extprops]
with gbl_ncomputed.get_lock():
gbl_ncomputed.value += 1
ncomputed = gbl_ncomputed.value
......@@ -142,6 +151,7 @@ def sed(idx, midx):
format(ncomputed, nmodels, dt, ncomputed/dt),
end="\n" if ncomputed == nmodels else "\r")
def analysis(idx, obs):
"""Worker process to analyse the PDF and estimate parameters values and
store them in an instance of ResultsManager.
......@@ -157,21 +167,22 @@ def analysis(idx, obs):
"""
np.seterr(invalid='ignore')
if obs['redshift'] >= 0.:
if obs.redshift >= 0.:
# We pick the the models with the closest redshift using a slice to
# work on views of the arrays and not on copies to save on RAM.
z = np.array(gbl_models.conf['sed_modules_params']['redshifting']['redshift'])
wz = slice(np.abs(obs['redshift']-z).argmin(), None, z.size)
corr_dz = compute_corr_dz(z[wz.start], obs['redshift'])
z = np.array(
gbl_models.conf['sed_modules_params']['redshifting']['redshift'])
wz = slice(np.abs(obs.redshift-z).argmin(), None, z.size)
corr_dz = compute_corr_dz(z[wz.start], obs.redshift)
else: # We do not know the redshift so we use the full grid
wz = slice(0, None, 1)
corr_dz = 1.
obs_fluxes = np.array([obs[name] for name in gbl_models.obs.bands])
obs_errors = np.array([obs[name + "_err"] for name in
gbl_models.obs.bands])
chi2, scaling = compute_chi2(gbl_models.fluxes[:, wz], obs_fluxes,
obs_errors, gbl_models.conf['analysis_params']['lim_flag'])
observation = gbl_obs.observations[idx]
chi2, scaling = compute_chi2(gbl_models.fluxes[:, wz],
gbl_models.intprops[:, wz],
gbl_models.extprops[:, wz], observation,
gbl_models.conf['analysis_params']['lim_flag'])
if np.any(np.isfinite(chi2)):
# We use the exponential probability associated with the χ² as
......@@ -206,7 +217,7 @@ def analysis(idx, obs):
else:
# It sometimes happens because models are older than the Universe's age
print("No suitable model found for the object {}. One possible origin "
"is that models are older than the Universe.".format(obs['id']))
"is that models are older than the Universe.".format(obs.id))
with gbl_ncomputed.get_lock():
gbl_ncomputed.value += 1
......@@ -216,6 +227,7 @@ def analysis(idx, obs):
format(ncomputed, len(gbl_models.obs), dt, ncomputed/dt),
end="\n" if ncomputed == len(gbl_models.obs) else "\r")
def bestfit(oidx, obs):
"""Worker process to compute and save the best fit.
......@@ -240,14 +252,16 @@ def bestfit(oidx, obs):
# We compute the model at the exact redshift not to have to correct for the
# difference between the object and the grid redshifts.
params = deepcopy(gbl_params.from_index(best_index))
if obs['redshift'] >= 0.:
params[-1]['redshift'] = obs['redshift']
if obs.redshift >= 0.:
params[-1]['redshift'] = obs.redshift
sed = gbl_warehouse.get_sed(gbl_params.modules, params)
fluxes = np.array([sed.compute_fnu(filt) for filt in gbl_obs.bands])
obs_fluxes = np.array([obs[name] for name in gbl_obs.bands])
obs_errors = np.array([obs[name + '_err'] for name in gbl_obs.bands])
_, scaling = compute_chi2(fluxes[:, None], obs_fluxes, obs_errors,
intprops = np.array([sed.info[prop] for prop in gbl_obs.intprops])
extprops = np.array([sed.info[prop] for prop in gbl_obs.extprops])
_, scaling = compute_chi2(fluxes[:, None], intprops[:, None],
extprops[:, None], obs,
gbl_conf['analysis_params']['lim_flag'])
gbl_results.best.properties[oidx, :] = [sed.info[k] for k in
......
......@@ -9,13 +9,10 @@ compute them, such as the configuration, the observations, and the parameters
of the models.
"""
import ctypes
from multiprocessing.sharedctypes import RawArray
from astropy.table import Table, Column
import numpy as np
from ..warehouse import SedWarehouse
from .utils import SharedArray, get_info
class ModelsManager(object):
......@@ -34,41 +31,43 @@ class ModelsManager(object):
self.block = params.blocks[iblock]
self.propertiesnames = conf['analysis_params']['variables']
self.allpropertiesnames, self.massproportional = self._get_info()
self.allpropertiesnames, self.massproportional = get_info(self)
self._fluxes_shape = (len(obs.bands), len(self.block))
self._props_shape = (len(self.propertiesnames), len(self.block))
self._fluxes = SharedArray((len(self.obs.bands), len(self.block)))
self._properties = SharedArray((len(self.propertiesnames),
len(self.block)))
# Arrays where we store the data related to the models. For memory
# efficiency reasons, we use RawArrays that will be passed in argument
# to the pool. Each worker will fill a part of the RawArrays. It is
# important that there is no conflict and that two different workers do
# not write on the same section.
self._fluxes = self._shared_array(self._fluxes_shape)
self._properties = self._shared_array(self._props_shape)
if conf['analysis_method'] == 'pdf_analysis':
self._intprops = SharedArray((len(self.obs.intprops),
len(self.block)))
self._extprops = SharedArray((len(self.obs.extprops),
len(self.block)))
@property
def fluxes(self):
"""Returns a shared array containing the fluxes of the models.
"""
return np.ctypeslib.as_array(self._fluxes).reshape(self._fluxes_shape)
return self._fluxes.data
@property
def properties(self):
"""Returns a shared array containing the properties of the models.
"""
return np.ctypeslib.as_array(self._properties).reshape(self._props_shape)
return self._properties.data
def _get_info(self):
warehouse = SedWarehouse()
sed = warehouse.get_sed(self.conf['sed_modules'],
self.params.from_index(0))
info = list(sed.info.keys())
info.sort()
@property
def intprops(self):
"""Returns a shared array containing the intensive properties to fit.
"""
return self._intprops.data
return (info, sed.mass_proportional_info)
@property
def extprops(self):
"""Returns a shared array containing the extensive properties to fit.
"""
return self._extprops.data
def save(self, filename):
"""Save the fluxes and properties of all the models into a table.
......@@ -87,7 +86,3 @@ class ModelsManager(object):
table.write("out/{}.fits".format(filename))
table.write("out/{}.txt".format(filename), format='ascii.fixed_width',
delimiter=None)
@staticmethod
def _shared_array(shape):
return RawArray(ctypes.c_double, int(np.product(shape)))
......@@ -7,6 +7,8 @@ from astropy.table import Column
import numpy as np
from ..utils import read_table
from .utils import get_info
class ObservationsManager(object):
"""Class to abstract the handling of the observations and provide a
......@@ -16,13 +18,31 @@ class ObservationsManager(object):
check the consistency of the data, replace invalid values with NaN, etc.
"""
def __new__(cls, config, **kwargs):
def __new__(cls, config, params=None, **kwargs):
if config['data_file']:
return ObservationsManagerPassbands(config, **kwargs)
return ObservationsManagerPassbands(config, params, **kwargs)
else:
return ObservationsManagerVirtual(config, **kwargs)
class Observation(object):
"""Class to take one row of the observations table and extract the list of
fluxes, intensive properties, extensive properties and their errors, that
are going to be considered in the fit.
"""
def __init__(self, row, cls):
self.redshift = row['redshift']
self.id = row['id']
self.fluxes = np.array([row[band] for band in cls.bands])
self.fluxes_err = np.array([row[band + '_err'] for band in cls.bands])
self.intprops = np.array([row[prop] for prop in cls.intprops])
self.intprops_err = np.array([row[prop + '_err'] for prop in
cls.intprops])
self.extprops = np.array([row[prop] for prop in cls.extprops])
self.extprops_err = np.array([row[prop + '_err'] for prop in
cls.extprops])
class ObservationsManagerPassbands(object):
"""Class to generate a manager for data files providing fluxes in
passbands.
......@@ -31,14 +51,29 @@ class ObservationsManagerPassbands(object):
at each iteration.
"""
def __init__(self, config, defaulterror=0.1, modelerror=0.1,
def __init__(self, config, params, defaulterror=0.1, modelerror=0.1,
threshold=-9990.):
self.conf = config
self.params = params
self.allpropertiesnames, self.massproportional = get_info(self)
self.table = read_table(config['data_file'])
self.bands = [band for band in config['bands'] if not
band.endswith('_err')]
self.errors = [band for band in config['bands'] if
band.endswith('_err')]
self.bands_err = [band for band in config['bands'] if
band.endswith('_err')]
self.intprops = [prop for prop in config['properties'] if (prop not in
self.massproportional and not prop.endswith('_err'))]
self.intprops_err = [prop for prop in config['properties'] if
(prop.endswith('_err') and prop[:-4] not in
self.massproportional)]
self.extprops = [prop for prop in config['properties'] if (prop in
self.massproportional and not prop.endswith('_err'))]
self.extprops_err = [prop for prop in config['properties'] if
(prop.endswith('_err') and prop[:-4] in
self.massproportional)]
self.tofit = self.bands + self.intprops + self.extprops
self.tofit_err = self.bands_err + self.intprops_err + self.extprops_err
# Sanitise the input
self._check_filters()
......@@ -47,43 +82,45 @@ class ObservationsManagerPassbands(object):
threshold)
self._add_model_error(modelerror)
self.observations = list([Observation(row, self) for row in self.table])
def __len__(self):
return len(self.table)
return len(self.observations)
def __iter__(self):
self.idx = 0
self.max = len(self.table)
self.max = len(self.observations)
return self
def __next__(self):
if self.idx < self.max:
obs = self.table[self.idx]
obs = self.observations[self.idx]
self.idx += 1
return obs
raise StopIteration
def _check_filters(self):
"""Check whether the list of filters makes sense.
"""Check whether the list of filters and properties makes sense.
Two situations are checked:
* If a filter to be included in the fit is missing from the data file,
an exception is raised.
* If a filter is given in the input file but is not to be included in
the fit, a warning is displayed
* If a filter or property to be included in the fit is missing from
the data file, an exception is raised.
* If a filter or property is given in the input file but is not to be
included in the fit, a warning is displayed
"""
for band in self.bands + self.errors:
if band not in self.table.colnames:
for item in self.tofit + self.tofit_err:
if item not in self.table.colnames:
raise Exception("{} to be taken in the fit but not present "
"in the observation table.".format(band))
"in the observation table.".format(item))
for band in self.table.colnames:
if (band != 'id' and band != 'redshift' and
band not in self.bands + self.errors):
self.table.remove_column(band)
print("Warning: {} in the input file but not to be taken into "
"account in the fit.".format(band))
for item in self.table.colnames:
if (item != 'id' and item != 'redshift' and item not in self.tofit +
self.tofit_err):
self.table.remove_column(item)
print("Warning: {} in the input file but not to be taken into"
" account in the fit.".format(item))
def _check_errors(self, defaulterror=0.1):
"""Check whether the error columns are present. If not, add them.
......@@ -107,16 +144,19 @@ class ObservationsManagerPassbands(object):
if defaulterror < 0.:
raise ValueError("The relative default error must be positive.")
for band in self.bands:
banderr = band + '_err'
if banderr not in self.errors or banderr not in self.table.colnames:
colerr = Column(data=np.fabs(self.table[band] * defaulterror),
name=banderr)
for item in self.tofit:
error = item + '_err'
if item in self.intprops:
if error not in self.intprops_err or error not in self.table.colnames:
raise ValueError("Intensive properties errors must be in input file.")