Commit b646d54f authored by Médéric Boquien

Introduce the possibility of saving only specific output parameters with...

Introduce the possibility of saving only specific output parameters with savefluxes. This should accelerate the computation while reducing memory usage.
parent 92b9d707
......@@ -2,6 +2,8 @@
## Unreleased
### Added
- When using the savefluxes module, all the output parameters were saved. This is not efficient when the user is only interested in some of the output parameters. We introduce the "variables" configuration parameter for savefluxes to list the output parameters the user wants to save. If the list is left empty, all parameters are saved, preserving the current behaviour. This should substantially increase the speed while also reducing memory usage.
### Changed
### Fixed
### Optimised
......
......@@ -42,6 +42,12 @@ class SaveFluxes(AnalysisModule):
"""
parameter_list = dict([
("variables", (
"array of strings",
"List of variables to be saved. If the list is left empty, all"
"variables will be saved.",
['']
)),
("output_file", (
"string",
"Name of the output file that contains the parameters of the "
......@@ -112,13 +118,17 @@ class SaveFluxes(AnalysisModule):
params = ParametersHandler(creation_modules, creation_modules_params)
n_params = params.size
# Retrieve an arbitrary SED to obtain the list of output parameters
warehouse = SedWarehouse()
sed = warehouse.get_sed(creation_modules, params.from_index(0))
info = list(sed.info.keys())
if parameters["variables"] == '':
# Retrieve an arbitrary SED to obtain the list of output parameters
warehouse = SedWarehouse()
sed = warehouse.get_sed(creation_modules, params.from_index(0))
info = list(sed.info.keys())
del warehouse, sed
else:
info = parameters["variables"]
n_info = len(info)
info.sort()
n_info = len(sed.info)
del warehouse, sed
n_info = len(info)
model_fluxes = (RawArray(ctypes.c_double,
n_params * n_filters),
......@@ -127,7 +137,7 @@ class SaveFluxes(AnalysisModule):
n_params * n_info),
(n_params, n_info))
initargs = (params, filters, save_sed, model_fluxes,
initargs = (params, filters, save_sed, info, model_fluxes,
model_parameters, time.time(), mp.Value('i', 0))
if cores == 1: # Do not create a new process
init_worker_fluxes(*initargs)
......
......@@ -13,7 +13,8 @@ from ...warehouse import SedWarehouse
from ..utils import OUT_DIR
def init_fluxes(params, filters, save_sed, fluxes, info, t_begin, n_computed):
def init_fluxes(params, filters, save_sed, variables, fluxes, info, t_begin,
n_computed):
"""Initializer of the pool of processes. It is mostly used to convert
RawArrays into numpy arrays. The latter are defined as global variables to
be accessible from the workers.
......@@ -26,6 +27,8 @@ def init_fluxes(params, filters, save_sed, fluxes, info, t_begin, n_computed):
Contains the names of the filters to compute the fluxes.
save_sed: boolean
Indicates whether the SED should be saved.
variables: list
List of variables to be computed
fluxes: RawArray and tuple containing the shape
Fluxes of individual models. Shared among workers.
n_computed: Value
......@@ -36,7 +39,7 @@ def init_fluxes(params, filters, save_sed, fluxes, info, t_begin, n_computed):
"""
global gbl_model_fluxes, gbl_model_info, gbl_n_computed, gbl_t_begin
global gbl_params, gbl_previous_idx, gbl_filters, gbl_save_sed
global gbl_warehouse, gbl_keys
global gbl_warehouse, gbl_variables
gbl_model_fluxes = np.ctypeslib.as_array(fluxes[0])
gbl_model_fluxes = gbl_model_fluxes.reshape(fluxes[1])
......@@ -55,9 +58,10 @@ def init_fluxes(params, filters, save_sed, fluxes, info, t_begin, n_computed):
gbl_save_sed = save_sed
gbl_variables = variables
gbl_warehouse = SedWarehouse()
gbl_keys = None
def fluxes(idx):
"""Worker process to retrieve a SED and affect the relevant data to shared
......@@ -79,20 +83,17 @@ def fluxes(idx):
sed = gbl_warehouse.get_sed(gbl_params.modules,
gbl_params.from_index(idx))
if gbl_save_sed is True:
sed.to_votable(OUT_DIR + "{}_best_model.xml".format(idx))
if 'sfh.age' in sed.info and sed.info['sfh.age'] > sed.info['universe.age']:
gbl_model_fluxes[idx, :] = np.full(len(gbl_filters), np.nan)
gbl_model_info[idx, :] = np.full(len(gbl_variables), np.nan)
else:
gbl_model_fluxes[idx, :] = np.array([sed.compute_fnu(filter_) for
filter_ in gbl_filters])
if gbl_keys is None:
gbl_keys = list(sed.info.keys())
gbl_keys.sort()
gbl_model_info[idx, :] = np.array([sed.info[k] for k in gbl_keys])
gbl_model_info[idx, :] = np.array([sed.compute_fnu(filter_) for
filter_ in gbl_filters])
gbl_model_info[idx, :] = np.array([sed.info[name] for name in
gbl_variables])
if gbl_save_sed is True:
sed.to_votable(OUT_DIR + "{}_best_model.xml".format(idx))
with gbl_n_computed.get_lock():
gbl_n_computed.value += 1
n_computed = gbl_n_computed.value
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment