Commit adccbbed authored by Médéric Boquien's avatar Médéric Boquien

Get rid of the array containing the redshifts of all the models. Now that we...

Get rid of the array containing the redshifts of all the models. Now that we infer the redshift of individual models from the list of unique redshifts, it is not needed anymore. This change saves 8 bytes per model. The code should be marginally faster too.
parent 41cb2352
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
### Changed ### Changed
### Fixed ### Fixed
### Optimised ### Optimised
- Prior to version 0.7.0, we needed to maintain the list of redshifts for all the computed models. Past 0.7.0 we just infer the redshift from a list of unique redshifts. This means that we can now discard the list of redshifts for all the models and only keep the list of unique redshifts. This saves ~8 MB of memory for every 10⁶ models. The models should be computed slightly faster, but the difference is within the measurement noise.
## 0.8.0 (2015-12-01) ## 0.8.0 (2015-12-01)
### Added ### Added
......
...@@ -148,7 +148,8 @@ class PdfAnalysis(AnalysisModule): ...@@ -148,7 +148,8 @@ class PdfAnalysis(AnalysisModule):
z = np.unique(np.around(obs_table['redshift'], z = np.unique(np.around(obs_table['redshift'],
decimals=REDSHIFT_DECIMALS)) decimals=REDSHIFT_DECIMALS))
creation_modules_params[w_redshifting]['redshift'] = z creation_modules_params[w_redshifting]['redshift'] = z
del z else:
z = np.array(creation_modules_params[w_redshifting]['redshift'])
# The parameters handler allows us to retrieve the models parameters # The parameters handler allows us to retrieve the models parameters
# from a 1D index. This is useful in that we do not have to create # from a 1D index. This is useful in that we do not have to create
...@@ -175,8 +176,6 @@ class PdfAnalysis(AnalysisModule): ...@@ -175,8 +176,6 @@ class PdfAnalysis(AnalysisModule):
# not write on the same section. # not write on the same section.
# We put the shape in a tuple along with the RawArray because workers # We put the shape in a tuple along with the RawArray because workers
# need to know the shape to create the numpy array from the RawArray. # need to know the shape to create the numpy array from the RawArray.
model_redshifts = (RawArray(ctypes.c_double, n_params),
(n_params))
model_fluxes = (RawArray(ctypes.c_double, model_fluxes = (RawArray(ctypes.c_double,
n_params * n_filters), n_params * n_filters),
(n_params, n_filters)) (n_params, n_filters))
...@@ -184,9 +183,8 @@ class PdfAnalysis(AnalysisModule): ...@@ -184,9 +183,8 @@ class PdfAnalysis(AnalysisModule):
n_params * n_variables), n_params * n_variables),
(n_params, n_variables)) (n_params, n_variables))
initargs = (params, filters, analysed_variables_nolog, model_redshifts, initargs = (params, filters, analysed_variables_nolog, model_fluxes,
model_fluxes, model_variables, time.time(), model_variables, time.time(), mp.Value('i', 0))
mp.Value('i', 0))
if cores == 1: # Do not create a new process if cores == 1: # Do not create a new process
init_worker_sed(*initargs) init_worker_sed(*initargs)
for idx in range(n_params): for idx in range(n_params):
...@@ -210,11 +208,11 @@ class PdfAnalysis(AnalysisModule): ...@@ -210,11 +208,11 @@ class PdfAnalysis(AnalysisModule):
best_chi2 = (RawArray(ctypes.c_double, n_obs), (n_obs)) best_chi2 = (RawArray(ctypes.c_double, n_obs), (n_obs))
best_chi2_red = (RawArray(ctypes.c_double, n_obs), (n_obs)) best_chi2_red = (RawArray(ctypes.c_double, n_obs), (n_obs))
initargs = (params, filters, analysed_variables, model_redshifts, initargs = (params, filters, analysed_variables, z, model_fluxes,
model_fluxes, model_variables, time.time(), model_variables, time.time(), mp.Value('i', 0),
mp.Value('i', 0), analysed_averages, analysed_std, analysed_averages, analysed_std, best_fluxes,
best_fluxes, best_parameters, best_chi2, best_chi2_red, best_parameters, best_chi2, best_chi2_red, save, lim_flag,
save, lim_flag, n_obs) n_obs)
if cores == 1: # Do not create a new process if cores == 1: # Do not create a new process
init_worker_analysis(*initargs) init_worker_analysis(*initargs)
for idx, obs in enumerate(obs_table): for idx, obs in enumerate(obs_table):
...@@ -258,11 +256,11 @@ class PdfAnalysis(AnalysisModule): ...@@ -258,11 +256,11 @@ class PdfAnalysis(AnalysisModule):
for idx, name in enumerate(filters): for idx, name in enumerate(filters):
mock_table[name] = mock_fluxes[:, idx] mock_table[name] = mock_fluxes[:, idx]
initargs = (params, filters, analysed_variables, model_redshifts, initargs = (params, filters, analysed_variables, z, model_fluxes,
model_fluxes, model_variables, time.time(), model_variables, time.time(), mp.Value('i', 0),
mp.Value('i', 0), analysed_averages, analysed_std, analysed_averages, analysed_std, best_fluxes,
best_fluxes, best_parameters, best_chi2, best_parameters, best_chi2, best_chi2_red, save,
best_chi2_red, save, lim_flag, n_obs) lim_flag, n_obs)
if cores == 1: # Do not create a new process if cores == 1: # Do not create a new process
init_worker_analysis(*initargs) init_worker_analysis(*initargs)
for idx, mock in enumerate(mock_table): for idx, mock in enumerate(mock_table):
......
...@@ -16,8 +16,7 @@ from .utils import (save_best_sed, save_pdf, save_chi2, compute_chi2, ...@@ -16,8 +16,7 @@ from .utils import (save_best_sed, save_pdf, save_chi2, compute_chi2,
from ...warehouse import SedWarehouse from ...warehouse import SedWarehouse
def init_sed(params, filters, analysed, redshifts, fluxes, variables, def init_sed(params, filters, analysed, fluxes, variables, t_begin, n_computed):
t_begin, n_computed):
"""Initializer of the pool of processes. It is mostly used to convert """Initializer of the pool of processes. It is mostly used to convert
RawArrays into numpy arrays. The latter are defined as global variables to RawArrays into numpy arrays. The latter are defined as global variables to
be accessible from the workers. be accessible from the workers.
...@@ -30,8 +29,6 @@ def init_sed(params, filters, analysed, redshifts, fluxes, variables, ...@@ -30,8 +29,6 @@ def init_sed(params, filters, analysed, redshifts, fluxes, variables,
Contains the names of the filters to compute the fluxes. Contains the names of the filters to compute the fluxes.
analysed: list analysed: list
Variable names to be analysed. Variable names to be analysed.
redshifts: RawArray and tuple containing the shape
Redshifts of individual models. Shared among workers.
fluxes: RawArray and tuple containing the shape fluxes: RawArray and tuple containing the shape
Fluxes of individual models. Shared among workers. Fluxes of individual models. Shared among workers.
variables: RawArray and tuple containing the shape variables: RawArray and tuple containing the shape
...@@ -42,11 +39,9 @@ def init_sed(params, filters, analysed, redshifts, fluxes, variables, ...@@ -42,11 +39,9 @@ def init_sed(params, filters, analysed, redshifts, fluxes, variables,
Time of the beginning of the computation. Time of the beginning of the computation.
""" """
global gbl_model_redshifts, gbl_model_fluxes, gbl_model_variables global gbl_model_fluxes, gbl_model_variables, gbl_n_computed, gbl_t_begin
global gbl_n_computed, gbl_t_begin, gbl_params, gbl_previous_idx global gbl_params, gbl_previous_idx, gbl_filters, gbl_analysed_variables
global gbl_filters, gbl_analysed_variables, gbl_warehouse global gbl_warehouse
gbl_model_redshifts = np.ctypeslib.as_array(redshifts[0])
gbl_model_fluxes = np.ctypeslib.as_array(fluxes[0]) gbl_model_fluxes = np.ctypeslib.as_array(fluxes[0])
gbl_model_fluxes = gbl_model_fluxes.reshape(fluxes[1]) gbl_model_fluxes = gbl_model_fluxes.reshape(fluxes[1])
...@@ -67,7 +62,7 @@ def init_sed(params, filters, analysed, redshifts, fluxes, variables, ...@@ -67,7 +62,7 @@ def init_sed(params, filters, analysed, redshifts, fluxes, variables,
gbl_warehouse = SedWarehouse() gbl_warehouse = SedWarehouse()
def init_analysis(params, filters, analysed, redshifts, fluxes, variables, def init_analysis(params, filters, analysed, z, fluxes, variables,
t_begin, n_computed, analysed_averages, analysed_std, t_begin, n_computed, analysed_averages, analysed_std,
best_fluxes, best_parameters, best_chi2, best_chi2_red, save, best_fluxes, best_parameters, best_chi2, best_chi2_red, save,
lim_flag, n_obs): lim_flag, n_obs):
...@@ -83,7 +78,7 @@ def init_analysis(params, filters, analysed, redshifts, fluxes, variables, ...@@ -83,7 +78,7 @@ def init_analysis(params, filters, analysed, redshifts, fluxes, variables,
Contains filters to compute the fluxes. Contains filters to compute the fluxes.
analysed: list analysed: list
Variable names to be analysed Variable names to be analysed
redshifts: RawArray and tuple containing the shape. z: RawArray and tuple containing the shape.
Redshifts of individual models. Shared among workers. Redshifts of individual models. Shared among workers.
fluxes: RawArray fluxes: RawArray
Fluxes of individual models. Shared among workers. Fluxes of individual models. Shared among workers.
...@@ -112,9 +107,8 @@ def init_analysis(params, filters, analysed, redshifts, fluxes, variables, ...@@ -112,9 +107,8 @@ def init_analysis(params, filters, analysed, redshifts, fluxes, variables,
Number of observations. Number of observations.
""" """
init_sed(params, filters, analysed, redshifts, fluxes, variables, init_sed(params, filters, analysed, fluxes, variables, t_begin, n_computed)
t_begin, n_computed) global gbl_z, gbl_analysed_averages, gbl_analysed_std
global gbl_redshifts, gbl_analysed_averages, gbl_analysed_std
global gbl_best_fluxes, gbl_best_parameters, gbl_best_chi2 global gbl_best_fluxes, gbl_best_parameters, gbl_best_chi2
global gbl_best_chi2_red, gbl_save, gbl_n_obs, gbl_lim_flag, gbl_keys global gbl_best_chi2_red, gbl_save, gbl_n_obs, gbl_lim_flag, gbl_keys
...@@ -134,8 +128,7 @@ def init_analysis(params, filters, analysed, redshifts, fluxes, variables, ...@@ -134,8 +128,7 @@ def init_analysis(params, filters, analysed, redshifts, fluxes, variables,
gbl_best_chi2_red = np.ctypeslib.as_array(best_chi2_red[0]) gbl_best_chi2_red = np.ctypeslib.as_array(best_chi2_red[0])
gbl_redshifts = gbl_model_redshifts[np.unique(gbl_model_redshifts, gbl_z = z
return_index=True)[1]]
gbl_save = save gbl_save = save
gbl_lim_flag = lim_flag gbl_lim_flag = lim_flag
...@@ -174,8 +167,6 @@ def sed(idx): ...@@ -174,8 +167,6 @@ def sed(idx):
for name in for name in
gbl_analysed_variables]) gbl_analysed_variables])
gbl_model_redshifts[idx] = sed.info['universe.redshift']
with gbl_n_computed.get_lock(): with gbl_n_computed.get_lock():
gbl_n_computed.value += 1 gbl_n_computed.value += 1
n_computed = gbl_n_computed.value n_computed = gbl_n_computed.value
...@@ -209,8 +200,7 @@ def analysis(idx, obs): ...@@ -209,8 +200,7 @@ def analysis(idx, obs):
if obs['redshift'] >= 0.: if obs['redshift'] >= 0.:
# We pick the models with the closest redshift using a slice to # We pick the models with the closest redshift using a slice to
# work on views of the arrays and not on copies to save on RAM. # work on views of the arrays and not on copies to save on RAM.
wz = slice(np.abs(obs['redshift'] - gbl_redshifts).argmin(), None, wz = slice(np.abs(obs['redshift'] - gbl_z).argmin(), None, gbl_z.size)
gbl_redshifts.size)
else: # We do not know the redshift so we use the full grid else: # We do not know the redshift so we use the full grid
wz = slice(0, None, 1) wz = slice(0, None, 1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment