From 0d88332091635e23ca7957ae4f6a044679b2173d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A9d=C3=A9ric=20Boquien?= Date: Sat, 30 Jan 2016 15:16:43 -0300 Subject: [PATCH] Practical implementation of the validation of the parameters. The patch is quite long as it has a direct effect on the structure of the configuration dictionary. The validation has the advantage of automatically converting the parameters to the right type. Therefore rather than building a dictionary ourselves, we use the ready-made dictionary from ConfigObj. Because the names of the sections are not the same, quite a bit of code had to be adapted. Finally, note that the validation file containing the specification of each variable, pcigale.ini.spec, is created while building the pcigale.ini file. Also because it is needed to convert the data to the right type, one cannot run cigale without a correct validation file. --- CHANGELOG.md | 1 + pcigale/__init__.py | 12 +- pcigale/analysis_modules/__init__.py | 2 +- .../analysis_modules/pdf_analysis/__init__.py | 19 +- .../analysis_modules/savefluxes/__init__.py | 8 +- pcigale/analysis_modules/utils.py | 1 + pcigale/handlers/parameters_handler.py | 4 +- pcigale/session/configuration.py | 211 +++++++----------- 8 files changed, 111 insertions(+), 147 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d7ad1b90..e8c1c299 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Similarly to the savefluxes module, in the pdf_analysis module if the list of physical properties is left empty, all physical parameters are now analysed. (Médéric Boquien) - It is now possible to pass the parameters of the models to be computed from a file rather than having to indicate them in pcigale.ini. This means that the models do not necessarily need to be computed on a systematic grid of parameters. The name of the file is passed as an argument to the parameters\_file keyword in pcigale.ini. If this is done, the creation\_modules argument is ignored. 
Finally, the file must be formatted as following: each row is a different model and each column a different parameter. They must follow the naming scheme: module\_name.parameter\_name, that is "bc03.imf" for instance. (Médéric Boquien) - Addition of the schreiber2016 SED creation module implementing the Schreiber et al. (2016) dust models. (Laure Ciesla) +- The physical parameters provided in pcigale.ini were not checked at startup against what the modules could accept. This could lead to a runtime crash if an unexpected value was passed to the module. Now the parameters are checked at startup. If an issue is found, it is indicated and the user is asked to fix it before launching cigale again. The validation file is built at the same time as pcigale.ini. (Médéric Boquien) ### Changed - The estimates of the physical parameters from the analysis of the PDF and from the best fit were recorded in separate files. This can be bothersome when trying to compare quantities from different files. Rather, we generate a single file containing all quantities. The ones estimated from the analysis of the PDF are prefixed with "bayes" and the ones from the best fit with "best". (Médéric Boquien) diff --git a/pcigale/__init__.py b/pcigale/__init__.py index a669ea80..0adf180d 100644 --- a/pcigale/__init__.py +++ b/pcigale/__init__.py @@ -37,16 +37,20 @@ def check(config): # TODO: Check if all the parameters that don't have default values are # given for each module. configuration = config.configuration - print("With this configuration cigale will compute {} " - "models.".format(ParametersHandler(configuration).size)) + + if configuration: + print("With this configuration cigale will compute {} " + "models.".format(ParametersHandler(configuration).size)) def run(config): """Run the analysis. 
""" configuration = config.configuration - analysis_module = get_module(configuration['analysis_method']) - analysis_module.process(configuration) + + if configuration: + analysis_module = get_module(configuration['analysis_method']) + analysis_module.process(configuration) def main(): diff --git a/pcigale/analysis_modules/__init__.py b/pcigale/analysis_modules/__init__.py index 0d64700c..0796cd5b 100644 --- a/pcigale/analysis_modules/__init__.py +++ b/pcigale/analysis_modules/__init__.py @@ -71,7 +71,7 @@ class AnalysisModule(object): KeyError: when not all the needed parameters are given. """ - parameters = configuration['analysis_method_params'] + parameters = configuration['analysis_params'] # For parameters that are present on the parameter_list with a default # value and that are not in the parameters dictionary, we add them # with their default value. diff --git a/pcigale/analysis_modules/pdf_analysis/__init__.py b/pcigale/analysis_modules/pdf_analysis/__init__.py index 831b6832..7daed44a 100644 --- a/pcigale/analysis_modules/pdf_analysis/__init__.py +++ b/pcigale/analysis_modules/pdf_analysis/__init__.py @@ -66,13 +66,13 @@ class PdfAnalysis(AnalysisModule): False )), ("save_chi2", ( - "boolean{}", + "boolean()", "If true, for each observation and each analysed variable save " "the reduced chi2.", False )), ("save_pdf", ( - "boolean{}", + "boolean()", "If true, for each observation and each analysed variable save " "the probability density function.", False @@ -115,16 +115,16 @@ class PdfAnalysis(AnalysisModule): # Initalise variables from input arguments. 
creation_modules = conf['creation_modules'] - creation_modules_params = conf['creation_modules_params'] - analysed_variables = conf['analysis_method_params']["analysed_variables"] + creation_modules_params = conf['sed_modules_params'] + analysed_variables = conf['analysis_params']["analysed_variables"] analysed_variables_nolog = [variable[:-4] if variable.endswith('_log') else variable for variable in analysed_variables] n_variables = len(analysed_variables) - save = {key: conf['analysis_method_params']["save_{}".format(key)].lower() == "true" - for key in ["best_sed", "chi2", "pdf"]} - lim_flag = conf['analysis_method_params']["lim_flag"].lower() == "true" - mock_flag = conf['analysis_method_params']["mock_flag"].lower() == "true" + save = {key: conf['analysis_params']["save_{}".format(key)] for key in + ["best_sed", "chi2", "pdf"]} + lim_flag = conf['analysis_params']["lim_flag"] + mock_flag = conf['analysis_params']["mock_flag"] filters = [name for name in conf['column_list'] if not name.endswith('_err')] @@ -137,8 +137,7 @@ class PdfAnalysis(AnalysisModule): lim_flag) n_obs = len(obs_table) - w_redshifting = creation_modules.index('redshifting') - z = np.array(creation_modules_params[w_redshifting]['redshift']) + z = np.array(creation_modules_params['redshifting']['redshift']) # The parameters handler allows us to retrieve the models parameters # from a 1D index. 
This is useful in that we do not have to create diff --git a/pcigale/analysis_modules/savefluxes/__init__.py b/pcigale/analysis_modules/savefluxes/__init__.py index 1582a91e..a1c98556 100644 --- a/pcigale/analysis_modules/savefluxes/__init__.py +++ b/pcigale/analysis_modules/savefluxes/__init__.py @@ -81,9 +81,9 @@ class SaveFluxes(AnalysisModule): # Rename the output directory if it exists backup_dir() - out_file = conf['analysis_method_params']['output_file'] - out_format = conf['analysis_method_params']['output_format'] - save_sed = conf['analysis_method_params']['save_sed'].lower() == "true" + out_file = conf['analysis_params']['output_file'] + out_format = conf['analysis_params']['output_format'] + save_sed = conf['analysis_params']['save_sed'] filters = [name for name in conf['column_list'] if not name.endswith('_err')] @@ -97,7 +97,7 @@ class SaveFluxes(AnalysisModule): params = ParametersHandler(conf) n_params = params.size - info = conf['analysis_method_params']['variables'] + info = conf['analysis_params']['variables'] n_info = len(info) model_fluxes = (RawArray(ctypes.c_double, n_params * n_filters), diff --git a/pcigale/analysis_modules/utils.py b/pcigale/analysis_modules/utils.py index f6604444..26179e8e 100644 --- a/pcigale/analysis_modules/utils.py +++ b/pcigale/analysis_modules/utils.py @@ -33,6 +33,7 @@ def backup_dir(directory=OUT_DIR): )) os.mkdir(directory) shutil.copyfile('pcigale.ini', directory + 'pcigale.ini') + shutil.copyfile('pcigale.ini.spec', directory + 'pcigale.ini.spec') def save_fluxes(model_fluxes, model_parameters, filters, names, filename, diff --git a/pcigale/handlers/parameters_handler.py b/pcigale/handlers/parameters_handler.py index 5f6ac72d..3540c174 100644 --- a/pcigale/handlers/parameters_handler.py +++ b/pcigale/handlers/parameters_handler.py @@ -46,8 +46,8 @@ class ParametersHandlerGrid(object): """ self.modules = configuration['creation_modules'] - self.parameters = [self._param_dict_combine(dictionary) for dictionary - 
in configuration['creation_modules_params']] + self.parameters = [self._param_dict_combine(configuration['sed_modules_params'][module]) + for module in self.modules] self.shape = tuple(len(parameter) for parameter in self.parameters) self.size = int(np.product(self.shape)) diff --git a/pcigale/session/configuration.py b/pcigale/session/configuration.py index cd844776..b15e0ba0 100644 --- a/pcigale/session/configuration.py +++ b/pcigale/session/configuration.py @@ -12,6 +12,7 @@ import configobj from glob import glob # To allow the use of glob() in "eval..." import pkg_resources import numpy as np +import validate from ..handlers.parameters_handler import ParametersHandler from ..data import Database @@ -19,64 +20,13 @@ from ..utils import read_table from .. import creation_modules from .. import analysis_modules from ..warehouse import SedWarehouse +from . import validation # Limit the redshift to this number of decimals REDSHIFT_DECIMALS = 2 -def evaluate_description(description): - """Evaluate a description from the config file as a list. - - The description is read from the config file by configobj that transforms - coma separated value in a list. From this description, this function try - to evaluate the desired list of values: - - If the description is a string beginning with 'eval ', then its content - (without 'eval ') is evaluated as Python code and its result returned. - An array is expected. - - If the description is a string beginning by 'range', the start, step and - stop values are then expected and the range is evaluated (stop included - if reached. - - Then the function tries to evaluate the description as a Numpy array of - float and returns the mere list if this fails. - - Parameters - ---------- - description: string or list - The description to be evaluated. - - Returns - ------- - results: list - The evaluated list of values. 
- - """ - results = description - if type(description) == str: - if description.startswith('eval '): - results = eval(description[4:]) - # If the evaluation lead to a single value, we put it in a list. - if not isinstance(results, Iterable): - results = [results] - elif description.startswith('range '): - start, stop, step = [float(item) for item - in description[5:].split()] - results = np.arange(start, stop+step, step) - else: - # We need to return a list to combine the list of possible values - # for each parameter. - results = [results] - - # We prefer to evaluate the parameter as a numpy array of floats if - # possible. - try: - results = np.array(results, float) - except ValueError: - pass - - return results - - class Configuration(object): """This class manages the configuration of pcigale. """ @@ -90,10 +40,22 @@ class Configuration(object): Name of the configuration file (pcigale.conf by default). """ + self.spec = configobj.ConfigObj(filename+'.spec', + write_empty_values=True, + indent_type=' ', + encoding='UTF8', + list_values=False, + _inspec=True) self.config = configobj.ConfigObj(filename, write_empty_values=True, indent_type=' ', - encoding='UTF8') + encoding='UTF8', + configspec=self.spec) + + # We validate the configuration so that the variables are converted to + # the expected type. We do not handle errors at this point but only when + # we actually return the configuration file from the property() method. + self.config.validate(validate.Validator(validation.functions)) def create_blank_conf(self): """Create the initial configuration file @@ -112,6 +74,7 @@ class Configuration(object): "'_err' suffix for the uncertainties. The fluxes and the " "uncertainties must be in mJy. 
This file is optional to generate " "the configuration file, in particular for the savefluxes module.") + self.spec['data_file'] = "string" self.config['parameters_file'] = "" self.config.comments['parameters_file'] = [""] + wrap( @@ -122,6 +85,7 @@ class Configuration(object): "one. Finally, if this parameters is not left empty, cigale will " "not interpret the configuration parameters given in pcigale.ini. " "They will be given only for information.") + self.spec['parameters_file'] = "string()" self.config['creation_modules'] = [] self.config.comments['creation_modules'] = ([""] + @@ -134,18 +98,22 @@ class Configuration(object): ["AGN: dale2014, fritz2006"] + ["Radio: radio"] + ["Redshift: redshifting (mandatory!)"]) + self.spec['creation_modules'] = "cigale_string_list()" self.config['analysis_method'] = "" self.config.comments['analysis_method'] = [""] + wrap( "Method used for statistical analysis. Available methods: " "pdf_analysis, savefluxes.") + self.spec['analysis_method'] = "string()" self.config['cores'] = "" self.config.comments['cores'] = [""] + wrap( "Number of CPU cores available. This computer has {} cores." .format(mp.cpu_count())) + self.spec['cores'] = "integer(min=1)" self.config.write() + self.spec.write() def generate_conf(self): """Generate the full configuration file @@ -192,16 +160,20 @@ class Configuration(object): self.config.comments['column_list'] = [""] + wrap( "List of the columns in the observation data file to use for " "the fitting.") + self.spec['column_list'] = "cigale_string_list()" # SED creation modules configurations. For each module, we generate # the configuration section from its parameter list. 
- self.config['sed_creation_modules'] = {} - self.config.comments['sed_creation_modules'] = ["", ""] + wrap( + self.config['sed_modules_params'] = {} + self.config.comments['sed_modules_params'] = ["", ""] + wrap( "Configuration of the SED creation modules.") + self.spec['sed_modules_params'] = {} for module_name in self.config['creation_modules']: - self.config["sed_creation_modules"][module_name] = {} - sub_config = self.config["sed_creation_modules"][module_name] + self.config['sed_modules_params'][module_name] = {} + self.spec['sed_modules_params'][module_name] = {} + sub_config = self.config['sed_modules_params'][module_name] + sub_spec = self.spec['sed_modules_params'][module_name] for name, (typ, description, default) in \ creation_modules.get_module( @@ -211,93 +183,62 @@ class Configuration(object): default = '' sub_config[name] = default sub_config.comments[name] = wrap(description) - - self.config['sed_creation_modules'].comments[module_name] = [ + sub_spec[name] = typ + self.config['sed_modules_params'].comments[module_name] = [ creation_modules.get_module(module_name, blank=True).comments] self.check_modules() # Configuration for the analysis method - self.config['analysis_configuration'] = {} - self.config.comments['analysis_configuration'] = ["", ""] + wrap( + self.config['analysis_params'] = {} + self.config.comments['analysis_params'] = ["", ""] + wrap( "Configuration of the statistical analysis method.") + self.spec['analysis_params'] = {} + module_name = self.config['analysis_method'] for name, (typ, desc, default) in \ analysis_modules.get_module(module_name).parameter_list.items(): if default is None: default = '' - self.config['analysis_configuration'][name] = default - self.config['analysis_configuration'].comments[name] = wrap(desc) + self.config['analysis_params'][name] = default + self.config['analysis_params'].comments[name] = wrap(desc) + self.spec['analysis_params'][name] = typ self.config.write() + self.spec.write() @property def 
configuration(self): - """Returns a dictionary for the session configuration. + """Returns a dictionary for the session configuration if it is valid. + Otherwise, print the erroneous keys. Returns ------- - configuration['data_file']: string - File containing the observations to fit. - configuration['column_list']: list of strings - List of the columns of data_file to use in the fitting. - configuration['creation_modules']: list of strings - List of the modules (in the right order) used to create the SEDs. - configuration['creation_modules_params']: list of dictionaries - Configuration parameters for each module. To each parameter, the - dictionary associates a list of possible values (possibly only - one). - configuration['analysis_method']: string - Statistical analysis module used to fit the data. - configuration['analysis_method_params']: dictionary - Parameters for the statistical analysis module. To each parameter - is associated a list of possible values. + configuration: dictionary + Dictionary containing the information provided in pcigale.ini. 
""" - configuration = {} - - # Before building the configuration dictionary, we ensure that all the - # fields are filled - if not self.config['parameters_file']: - self.complete_redshifts() - - for section in ['data_file', 'parameters_file', 'column_list', - 'creation_modules', 'analysis_method']: - configuration[section] = self.config[section] - configuration['cores'] = int(self.config['cores']) - - # Parsing the SED modules parameters - configuration['creation_modules_params'] = [] - for module in self.config['creation_modules']: - module_params = {} - for key, value in \ - self.config['sed_creation_modules'][module].items(): - module_params[key] = evaluate_description(value) - configuration['creation_modules_params'].append(module_params) - - if (self.config['analysis_method'] == 'savefluxes' and - not self.config['analysis_configuration']['variables']): - warehouse = SedWarehouse() - params = ParametersHandler(configuration) - sed = warehouse.get_sed(params.modules, - params.from_index(0)) - info = list(sed.info.keys()) - info.sort() - self.config['analysis_configuration']['variables'] = info - elif (self.config['analysis_method'] == 'pdf_analysis' and - not self.config['analysis_configuration']['analysed_variables']): - warehouse = SedWarehouse() - params = ParametersHandler(configuration) - sed = warehouse.get_sed(params.modules, - params.from_index(0)) - info = list(sed.info.keys()) - info.sort() - self.config['analysis_configuration']['analysed_variables'] = info + self.complete_redshifts() + self.complete_analysed_parameters() + + vdt = validate.Validator(validation.functions) + validity = self.config.validate(vdt, preserve_errors=True) + + if validity is not True: + print("The following issues have been found in pcigale.ini:") + for module, param, message in configobj.flatten_errors(self.config, + validity): + if len(module) > 0: + print("Module {}, parameter {}: {}".format('/'.join(module), + param, message)) + else: + print("Parameter {}: 
{}".format(param, message)) + print("Run the same command after having fixed pcigale.ini. If you" + " want to disable error checking, simply remove the " + "pcigale.ini.spec file.") - # Analysis method parameters - configuration['analysis_method_params'] = \ - self.config['analysis_configuration'] + return None - return configuration + return self.config.dict() def check_modules(self): """Make a basic check to ensure that some required modules are present. @@ -340,13 +281,31 @@ class Configuration(object): configuration file and must be extracted from the input flux file. """ - z_mod = self.config['sed_creation_modules']['redshifting']['redshift'] + z_mod = self.config['sed_modules_params']['redshifting']['redshift'] if type(z_mod) is str and not z_mod: if self.config['data_file']: obs_table = read_table(self.config['data_file']) - z = np.unique(np.around(obs_table['redshift'], - decimals=REDSHIFT_DECIMALS)) - self.config['sed_creation_modules']['redshifting']['redshift'] = z + z = list(np.unique(np.around(obs_table['redshift'], + decimals=REDSHIFT_DECIMALS))) + self.config['sed_modules_params']['redshifting']['redshift'] = z else: raise Exception("No flux file and no redshift indicated. " "The spectra cannot be computed. Aborting.") + + def complete_analysed_parameters(self): + """Complete the configuration when the variables are missing from the + configuration file and must be extracted from a dummy run.""" + if self.config['analysis_method'] == 'savefluxes': + name = 'variables' + elif self.config['analysis_method'] == 'pdf_analysis': + name = 'analysed_variables' + else: + raise Exception("Unknown analysis method") + + if not self.config['analysis_params'][name]: + warehouse = SedWarehouse() + params = ParametersHandler(self.config.dict()) + sed = warehouse.get_sed(params.modules, params.from_index(0)) + info = list(sed.info.keys()) + info.sort() + self.config['analysis_params'][name] = info -- GitLab