From 08068e8cb1788036d22722e66416b0fd433219f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?M=C3=A9d=C3=A9ric=20Boquien?= Date: Sun, 11 Oct 2015 13:24:47 -0300 Subject: [PATCH] Magic values are a bit of a pain to handle and are not safe. This patch removes the use of magic values and rather replaces them with NaN whenever appropriate. The logic of the input file stays the same but the magic values are converted internally after reading it so that the rest of the code does not have to deal with that. --- pcigale/analysis_modules/__init__.py | 90 +++++++++---------- .../analysis_modules/pdf_analysis/__init__.py | 1 + .../analysis_modules/pdf_analysis/utils.py | 13 +-- .../analysis_modules/pdf_analysis/workers.py | 26 +++--- .../analysis_modules/savefluxes/workers.py | 2 +- pcigale/sed/__init__.py | 4 +- 6 files changed, 66 insertions(+), 70 deletions(-) diff --git a/pcigale/analysis_modules/__init__.py b/pcigale/analysis_modules/__init__.py index 436cf1c7..4fd5aab4 100644 --- a/pcigale/analysis_modules/__init__.py +++ b/pcigale/analysis_modules/__init__.py @@ -149,20 +149,18 @@ def get_module(module_name): raise -def adjust_errors(flux, error, tolerance, lim_flag, default_error=0.1, - systematic_deviation=0.1): - """Adjust the errors replacing the 0 values by the default error and - adding the systematic deviation. - - The systematic deviation change the error to: - sqrt( error² + (flux * deviation)² ) +def adjust_data(fluxes, errors, tolerance, lim_flag, default_error=0.1, + systematic_deviation=0.1): + """Adjust the fluxes and errors replacing the invalid values by NaN, and + adding the systematic deviation. The systematic deviation changes the + errors to: sqrt(errors² + (fluxes*deviation)²) Parameters ---------- - flux: array of floats - Fluxes. - error: array of floats - Observational error in the same unit as the fluxes. + fluxes: array of floats + Observed fluxes. + errors: array of floats + Observational errors in the same unit as the fluxes. tolerance: float Tolerance threshold under flux error is considered as 0. lim_flag: boolean @@ -180,38 +178,35 @@ def adjust_errors(flux, error, tolerance, lim_flag, default_error=0.1, """ # The arrays must have the same lengths. - if len(flux) != len(error): + if len(fluxes) != len(errors): raise ValueError("The flux and error arrays must have the same " "length.") - # We copy the error array not to modify the original one. - error = np.copy(error) + # We copy the arrays not to modify the original ones. + fluxes = fluxes.copy() + errors = errors.copy() - # We define: - # 1) upper limit == (lim_flag==True) and - # [(flux > tolerance) and (-9990. < error < tolerance)] - # 2) no data == (flux < -9990.) and (error < -9990.) - # Note that the upper-limit test must be performed before the no-data test - # because if lim_flag is False, we process upper limits as no-data. - # - # Replace errors below tolerance by the default one. - mask_noerror = np.logical_and(flux > tolerance, error < -9990.) - error[mask_noerror] = (default_error * flux[mask_noerror]) + # We set invalid data to NaN + mask_invalid = np.where((fluxes <= tolerance) | (errors < -9990.)) + fluxes[mask_invalid] = np.nan + errors[mask_invalid] = np.nan - mask_limflag = np.logical_and.reduce( - (flux > tolerance, error >= -9990., error < tolerance)) + # Replace missing errors by the default ones. + mask_noerror = np.where((fluxes > tolerance) & ~np.isfinite(errors)) + errors[mask_noerror] = (default_error * fluxes[mask_noerror]) # Replace upper limits by no data if lim_flag==False if not lim_flag: - flux[mask_limflag] = -9999. - error[mask_limflag] = -9999. - - mask_ok = np.logical_and(flux > tolerance, error > tolerance) + mask_limflag = np.where((fluxes > tolerance) & (errors < tolerance)) + fluxes[mask_limflag] = np.nan + errors[mask_limflag] = np.nan # Add the systematic error. - error[mask_ok] = np.sqrt(error[mask_ok]**2 + - (flux[mask_ok]*systematic_deviation)**2) - return error + mask_ok = np.where((fluxes > tolerance) & (errors > tolerance)) + errors[mask_ok] = np.sqrt(errors[mask_ok]**2 + + (fluxes[mask_ok]*systematic_deviation)**2) + + return fluxes, errors def complete_obs_table(obs_table, used_columns, filter_list, tolerance, @@ -260,20 +255,17 @@ def complete_obs_table(obs_table, used_columns, filter_list, tolerance, "the observation table.".format(name)) name_err = name + "_err" - if name_err not in used_columns: - if name_err not in obs_table.columns: - obs_table.add_column(Column( - name=name_err, - data=np.full(len(obs_table), -9999.)), - index=obs_table.colnames.index(name)+1 - ) - else: - obs_table[name_err] = np.full(len(obs_table), -9999.) - - obs_table[name_err] = adjust_errors(obs_table[name], - obs_table[name_err], - tolerance, - lim_flag, - default_error, - systematic_deviation) + if name_err not in obs_table.columns: + obs_table.add_column(Column(name=name_err, + data=np.full(len(obs_table), np.nan)), + index=obs_table.colnames.index(name)+1) + elif name_err not in used_columns: + obs_table[name_err] = np.full(len(obs_table), np.nan) + + obs_table[name], obs_table[name_err] = adjust_data(obs_table[name], + obs_table[name_err], + tolerance, + lim_flag, + default_error, + systematic_deviation) return obs_table diff --git a/pcigale/analysis_modules/pdf_analysis/__init__.py b/pcigale/analysis_modules/pdf_analysis/__init__.py index a4f0df5c..1c690b2a 100644 --- a/pcigale/analysis_modules/pdf_analysis/__init__.py +++ b/pcigale/analysis_modules/pdf_analysis/__init__.py @@ -117,6 +117,7 @@ class PdfAnalysis(AnalysisModule): Number of cores to run the analysis on """ + np.seterr(invalid='ignore') print("Initialising the analysis module... ") diff --git a/pcigale/analysis_modules/pdf_analysis/utils.py b/pcigale/analysis_modules/pdf_analysis/utils.py index 2908f1c6..d6d0ab02 100644 --- a/pcigale/analysis_modules/pdf_analysis/utils.py +++ b/pcigale/analysis_modules/pdf_analysis/utils.py @@ -232,9 +232,9 @@ def dchi2_over_ds2(s, obs_fluxes, obs_errors, mod_fluxes): # The mask "data" selects the filter(s) for which measured fluxes are given # i.e., when obs_fluxes is >=0. and obs_errors >=0. # The mask "lim" selects the filter(s) for which upper limits are given - # i.e., when obs_fluxes is >=0. and obs_errors = 9990 <= obs_errors < 0. + # i.e., when obs_errors < 0 - wlim = np.where((obs_errors >= -9990.) & (obs_errors < 0.)) + wlim = np.where(np.isfinite(obs_errors) & (obs_errors < 0.)) wdata = np.where(obs_errors >= 0.) mod_fluxes_data = mod_fluxes[wdata] @@ -314,10 +314,10 @@ def compute_chi2(model_fluxes, obs_fluxes, obs_errors, lim_flag): """ # Some observations may not have fluxes in some filters, but they can have - # upper limits, indicated with 0.>obs_errors≥-9990. To treat them as such, + # upper limits, indicated with obs_errors≤0. To treat them as such, # lim_flag has to be set to True. tolerance = 1e-12 - limits = lim_flag and np.any((obs_errors >= -9990.) & + limits = lim_flag and np.any(np.isfinite(obs_errors) & (obs_errors < tolerance)) # Scaling factor to be applied to a model fluxes to minimise the χ². @@ -342,8 +342,9 @@ def compute_chi2(model_fluxes, obs_fluxes, obs_errors, lim_flag): obs_errors[mask_data]), axis=1) if limits == True: # This mask selects the filter(s) for which upper limits are given - # i.e., when (obs_flux≥0. (and obs_errors≥-9990., obs_errors<0.)) - mask_lim = np.logical_and(obs_errors >= -9990., obs_errors < tolerance) + # i.e., when obs_errors<0. + mask_lim = np.logical_and(np.isfinite(obs_errors), + obs_errors < tolerance) chi2 += -2. * np.sum( np.log( np.sqrt(np.pi/2.)*(-obs_errors[mask_lim])*( diff --git a/pcigale/analysis_modules/pdf_analysis/workers.py b/pcigale/analysis_modules/pdf_analysis/workers.py index 719f384f..6fa95890 100644 --- a/pcigale/analysis_modules/pdf_analysis/workers.py +++ b/pcigale/analysis_modules/pdf_analysis/workers.py @@ -171,8 +171,8 @@ def sed(idx): gbl_params.from_index(idx)) if 'sfh.age' in sed.info and sed.info['sfh.age'] > sed.info['universe.age']: - model_fluxes = np.full(len(gbl_filters), -99.) - model_variables = np.full(len(gbl_analysed_variables), -99.) + model_fluxes = np.full(len(gbl_filters), np.nan) + model_variables = np.full(len(gbl_analysed_variables), np.nan) else: model_fluxes = np.array([sed.compute_fnu(filter_) for filter_ in gbl_filters]) @@ -207,6 +207,8 @@ def analysis(idx, obs): Input data for an individual object """ + np.seterr(invalid='ignore') + # Tolerance threshold under which any flux or error is considered as 0. global gbl_keys tolerance = 1e-12 @@ -223,17 +225,17 @@ def analysis(idx, obs): wz = np.where(gbl_w_redshifts[gbl_redshifts[np.abs(obs['redshift'] - gbl_redshifts).argmin()]]) - # We only keep model with fluxes >= -90. If not => no data - # Probably because age > age of the universe (see function sed(idx) above). + # We only keep models with finite fluxes. Otherwise it means the models are + # invalid. This is probably because age > age of the universe (see function + # sed() above). model_fluxes = gbl_model_fluxes[wz[0], :] model_fluxes = model_fluxes[:, wobs[0]] model_variables = gbl_model_variables[wz[0], :] - wvalid = np.where(model_variables[:, 0] >= -90.) - model_fluxes = model_fluxes[wvalid[0], :] - model_variables = model_variables[wvalid[0], :] - + wvalid = np.isfinite(model_variables[:, 0]) + model_fluxes = model_fluxes[wvalid, :] + model_variables = model_variables[wvalid, :] chi2, scaling = compute_chi2(model_fluxes, obs_fluxes, obs_errors, gbl_lim_flag) @@ -266,11 +268,11 @@ def analysis(idx, obs): if gbl_previous_idx > -1: gbl_warehouse.partial_clear_cache( gbl_params.index_module_changed(gbl_previous_idx, - wz[0][wvalid[0][best_index]])) - gbl_previous_idx = wz[0][wvalid[0][best_index]] + wz[0][wvalid][best_index])) + gbl_previous_idx = wz[0][wvalid][best_index] sed = gbl_warehouse.get_sed(gbl_params.modules, - gbl_params.from_index([wz[0][wvalid[0][best_index]]])) + gbl_params.from_index([wz[0][wvalid][best_index]])) # We correct the mass-dependent parameters for key in sed.mass_proportional_info: @@ -293,7 +295,7 @@ def analysis(idx, obs): gbl_analysed_averages[idx, :] = analysed_averages gbl_analysed_std[idx, :] = analysed_std - gbl_best_fluxes[idx, :] = gbl_model_fluxes[wz[0][wvalid[0][best_index]], :] \ + gbl_best_fluxes[idx, :] = gbl_model_fluxes[wz[0][wvalid][best_index], :] \ *scaling[best_index] if gbl_keys is None: diff --git a/pcigale/analysis_modules/savefluxes/workers.py b/pcigale/analysis_modules/savefluxes/workers.py index 37e62973..5cbe17c7 100644 --- a/pcigale/analysis_modules/savefluxes/workers.py +++ b/pcigale/analysis_modules/savefluxes/workers.py @@ -83,7 +83,7 @@ def fluxes(idx): sed.to_votable(OUT_DIR + "{}_best_model.xml".format(idx)) if 'sfh.age' in sed.info and sed.info['sfh.age'] > sed.info['universe.age']: - model_fluxes = np.full(len(gbl_filters), -99.) + model_fluxes = np.full(len(gbl_filters), np.nan) else: model_fluxes = np.array([sed.compute_fnu(filter_) for filter_ in gbl_filters]) diff --git a/pcigale/sed/__init__.py b/pcigale/sed/__init__.py index f66c44ea..fcd3a090 100644 --- a/pcigale/sed/__init__.py +++ b/pcigale/sed/__init__.py @@ -262,7 +262,7 @@ class SED(object): Fν is computed in W/m²/Hz and then converted to mJy. If the SED spectrum does not cover all the filter response table, - -99 is returned. + NaN is returned. Parameters ---------- @@ -306,7 +306,7 @@ class SED(object): # Test if the filter covers all the spectrum extent. If not then # the flux is not defined if ((wavelength[0] > lambda_min) or (wavelength[-1] < lambda_max)): - return -99. + return np.nan # We regrid both spectrum and filter to the best wavelength grid # to avoid interpolating a high wavelength density curve to a low -- GitLab