observations.py 12.7 KB
Newer Older
1 2 3 4 5 6 7
# -*- coding: utf-8 -*-
# Copyright (C) 2017 Universidad de Antofagasta
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
# Author: Médéric Boquien

from astropy.table import Column
import numpy as np
8
from scipy.constants import parsec
9

10
from ..utils.cosmology import luminosity_distance
11
from ..utils.io import read_table
12 13
from .utils import get_info

14 15 16 17 18 19 20 21 22

class ObservationsManager(object):
    """Class to abstract the handling of the observations and provide a
    consistent interface for the rest of cigale to deal with observations.

    An ObservationsManager is in charge of reading the input file to memory,
    check the consistency of the data, replace invalid values with NaN, etc.

    """
23
    def __new__(cls, config, params=None, **kwargs):
24
        if config['data_file']:
25
            return ObservationsManagerPassbands(config, params, **kwargs)
26 27 28 29 30 31 32 33 34 35 36 37
        else:
            return ObservationsManagerVirtual(config, **kwargs)


class ObservationsManagerPassbands(object):
    """Class to generate a manager for data files providing fluxes in
    passbands.

    A class instance can be used as a sequence. In that case a row is returned
    at each iteration.
    """

38
    def __init__(self, config, params, defaulterror=0.1, modelerror=0.1,
39 40
                 threshold=-9990.):

41 42 43
        self.conf = config
        self.params = params
        self.allpropertiesnames, self.massproportional = get_info(self)
44
        self.table = read_table(config['data_file'])
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
        self.bands = [band for band in config['bands']
                      if not band.endswith('_err')]
        self.bands_err = [band for band in config['bands']
                          if band.endswith('_err')]
        self.intprops = [prop for prop in config['properties']
                         if (prop not in self.massproportional
                             and not prop.endswith('_err'))]
        self.intprops_err = [prop for prop in config['properties']
                             if (prop.endswith('_err')
                                 and prop[:-4] not in self.massproportional)]
        self.extprops = [prop for prop in config['properties']
                         if (prop in self.massproportional
                             and not prop.endswith('_err'))]
        self.extprops_err = [prop for prop in config['properties']
                             if (prop.endswith('_err')
                                 and prop[:-4] in self.massproportional)]
61 62
        self.tofit = self.bands + self.intprops + self.extprops
        self.tofit_err = self.bands_err + self.intprops_err + self.extprops_err
63 64 65 66 67 68 69 70

        # Sanitise the input
        self._check_filters()
        self._check_errors(defaulterror)
        self._check_invalid(config['analysis_params']['lim_flag'],
                            threshold)
        self._add_model_error(modelerror)

71 72 73 74
        # Rebuild the quantities to fit after vetting them
        self.tofit = self.bands + self.intprops + self.extprops
        self.tofit_err = self.bands_err + self.intprops_err + self.extprops_err

75 76
        self.observations = list([Observation(row, self)
                                  for row in self.table])
77

78
    def __len__(self):
79
        return len(self.observations)
80 81 82

    def __iter__(self):
        self.idx = 0
83
        self.max = len(self.observations)
84 85 86 87 88

        return self

    def __next__(self):
        if self.idx < self.max:
89
            obs = self.observations[self.idx]
90 91 92 93 94
            self.idx += 1
            return obs
        raise StopIteration

    def _check_filters(self):
95
        """Check whether the list of filters and poperties makes sense.
96 97

        Two situations are checked:
98 99 100 101
        * If a filter or property to be included in the fit is missing from
        the data file, an exception is raised.
        * If a filter or property is given in the input file but is not to be
        included in the fit, a warning is displayed
102 103

        """
104 105
        for item in self.tofit + self.tofit_err:
            if item not in self.table.colnames:
106 107
                raise Exception(f"{item} to be taken in the fit but not present"
                                f" in the observation table.")
108

109
        for item in self.table.colnames:
110
            if (item != 'id' and item != 'redshift' and item != 'distance' and
111
                    item not in self.tofit + self.tofit_err):
112
                self.table.remove_column(item)
113 114
                print(f"Warning: {item} in the input file but not to be taken "
                      f"into account in the fit.")
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137

    def _check_errors(self, defaulterror=0.1):
        """Check whether the error columns are present. If not, add them.

        This happens in two cases. Either when the error column is not in the
        list of bands to be analysed or when the error column is not present
        in the data file. Note that an error cannot be included explicitly if
        it is not present in the input file. Such a case would be ambiguous
        and will have been caught by self._check_filters().

        We take the error as defaulterror × flux, so by default 10% of the
        flux. The absolute value of the flux is taken in case it is negative.

        Parameters
        ----------
        defaulterror: float
            Fraction of the flux to take as the error when the latter is not
            given in input. By default: 10%.

        """
        if defaulterror < 0.:
            raise ValueError("The relative default error must be positive.")

138 139 140
        for item in self.tofit:
            error = item + '_err'
            if item in self.intprops:
141
                if (error not in self.intprops_err or
142
                        error not in self.table.colnames):
143 144 145 146
                    raise ValueError("Intensive properties errors must be in "
                                     "input file.")
            elif (error not in self.tofit_err or
                  error not in self.table.colnames):
147 148
                colerr = Column(data=np.fabs(self.table[item] * defaulterror),
                                name=error)
149
                self.table.add_column(colerr,
150
                                      index=self.table.colnames.index(item)+1)
151 152
                print(f"Warning: {defaulterror * 100}% of {item} taken as "
                      f"errors.")
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172

    def _check_invalid(self, upperlimits=False, threshold=-9990.):
        """Check whether invalid data are correctly marked as such.

        This happens in two cases:
        * Data are marked as upper limits (negative error) but the upper
        limit flag is set to False.
        * Data or errors are lower than -9990.

        We mark invalid data as np.nan. In case the entire column is made of
        invalid data, we remove them from the table

        Parameters
        ----------
        threshold: float
            Threshold under which the data are consisdered invalid.

        """
        allinvalid = []

173 174 175 176 177
        for item in self.bands + self.extprops:
            error = item + '_err'
            w = np.where((self.table[item] < threshold) |
                         (self.table[error] < threshold))
            self.table[item][w] = np.nan
178
            self.table[error][w] = np.nan
179
            if upperlimits is False:
180 181
                w = np.where(self.table[error] <= 0.)
                self.table[item][w] = np.nan
182
            else:
183 184 185 186 187 188 189 190 191 192 193 194 195
                w = np.where(self.table[error] == 0.)
                self.table[item][w] = np.nan
            if np.all(~np.isfinite(self.table[item])):
                allinvalid.append(item)

        for item in allinvalid:
            if item in self.bands:
                self.bands.remove(item)
                self.bands_err.remove(item + '_err')
            elif item in self.extprops:
                self.extprops.remove(item)
                self.extprops_err.remove(item + '_err')
            self.table.remove_columns([item, item + '_err'])
196
            print(f"Warning: {allinvalid} removed as no valid data was found.")
197 198 199 200 201 202 203

    def _add_model_error(self, modelerror=0.1):
        """Add in quadrature the error of the model to the input error.

        Parameters
        ----------
        modelerror: float
204 205
            Relative error of the models relative to the flux (or property). By
            default 10%.
206 207 208 209 210

        """
        if modelerror < 0.:
            raise ValueError("The relative model error must be positive.")

211
        for item in self.bands + self.extprops:
212 213 214 215
            error = item + '_err'
            w = np.where(self.table[error] >= 0.)
            self.table[error][w] = np.sqrt(self.table[error][w]**2. + (
                self.table[item][w]*modelerror)**2.)
216 217 218 219 220 221 222 223 224 225 226 227 228 229

    def generate_mock(self, fits):
        """Replaces the actual observations with a mock catalogue. It is
        computed from the best fit fluxes of a previous run. For each object
        and each band, we randomly draw a new flux from a Gaussian distribution
        centered on the best fit flux and with a standard deviation identical
        to the observed one.

        Parameters
        ----------
        fits: ResultsManager
            Best fit fluxes of a previous run.

        """
230
        for band in self.bands:
231
            err = band + '_err'
232
            self.table[band] = np.random.normal(fits.best.flux[band],
233
                                                np.fabs(self.table[err]))
234
        for prop in self.intprops:
235
            err = prop + '_err'
236
            self.table[prop] = np.random.normal(fits.best.intprop[prop],
237
                                                np.fabs(self.table[err]))
238
        for prop in self.extprops:
239
            err = prop + '_err'
240
            self.table[prop] = np.random.normal(fits.best.extprop[prop],
241
                                                np.fabs(self.table[err]))
242

243 244 245
        self.observations = list([Observation(row, self)
                                  for row in self.table])

246 247 248 249 250 251 252 253 254 255 256
    def save(self, filename):
        """Saves the observations as seen internally by the code so it is easy
        to see what fluxes are actually used in the fit. Files are saved in
        FITS and ASCII formats.

        Parameters
        ----------
        filename: str
            Root of the filename where to save the observations.

        """
257 258 259
        self.table.write(f'out/{filename}.fits')
        self.table.write(f'out/{filename}.txt', format='ascii.fixed_width',
                         delimiter=None)
260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278


class ObservationsManagerVirtual(object):
    """Virtual observations manager when there is no observations file given
    as input. In that case we only use the list of bands given in the
    pcigale.ini file.
    """

    def __init__(self, config, **kwargs):
        self.bands = [band for band in config['bands'] if not
                      band.endswith('_err')]

        if len(self.bands) != len(config['bands']):
            print("Warning: error bars were given in the list of bands.")
        elif len(self.bands) == 0:
            print("Warning: no band was given.")

        # We set the other class members to None as they do not make sense in
        # this situation
279
        self.bands_err = None
280
        self.table = None
281 282
        self.intprops = set()
        self.extprops = set()
283 284 285 286 287

    def __len__(self):
        """As there is no observed object the length is naturally 0.
        """
        return 0
288 289 290 291 292 293 294


class Observation(object):
    """Class to take one row of the observations table and extract the list of
    fluxes, intensive properties, extensive properties and their errors, that
    are going to be considered in the fit.
    """
295

296 297 298 299 300 301
    def __init__(self, row, cls):
        self.redshift = row['redshift']
        self.id = row['id']
        if 'distance' in row.colnames and np.isfinite(row['distance']):
            self.distance = row['distance'] * parsec * 1e6
        else:
302 303
            if self.redshift >= 0.:
                self.distance = luminosity_distance(self.redshift)
304 305
            else:
                self.distance = np.nan
306 307 308 309 310
        self.flux = {k: row[k] for k in cls.bands
                     if np.isfinite(row[k]) and row[k + '_err'] > 0.}
        self.flux_ul = {k: row[k] for k in cls.bands
                        if np.isfinite(row[k]) and row[k + '_err'] <= 0.}
        self.flux_err = {k: row[k + '_err'] for k in self.flux.keys()}
311
        self.flux_ul_err = {k: -row[k + '_err'] for k in self.flux_ul.keys()}
312 313 314 315 316 317

        self.extprop = {k: row[k] for k in cls.extprops
                        if np.isfinite(row[k]) and row[k + '_err'] > 0.}
        self.extprop_ul = {k: row[k] for k in cls.extprops
                           if np.isfinite(row[k]) and row[k + '_err'] <= 0.}
        self.extprop_err = {k: row[k + '_err'] for k in self.extprop.keys()}
318
        self.extprop_ul_err = {k: -row[k + '_err']
319 320 321 322
                               for prop in self.extprop_ul.keys()}

        self.intprop = {k: row[k] for k in cls.intprops}
        self.intprop_err = {k: row[k + '_err'] for k in cls.intprops}