results.py 15.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
# -*- coding: utf-8 -*-
# Copyright (C) 2017 Universidad de Antofagasta
# Licensed under the CeCILL-v2 licence - see Licence_CeCILL_V2-en.txt
# Author: Médéric Boquien

"""These classes manage the results from the analysis. The main class
ResultsManager contains instances of BayesResultsManager and BestResultsManager
that contain the bayesian and best-fit estimates of the physical properties
along with the names of the parameters, which are proportional to the mass,
etc. Each of these classes contain a merge() method that allows to combine
results of the analysis with different blocks of models.
"""
13
import ctypes
14 15 16 17

from astropy.table import Table, Column
import numpy as np

18
from .utils import SharedArray
19 20 21 22 23 24 25 26 27 28 29 30 31


class BayesResultsManager(object):
    """This class contains the results of the bayesian estimates of the
    physical properties of the analysed objects. It is constructed from a
    ModelsManager instance, which provides the required information on the
    shape of the arrays. Because it can contain the partial results for only
    one block of models, we also store the sum of the model weights (that is
    the likelihood) so we can merge different instances to compute the combined
    estimates of the physical properties.

    """
    def __init__(self, models):
32
        nobs = len(models.obs)
33
        self.propertiesnames = models.allpropnames
34 35 36 37 38 39
        extpropnames = [prop for prop in models.obs.conf['analysis_params']['variables']
                        if (prop in models.allextpropnames or
                            prop[:-4] in models.allextpropnames)]
        intpropnames = [prop for prop in models.obs.conf['analysis_params']['variables']
                        if (prop in models.allintpropnames or
                            prop[:-4] in models.allintpropnames)]
40
        fluxnames = [name for name in models.conf['analysis_params']['bands']]
41
        self.nproperties = len(intpropnames) + len(extpropnames)
42 43 44 45 46 47

        # Arrays where we store the data related to the models. For memory
        # efficiency reasons, we use RawArrays that will be passed in argument
        # to the pool. Each worker will fill a part of the RawArrays. It is
        # important that there is no conflict and that two different workers do
        # not write on the same section.
48 49 50 51
        self.intmean = {prop: SharedArray(nobs) for prop in intpropnames}
        self.interror = {prop: SharedArray(nobs) for prop in intpropnames}
        self.extmean = {prop: SharedArray(nobs) for prop in extpropnames}
        self.exterror = {prop: SharedArray(nobs) for prop in extpropnames}
52 53
        self.fluxmean = {band: SharedArray(nobs) for band in fluxnames}
        self.fluxerror = {band: SharedArray(nobs) for band in fluxnames}
54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
        self.weight = SharedArray(nobs)

    @property
    def mean(self):
        return self._mean

    @mean.setter
    def mean(self, mean):
        self._mean = mean

    @property
    def error(self):
        return self._error

    @error.setter
    def error(self, error):
        self._error = error

    @property
    def weight(self):
        return self._weight

    @weight.setter
    def weight(self, weight):
        self._weight = weight
79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96

    @staticmethod
    def merge(results):
        """Merge a list of partial results computed on individual blocks of
        models.

        Parameters
        ----------
        results: list of BayesResultsManager instances
            List of the partial results to be merged

        """
        if not (isinstance(results, list) and
                all(isinstance(result, BayesResultsManager)
                    for result in results)):
            raise TypeError("A list of BayesResultsManager is required.")

        merged = results[0]
97
        intmean = {prop: np.array([result.intmean[prop]
98 99
                                   for result in results])
                   for prop in merged.intmean}
100
        interror = {prop: np.array([result.interror[prop]
101 102
                                    for result in results])
                    for prop in merged.interror}
103
        extmean = {prop: np.array([result.extmean[prop]
104 105
                                   for result in results])
                   for prop in merged.extmean}
106
        exterror = {prop: np.array([result.exterror[prop]
107 108
                                    for result in results])
                    for prop in merged.exterror}
109 110 111 112 113 114
        fluxmean = {band: np.array([result.fluxmean[band]
                                   for result in results])
                   for band in merged.fluxmean}
        fluxerror = {band: np.array([result.fluxerror[band]
                                    for result in results])
                    for band in merged.fluxerror}
115
        weight = np.array([result.weight for result in results])
116

117
        totweight = np.nansum(weight, axis=0)
118 119

        for prop in merged.intmean:
120
            merged.intmean[prop][:] = np.nansum(
121 122 123 124 125 126 127
                intmean[prop] * weight, axis=0) / totweight

            # We compute the merged standard deviation by combining the
            # standard deviations for each block. See
            # http://stats.stackexchange.com/a/10445 where the number of
            # datapoints has been substituted with the weights. In short we
            # exploit the fact that Var(X) = E(Var(X)) + Var(E(X)).
128
            merged.interror[prop][:] = np.sqrt(np.nansum(
129
                weight * (interror[prop]**2. + (intmean[prop]-merged.intmean[prop])**2), axis=0) / totweight)
130 131

        for prop in merged.extmean:
132
            merged.extmean[prop][:] = np.nansum(
133 134 135 136 137 138 139
                extmean[prop] * weight, axis=0) / totweight

            # We compute the merged standard deviation by combining the
            # standard deviations for each block. See
            # http://stats.stackexchange.com/a/10445 where the number of
            # datapoints has been substituted with the weights. In short we
            # exploit the fact that Var(X) = E(Var(X)) + Var(E(X)).
140
            merged.exterror[prop][:] = np.sqrt(np.nansum(
141
                weight * (exterror[prop]**2. + (extmean[prop]-merged.extmean[prop])**2), axis=0) / totweight)
142 143 144

        for prop in merged.extmean:
            if prop.endswith('_log'):
145 146
                merged.exterror[prop][:] = \
                    np.maximum(0.02, merged.exterror[prop])
147
            else:
148 149 150
                merged.exterror[prop][:] = \
                    np.maximum(0.05 * merged.extmean[prop],
                               merged.exterror[prop])
151

152
        for band in merged.fluxmean:
153
            merged.fluxmean[band][:] = np.nansum(
154 155 156 157 158 159 160
                fluxmean[band] * weight, axis=0) / totweight

            # We compute the merged standard deviation by combining the
            # standard deviations for each block. See
            # http://stats.stackexchange.com/a/10445 where the number of
            # datapoints has been substituted with the weights. In short we
            # exploit the fact that Var(X) = E(Var(X)) + Var(E(X)).
161
            merged.fluxerror[band][:] = np.sqrt(np.nansum(
162 163 164
                weight * (fluxerror[band]**2. + (fluxmean[band]-merged.fluxmean[band])**2), axis=0) / totweight)


165 166 167 168 169 170 171 172 173 174 175 176 177 178
        return merged


class BestResultsManager(object):
    """This class contains the physical properties of the best fit of the
    analysed objects. It is constructed from a ModelsManager instance, which
    provides the required information on the shape of the arrays. Because it
    can contain the partial results for only one block of models, we also store
    the index so we can merge different instances to compute the best fit.

    """

    def __init__(self, models):
        self.obs = models.obs
179
        nobs = len(models.obs)
180 181 182 183 184
        # Arrays where we store the data related to the models. For memory
        # efficiency reasons, we use RawArrays that will be passed in argument
        # to the pool. Each worker will fill a part of the RawArrays. It is
        # important that there is no conflict and that two different workers do
        # not write on the same section.
185
        self.flux = {band: SharedArray(nobs) for band in models.obs.bands}
186 187
        allintpropnames = models.allintpropnames
        allextpropnames = models.allextpropnames
188 189 190 191 192 193
        self.intprop = {prop: SharedArray(nobs)
                        for prop in allintpropnames}
        self.extprop = {prop: SharedArray(nobs)
                        for prop in allextpropnames}
        self.chi2 = SharedArray(nobs)
        self.scaling = SharedArray(nobs)
194
        self.index = SharedArray(nobs, ctypes.c_uint32)
195 196

    @property
197
    def flux(self):
198 199 200 201
        """Returns a shared array containing the fluxes of the best fit for
        each observation.

        """
202 203 204 205 206
        return self._flux

    @flux.setter
    def flux(self, flux):
        self._flux = flux
207

208
    @property
209
    def intprop(self):
210 211 212 213
        """Returns a shared array containing the fluxes of the best fit for
        each observation.

        """
214 215 216 217 218
        return self._intprop

    @intprop.setter
    def intprop(self, intprop):
        self._intprop = intprop
219 220

    @property
221
    def extprop(self):
222 223 224 225
        """Returns a shared array containing the fluxes of the best fit for
        each observation.

        """
226
        return self._extprop
227

228 229 230
    @extprop.setter
    def extprop(self, extprop):
        self._extprop = extprop
231 232 233 234 235 236 237

    @property
    def chi2(self):
        """Returns a shared array containing the raw chi² of the best fit for
        each observation.

        """
238
        return self._chi2
239

240 241 242 243
    @chi2.setter
    def chi2(self, chi2):
        self._chi2 = chi2

244 245 246 247 248 249
    @property
    def index(self):
        """Returns a shared array containing the index of the best fit for each
        observation.

        """
250
        return self._index
251

252 253 254 255
    @index.setter
    def index(self, index):
        self._index = index

256 257 258 259 260 261
    @property
    def scaling(self):
        """Returns a shared array containing the scaling of the best fit for each
        observation.

        """
262
        return self._scaling
263 264 265 266 267

    @scaling.setter
    def scaling(self, scaling):
        self._scaling = scaling

268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
    @staticmethod
    def merge(results):
        """Merge a list of partial results computed on individual blocks of
        models.

        Parameters
        ----------
        results: list of BestResultsManager instances
            List of the partial results to be merged

        """
        if not (isinstance(results, list) and
                all(isinstance(result, BestResultsManager)
                    for result in results)):
            raise TypeError("A list of BestResultsManager is required.")

        if len(results) == 1:
            return results[0]

287
        chi2 = np.array([result.chi2 for result in results])
288
        merged = results[0]
289 290 291 292 293 294 295 296 297 298 299 300 301 302
        for iobs, bestidx in enumerate(np.argsort(chi2, axis=0)[0, :]):
            if np.isfinite(bestidx):
                for band in merged.flux:
                    merged.flux[band][iobs] = \
                        results[bestidx].flux[band][iobs]
                for prop in merged.intprop:
                    merged.intprop[prop][iobs] = \
                        results[bestidx].intprop[prop][iobs]
                for prop in merged.extprop:
                    merged.extprop[prop][iobs] = \
                        results[bestidx].extprop[prop][iobs]
                merged.chi2[iobs] = results[bestidx].chi2[iobs]
                merged.scaling[iobs] = results[bestidx].scaling[iobs]
                merged.index[iobs] = results[bestidx].index[iobs]
303 304 305 306 307 308 309 310

        return merged

    def analyse_chi2(self):
        """Function to analyse the best chi^2 and find out what fraction of
         objects seems to be overconstrainted.

        """
311 312
        # If no best model has been found, it means none could be properly
        # fitted. We warn the user in that case
Yannick Roehlly's avatar
Yannick Roehlly committed
313
        bad = [str(id_) for id_ in self.obs.table['id'][np.isnan(self.chi2)]]
314 315 316 317 318
        if len(bad) > 0:
            print(f"No suitable model found for {', '.join(bad)}. It may be "
                  f"that models are older than the universe or that your χ² are"
                  f" very large.")

319
        obs = [self.obs.table[obs].data for obs in self.obs.tofit]
320 321 322 323
        nobs = np.count_nonzero(np.isfinite(obs), axis=0)
        chi2_red = self.chi2 / (nobs - 1)
        # If low values of reduced chi^2, it means that the data are overfitted
        # Errors might be under-estimated or not enough valid data.
324 325
        print(f"{np.round((chi2_red < 1e-12).sum() / chi2_red.size, 1)}% of "
              f"the objects have χ²_red~0 and "
326
              f"{np.round((chi2_red < 0.5).sum() / chi2_red.size, 1)}% "
327
              f"χ²_red<0.5")
328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378


class ResultsManager(object):
    """This class contains the physical properties (best fit and bayesian) of
    the analysed objects. It is constructed from a ModelsManager instance,
    which provides the required information to initialise the instances of
    BestResultsManager and BayesResultsManager that store the results.

    """

    def __init__(self, models):
        self.conf = models.conf
        self.obs = models.obs
        self.params = models.params

        self.bayes = BayesResultsManager(models)
        self.best = BestResultsManager(models)

    @staticmethod
    def merge(results):
        """Merge a list of partial results computed on individual blocks of
        models.

        Parameters
        ----------
        results: list of ResultsManager instances
            List of the partial results to be merged

        """
        merged = results[0]
        merged.bayes = BayesResultsManager.merge([result.bayes
                                                  for result in results])
        merged.best = BestResultsManager.merge([result.best
                                                for result in results])

        return merged

    def save(self, filename):
        """Save the estimated values derived from the analysis of the PDF and
        the parameters associated with the best fit. A simple text file and a
        FITS file are generated.

        Parameters
        ----------
        filename:

        """
        table = Table()

        table.add_column(Column(self.obs.table['id'], name="id"))

379
        for prop in sorted(self.bayes.intmean):
380
            table.add_column(Column(self.bayes.intmean[prop],
381
                                    name="bayes."+prop))
382
            table.add_column(Column(self.bayes.interror[prop],
383 384
                                    name="bayes."+prop+"_err"))
        for prop in sorted(self.bayes.extmean):
385
            table.add_column(Column(self.bayes.extmean[prop],
386
                                    name="bayes."+prop))
387
            table.add_column(Column(self.bayes.exterror[prop],
388
                                    name="bayes."+prop+"_err"))
389 390 391 392 393
        for band in sorted(self.bayes.fluxmean):
            table.add_column(Column(self.bayes.fluxmean[band],
                                    name="bayes."+band))
            table.add_column(Column(self.bayes.fluxerror[band],
                                    name="bayes."+band+"_err"))
394 395

        table.add_column(Column(self.best.chi2, name="best.chi_square"))
396
        obs = [self.obs.table[obs].data for obs in self.obs.tofit]
397 398 399 400
        nobs = np.count_nonzero(np.isfinite(obs), axis=0)
        table.add_column(Column(self.best.chi2 / (nobs - 1),
                                name="best.reduced_chi_square"))

401
        for prop in sorted(self.best.intprop):
402
            table.add_column(Column(self.best.intprop[prop],
403 404
                                    name="best."+prop))
        for prop in sorted(self.best.extprop):
405
            table.add_column(Column(self.best.extprop[prop],
406 407 408
                                    name="best."+prop))

        for band in self.obs.bands:
Médéric Boquien's avatar
Médéric Boquien committed
409
            if band.startswith('line.') or band.startswith('linefilter.'):
410 411 412
                unit = 'W/m^2'
            else:
                unit = 'mJy'
413
            table.add_column(Column(self.best.flux[band],
414
                                    name="best."+band, unit=unit))
415 416


417
        table.write(f"out/{filename}.txt", format='ascii.fixed_width',
418
                    delimiter=None)
419
        table.write(f"out/{filename}.fits", format='fits')