Source code for bayspec.infer.analyzer

"""Post-fit analyzers for posterior samples and bootstrap ensembles.

:class:`SampleAnalyzer` absorbs an :class:`~bayspec.infer.infer.Infer`
instance, reads a 2D sample matrix (``param_sample`` plus a trailing
log-probability column), attaches the draws to every free parameter's
:class:`~bayspec.util.post.Post`, and exposes point estimates, credible
intervals, and model-selection scores (AIC/AICc/BIC, optionally ``lnZ``).

:class:`Posterior` and :class:`Bootstrap` are thin subclasses that pick
which attribute of the underlying ``Infer`` carries the sample matrix.
"""

from collections import OrderedDict
import os

import numpy as np

from ..util.info import Info
from ..util.post import Post
from ..util.tools import json_dump
from .infer import BayesInfer, Infer, MaxLikeFit



[docs]
class SampleAnalyzer(Infer):
    """Wrap an :class:`Infer` with posterior-/bootstrap-driven summary views.

    The sample matrix is expected to have shape ``(nsample, nfree + 1)``,
    where the last column holds the log-probability associated with each
    draw. Subclasses set :attr:`sample_attribute` to the instance
    attribute (``posterior_sample`` or ``bootstrap_sample``) that stores
    the matrix.

    Attributes:
        sample_attribute: Name of the source ``Infer`` attribute; set by
            each subclass.
        analyzer_type: Display label shown in :meth:`__str__`.
        save_prefix: File-name prefix used by :meth:`save`.
    """

    sample_attribute = None
    analyzer_type = 'Sample Analysis Results'
    save_prefix = 'sample'

    def __init__(self, infer):
        """Absorb ``infer`` and populate the free-parameter posteriors.

        Args:
            infer: An :class:`Infer` instance whose sample matrix is
                stored under :attr:`sample_attribute`.

        Raises:
            TypeError: If ``infer`` is not an :class:`Infer`.
            AttributeError/ValueError: If the sample matrix is missing
                or has the wrong shape.
        """

        self.infer = infer

    @property
    def infer(self):

        return self._infer

    @infer.setter
    def infer(self, new_infer):
        """Absorb ``new_infer``, then load its sample matrix and wire up posteriors.

        Raises:
            TypeError: If ``new_infer`` is not an :class:`Infer`.
            AttributeError/ValueError: If the sample matrix is missing or
                has the wrong shape.
        """

        if not isinstance(new_infer, Infer):
            raise TypeError('expected an instance of Infer')

        self._infer = new_infer
        self.__dict__.update(new_infer.__dict__)

        self._check_sample()
        self._allot_post()

    def _check_sample(self):
        """Load and validate the sample matrix from :attr:`sample_attribute`."""

        if self.sample_attribute is None:
            raise AttributeError('sample_attribute is not defined')

        self.sample = getattr(self, self.sample_attribute, None)
        if self.sample is None:
            raise AttributeError(f'{self.sample_attribute} is not available')

        self.sample = np.asarray(self.sample, dtype=float)
        if self.sample.ndim != 2:
            raise ValueError(f'{self.sample_attribute} is expected to be a 2D array')

        if self.sample.shape[1] != self.free_nparams + 1:
            raise ValueError(
                f'{self.sample_attribute} is expected to have {self.free_nparams + 1} columns'
            )

        self.param_sample = self.sample[:, : self.free_nparams].copy()
        self.prob_sample = self.sample[:, -1].copy()

    def _allot_post(self):
        """Attach a :class:`Post` to every free parameter and seed the best-fit CI."""

        for i in range(self.free_nparams):
            self.free_par[i + 1].post = Post(self.param_sample[:, i], self.prob_sample)

        self._allot_best_ci(q=0.6827)
        self.at_par(self.par_best)

    def _allot_best_ci(self, q=0.6827):
        """Pick the highest-probability draw that lies within every ``q``-interval.

        Args:
            q: Central credible level (0-1) that the chosen draw must
                satisfy on every dimension simultaneously.
        """

        argsort = np.argsort(self.prob_sample)[::-1]
        sort_param_sample = self.param_sample[argsort]

        for sample in sort_param_sample:
            if np.array(
                [(ci[0] <= sample[i] <= ci[1]) for i, ci in enumerate(self.par_interval(q))]
            ).all():
                for par, value in zip(self.free_par.values(), sample, strict=False):
                    par.post.best_ci = value

                break

    @property
    def sample_statistic(self):
        """Mean, median, and 1/2/3-sigma intervals of :attr:`param_sample`."""

        mean = np.mean(self.param_sample, axis=0)
        median = np.median(self.param_sample, axis=0)

        q = 68.27 / 100
        Isigma = np.quantile(self.param_sample, [0.5 - q / 2, 0.5 + q / 2], axis=0)

        q = 95.45 / 100
        IIsigma = np.quantile(self.param_sample, [0.5 - q / 2, 0.5 + q / 2], axis=0)

        q = 99.73 / 100
        IIIsigma = np.quantile(self.param_sample, [0.5 - q / 2, 0.5 + q / 2], axis=0)

        q = 90 / 100
        ninety_percent = np.quantile(self.param_sample, [0.5 - q / 2, 0.5 + q / 2], axis=0)

        return dict(
            [
                ('mean', mean),
                ('median', median),
                ('Isigma', Isigma),
                ('IIsigma', IIsigma),
                ('IIIsigma', IIIsigma),
                ('90%', ninety_percent),
            ]
        )

    @property
    def par_mean(self):
        """Per-parameter posterior means drawn from each :class:`Post`."""

        return [par.post.mean for par in self.free_par.values()]

    @property
    def par_median(self):
        """Per-parameter posterior medians."""

        return [par.post.median for par in self.free_par.values()]

    @property
    def par_best(self):
        """Per-parameter highest-log-probability draws."""

        return [par.post.best for par in self.free_par.values()]

    @property
    def par_best_ci(self):
        """Per-parameter draw selected via :meth:`_allot_best_ci`."""

        return [par.post.best_ci for par in self.free_par.values()]

    @property
    def par_truth(self):
        """Per-parameter truth value stored in each :class:`Post`, or ``None``."""

        return [par.post.truth for par in self.free_par.values()]


[docs]
    def par_quantile(self, q):
        """Per-parameter ``q``-quantile of the posterior.

        Args:
            q: Probability or array of probabilities in ``[0, 1]``.
        """

        return [par.post.quantile(q) for par in self.free_par.values()]



[docs]
    def par_interval(self, q):
        """Per-parameter central ``q``-credible interval."""

        return [par.post.interval(q) for par in self.free_par.values()]


    @property
    def par_Isigma(self):
        """Per-parameter one-sigma credible interval."""

        return [par.post.Isigma for par in self.free_par.values()]

    @property
    def par_IIsigma(self):
        """Per-parameter two-sigma credible interval."""

        return [par.post.IIsigma for par in self.free_par.values()]

    @property
    def par_IIIsigma(self):
        """Per-parameter three-sigma credible interval."""

        return [par.post.IIIsigma for par in self.free_par.values()]

    @property
    def par_ninety_percent(self):
        """Per-parameter 90% credible interval."""

        return [par.post.ninety_percent for par in self.free_par.values()]


[docs]
    def par_error(self, par, q=0.6827):
        """Per-parameter asymmetric errors of ``par`` against the ``q``-interval.

        Args:
            par: Sequence of per-parameter point estimates.
            q: Central credible level.

        Returns:
            List of ``[lower_error, upper_error]`` pairs aligned with
            ``par``.
        """

        ci = self.par_interval(q)

        return [np.diff([c[0], p, c[1]]).tolist() for p, c in zip(par, ci, strict=False)]


    @property
    def max_loglike(self):
        """Log-likelihood evaluated at the best-fit parameter vector."""

        self.at_par(self.par_best)

        return self.loglike

    @property
    def aic(self):
        """Akaike information criterion ``AIC = -2 ln L + 2 k``."""

        return -2 * self.max_loglike + 2 * self.free_nparams

    @property
    def aicc(self):
        """Finite-sample corrected AIC."""

        return self.aic + 2 * self.free_nparams * (self.free_nparams + 1) / (
            self.npoint - self.free_nparams - 1
        )

    @property
    def bic(self):
        """Bayesian information criterion ``BIC = -2 ln L + k ln n``."""

        return -2 * self.max_loglike + self.free_nparams * np.log(self.npoint)

    @property
    def lnZ(self):
        """Log-evidence supplied by the nested sampler, or ``None``."""

        return getattr(self, 'logevidence', None)

    @property
    def free_par_info(self):
        """Tabular :class:`Info` of free parameters with posterior summaries."""

        self._you_free()

        free_params = self.free_params.copy()

        free_params = Info.list_dict_to_dict(free_params)

        del free_params['Posterior']
        del free_params['Mates']
        del free_params['Frozen']
        del free_params['Prior']
        del free_params['Value']

        if None not in self.par_truth:
            free_params['Truth'] = [par for par in self.par_truth]

        free_params['Mean'] = [par for par in self.par_mean]
        free_params['Median'] = [par for par in self.par_median]
        free_params['Best'] = [par for par in self.par_best]
        free_params['1sigma Best'] = [par for par in self.par_best_ci]
        free_params['1sigma CI'] = ['[{:.3f}, {:.3f}]'.format(*tuple(ci)) for ci in self.par_Isigma]
        free_params['90% CI'] = [
            '[{:.3f}, {:.3f}]'.format(*tuple(ci)) for ci in self.par_ninety_percent
        ]

        return Info.from_dict(free_params)

    @property
    def stat_info(self):
        """Tabular :class:`Info` of the fit statistic evaluated at the best fit."""

        self.at_par(self.par_best)

        all_stat = self.all_stat.copy()

        return Info.from_dict(all_stat)

    @property
    def all_IC(self):
        """Ordered dictionary of AIC/AICc/BIC/lnZ values."""

        all_IC = OrderedDict()
        all_IC['AIC'] = self.aic
        all_IC['AICc'] = self.aicc
        all_IC['BIC'] = self.bic
        all_IC['lnZ'] = self.lnZ

        return all_IC

    @property
    def IC_info(self):
        """Tabular :class:`Info` view of :attr:`all_IC`."""

        all_IC = self.all_IC.copy()

        return Info.from_dict(all_IC)


[docs]
    def save(self, savepath):
        """Dump free-parameter, statistic, and IC tables under ``savepath``.

        Args:
            savepath: Directory path. Created if missing.
        """

        if not os.path.exists(savepath):
            os.makedirs(savepath)

        json_dump(
            self.free_par_info.data_list_dict, savepath + f'/{self.save_prefix}_free_par.json'
        )
        json_dump(self.stat_info.data_list_dict, savepath + f'/{self.save_prefix}_stat.json')
        json_dump(self.IC_info.data_list_dict, savepath + f'/{self.save_prefix}_IC.json')


    def __str__(self):

        return (
            f'*** {self.analyzer_type} ***\n'
            f'*** Parameters ***\n'
            f'{self.free_par_info.text_table}\n'
            f'*** Statistics ***\n'
            f'{self.stat_info.text_table}\n'
            f'*** Information Criterias ***\n'
            f'{self.IC_info.text_table}'
        )

    def __repr__(self):

        return self.__str__()

    def _repr_html_(self):

        return (
            f'{self.free_par_info.html_style}'
            f'<details open>'
            f'<summary style="margin-bottom: 10px;"><b>{self.analyzer_type}</b></summary>'
            f'<details open style="margin-top: 10px;">'
            f'<summary style="margin-bottom: 10px;"><b>Parameters</b></summary>'
            f'{self.free_par_info.html_table}'
            f'</details>'
            f'<details open style="margin-top: 10px;">'
            f'<summary style="margin-bottom: 10px;"><b>Statistics</b></summary>'
            f'{self.stat_info.html_table}'
            f'</details>'
            f'<details open style="margin-top: 10px;">'
            f'<summary style="margin-bottom: 10px;"><b>Information Criterias</b></summary>'
            f'{self.IC_info.html_table}'
            f'</details>'
        )




[docs]
class Posterior(SampleAnalyzer):
    """Analyzer specialised for Bayesian posterior samples."""

    sample_attribute = 'posterior_sample'
    analyzer_type = 'Posterior Results'
    save_prefix = 'post'

    def __init__(self, infer):
        """Absorb a :class:`BayesInfer` and attach its ``posterior_sample``.

        Raises:
            TypeError: If ``infer`` is not a :class:`BayesInfer`.
        """

        if not isinstance(infer, BayesInfer):
            raise TypeError('expected an instance of BayesInfer')

        super().__init__(infer)




[docs]
class Bootstrap(SampleAnalyzer):
    """Analyzer specialised for maximum-likelihood bootstrap ensembles.

    The first row of ``bootstrap_sample`` is treated as the best-fit
    truth; its value is copied onto each parameter's :class:`Post` so
    downstream consumers can access it as ``par.post.truth``.
    """

    sample_attribute = 'bootstrap_sample'
    analyzer_type = 'Bootstrap Results'
    save_prefix = 'boot'

    def __init__(self, infer):
        """Absorb a :class:`MaxLikeFit` and attach its ``bootstrap_sample``.

        Raises:
            TypeError: If ``infer`` is not a :class:`MaxLikeFit`.
        """

        if not isinstance(infer, MaxLikeFit):
            raise TypeError('expected an instance of MaxLikeFit')

        super().__init__(infer)

    def _allot_post(self):
        """Attach a :class:`Post` to every free parameter, plus best-CI and truth."""

        for i in range(self.free_nparams):
            self.free_par[i + 1].post = Post(self.param_sample[:, i], self.prob_sample)

        self._allot_best_ci(q=0.6827)
        self._allot_truth()
        self.at_par(self.par_truth)

    def _allot_truth(self):
        """Store the first bootstrap row (the best fit) as each parameter's truth."""

        for par, value in zip(self.free_par.values(), self.param_sample[0].tolist(), strict=False):
            par.post.truth = value

    @property
    def max_loglike(self):
        """Log-likelihood evaluated at the best-fit truth vector."""

        self.at_par(self.par_truth)

        return self.loglike