Source code for pyhf.infer

"""Inference for Statistical Models."""

from .test_statistics import qmu
from .. import get_backend
from .calculators import AsymptoticCalculator


[docs]def hypotest(
    poi_test,
    data,
    pdf,
    init_pars=None,
    par_bounds=None,
    fixed_params=None,
    qtilde=False,
    **kwargs,
):
    r"""
    Compute :math:`p`-values and test statistics for a single value of the parameter of interest.

    Example:
        >>> import pyhf
        >>> pyhf.set_backend("numpy")
        >>> model = pyhf.simplemodels.hepdata_like(
        ...     signal_data=[12.0, 11.0], bkg_data=[50.0, 52.0], bkg_uncerts=[3.0, 7.0]
        ... )
        >>> observations = [51, 48]
        >>> data = pyhf.tensorlib.astensor(observations + model.config.auxdata)
        >>> test_poi = 1.0
        >>> CLs_obs, CLs_exp_band = pyhf.infer.hypotest(
        ...     test_poi, data, model, qtilde=True, return_expected_set=True
        ... )
        >>> CLs_obs
        array(0.05251497)
        >>> CLs_exp_band
        [array(0.00260626), array(0.01382005), array(0.06445321), array(0.23525644), array(0.57303621)]

    Args:
        poi_test (Number or Tensor): The value of the parameter of interest (POI)
        data (Number or Tensor): The data considered
        pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``
        init_pars (`tensor`): The initial parameter values to be used for minimization
        par_bounds (`tensor`): The parameter value bounds to be used for minimization
        fixed_params (`tensor`): Whether to fix the parameter to the init_pars value during minimization
        qtilde (Bool): When ``True`` perform the calculation using the alternative
         test statistic, :math:`\tilde{q}_{\mu}`, as defined under the Wald
         approximation in Equation (62) of :xref:`arXiv:1007.1727`.

    Keyword Args:
        return_tail_probs (bool): Bool for returning :math:`\mathrm{CL}_{s+b}` and :math:`\mathrm{CL}_{b}`
        return_expected (bool): Bool for returning :math:`\mathrm{CL}_{\mathrm{exp}}`
        return_expected_set (bool): Bool for returning the :math:`(-2,-1,0,1,2)\sigma` :math:`\mathrm{CL}_{\mathrm{exp}}` --- the "Brazil band"

    Returns:
        Tuple of Floats and lists of Floats:

            - :math:`\mathrm{CL}_{s}`: The modified :math:`p`-value compared to
              the given threshold :math:`\alpha`, typically taken to be :math:`0.05`,
              defined in :xref:`arXiv:1007.1727` as

            .. math::

                \mathrm{CL}_{s} = \frac{\mathrm{CL}_{s+b}}{\mathrm{CL}_{b}} = \frac{p_{s+b}}{1-p_{b}}

            to protect against excluding signal models in which there is little
            sensitivity. In the case that :math:`\mathrm{CL}_{s} \leq \alpha`
            the given signal model is excluded.

            - :math:`\left[\mathrm{CL}_{s+b}, \mathrm{CL}_{b}\right]`: The
              signal + background model hypothesis :math:`p`-value

            .. math::

                \mathrm{CL}_{s+b} = p_{s+b}
                = p\left(q \geq q_{\mathrm{obs}}\middle|s+b\right)
                = \int\limits_{q_{\mathrm{obs}}}^{\infty} f\left(q\,\middle|s+b\right)\,dq
                = 1 - F\left(q_{\mathrm{obs}}(\mu)\,\middle|\mu'\right)

            and 1 minus the background only model hypothesis :math:`p`-value

            .. math::

                \mathrm{CL}_{b} = 1- p_{b}
                = p\left(q \geq q_{\mathrm{obs}}\middle|b\right)
                = 1 - \int\limits_{-\infty}^{q_{\mathrm{obs}}} f\left(q\,\middle|b\right)\,dq
                = 1 - F\left(q_{\mathrm{obs}}(\mu)\,\middle|0\right)

            for signal strength :math:`\mu` and model hypothesis signal strength
            :math:`\mu'`, where the cumulative density functions
            :math:`F\left(q(\mu)\,\middle|\mu'\right)` are given by Equations (57)
            and (65) of :xref:`arXiv:1007.1727` for upper-limit-like test
            statistic :math:`q \in \{q_{\mu}, \tilde{q}_{\mu}\}`.
            Only returned when ``return_tail_probs`` is ``True``.

            .. note::

                The definitions of the :math:`\mathrm{CL}_{s+b}` and
                :math:`\mathrm{CL}_{b}` used are based on profile likelihood
                ratio test statistics.
                This procedure is common in the LHC-era, but differs from
                procedures used in the LEP and Tevatron eras, as briefly
                discussed in :math:`\S` 3.8 of :xref:`arXiv:1007.1727`.

            - :math:`\mathrm{CL}_{s,\mathrm{exp}}`: The expected :math:`\mathrm{CL}_{s}`
              value corresponding to the test statistic under the background
              only hypothesis :math:`\left(\mu=0\right)`.
              Only returned when ``return_expected`` is ``True``.

            - :math:`\mathrm{CL}_{s,\mathrm{exp}}` band: The set of expected
              :math:`\mathrm{CL}_{s}` values corresponding to the median
              significance of variations of the signal strength from the
              background only hypothesis :math:`\left(\mu=0\right)` at
              :math:`(-2,-1,0,1,2)\sigma`.
              That is, the :math:`p`-values that satisfy Equation (89) of
              :xref:`arXiv:1007.1727`

            .. math::

                \mathrm{band}_{N\sigma} = \mu' + \sigma\,\Phi^{-1}\left(1-\alpha\right) \pm N\sigma

            for :math:`\mu'=0` and :math:`N \in \left\{-2, -1, 0, 1, 2\right\}`.
            These values define the boundaries of an uncertainty band sometimes
            referred to as the "Brazil band".
            Only returned when ``return_expected_set`` is ``True``.

    """
    init_pars = init_pars or pdf.config.suggested_init()
    par_bounds = par_bounds or pdf.config.suggested_bounds()
    fixed_params = fixed_params or pdf.config.suggested_fixed()

    calc = AsymptoticCalculator(
        data, pdf, init_pars, par_bounds, fixed_params, qtilde=qtilde
    )
    teststat = calc.teststatistic(poi_test)
    sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test)

    CLsb = sig_plus_bkg_distribution.pvalue(teststat)
    CLb = b_only_distribution.pvalue(teststat)
    CLs = CLsb / CLb

    tensorlib, _ = get_backend()
    # Ensure that all CL values are 0-d tensors
    CLsb, CLb, CLs = (
        tensorlib.astensor(CLsb),
        tensorlib.astensor(CLb),
        tensorlib.astensor(CLs),
    )

    _returns = [CLs]
    if kwargs.get('return_tail_probs'):
        _returns.append([CLsb, CLb])
    if kwargs.get('return_expected_set'):
        CLs_exp = []
        for n_sigma in [2, 1, 0, -1, -2]:

            expected_bonly_teststat = b_only_distribution.expected_value(n_sigma)

            CLs = sig_plus_bkg_distribution.pvalue(
                expected_bonly_teststat
            ) / b_only_distribution.pvalue(expected_bonly_teststat)
            CLs_exp.append(tensorlib.astensor(CLs))
        if kwargs.get('return_expected'):
            _returns.append(CLs_exp[2])
        _returns.append(CLs_exp)
    elif kwargs.get('return_expected'):
        n_sigma = 0
        expected_bonly_teststat = b_only_distribution.expected_value(n_sigma)

        CLs = sig_plus_bkg_distribution.pvalue(
            expected_bonly_teststat
        ) / b_only_distribution.pvalue(expected_bonly_teststat)
        _returns.append(tensorlib.astensor(CLs))
    # Enforce a consistent return type of the observed CLs
    return tuple(_returns) if len(_returns) > 1 else _returns[0]


__all__ = ['qmu', 'hypotest']