"""Inference for Statistical Models."""
from .test_statistics import qmu
from .. import get_backend
from .calculators import AsymptoticCalculator
[docs]def hypotest(
poi_test,
data,
pdf,
init_pars=None,
par_bounds=None,
fixed_params=None,
qtilde=False,
**kwargs,
):
r"""
Compute :math:`p`-values and test statistics for a single value of the parameter of interest.
Example:
>>> import pyhf
>>> pyhf.set_backend("numpy")
>>> model = pyhf.simplemodels.hepdata_like(
... signal_data=[12.0, 11.0], bkg_data=[50.0, 52.0], bkg_uncerts=[3.0, 7.0]
... )
>>> observations = [51, 48]
>>> data = pyhf.tensorlib.astensor(observations + model.config.auxdata)
>>> test_poi = 1.0
>>> CLs_obs, CLs_exp_band = pyhf.infer.hypotest(
... test_poi, data, model, qtilde=True, return_expected_set=True
... )
>>> CLs_obs
array(0.05251497)
>>> CLs_exp_band
[array(0.00260626), array(0.01382005), array(0.06445321), array(0.23525644), array(0.57303621)]
Args:
poi_test (Number or Tensor): The value of the parameter of interest (POI)
data (Number or Tensor): The data considered
pdf (~pyhf.pdf.Model): The statistical model adhering to the schema ``model.json``
init_pars (`tensor`): The initial parameter values to be used for minimization
par_bounds (`tensor`): The parameter value bounds to be used for minimization
fixed_params (`tensor`): Whether to fix the parameter to the init_pars value during minimization
qtilde (Bool): When ``True`` perform the calculation using the alternative
test statistic, :math:`\tilde{q}_{\mu}`, as defined under the Wald
approximation in Equation (62) of :xref:`arXiv:1007.1727`.
Keyword Args:
return_tail_probs (bool): Bool for returning :math:`\mathrm{CL}_{s+b}` and :math:`\mathrm{CL}_{b}`
return_expected (bool): Bool for returning :math:`\mathrm{CL}_{\mathrm{exp}}`
return_expected_set (bool): Bool for returning the :math:`(-2,-1,0,1,2)\sigma` :math:`\mathrm{CL}_{\mathrm{exp}}` --- the "Brazil band"
Returns:
Tuple of Floats and lists of Floats:
- :math:`\mathrm{CL}_{s}`: The modified :math:`p`-value compared to
the given threshold :math:`\alpha`, typically taken to be :math:`0.05`,
defined in :xref:`arXiv:1007.1727` as
.. math::
\mathrm{CL}_{s} = \frac{\mathrm{CL}_{s+b}}{\mathrm{CL}_{b}} = \frac{p_{s+b}}{1-p_{b}}
to protect against excluding signal models in which there is little
sensitivity. In the case that :math:`\mathrm{CL}_{s} \leq \alpha`
the given signal model is excluded.
- :math:`\left[\mathrm{CL}_{s+b}, \mathrm{CL}_{b}\right]`: The
signal + background model hypothesis :math:`p`-value
.. math::
\mathrm{CL}_{s+b} = p_{s+b}
= p\left(q \geq q_{\mathrm{obs}}\middle|s+b\right)
= \int\limits_{q_{\mathrm{obs}}}^{\infty} f\left(q\,\middle|s+b\right)\,dq
= 1 - F\left(q_{\mathrm{obs}}(\mu)\,\middle|\mu'\right)
and 1 minus the background only model hypothesis :math:`p`-value
.. math::
\mathrm{CL}_{b} = 1- p_{b}
= p\left(q \geq q_{\mathrm{obs}}\middle|b\right)
= 1 - \int\limits_{-\infty}^{q_{\mathrm{obs}}} f\left(q\,\middle|b\right)\,dq
= 1 - F\left(q_{\mathrm{obs}}(\mu)\,\middle|0\right)
for signal strength :math:`\mu` and model hypothesis signal strength
:math:`\mu'`, where the cumulative density functions
:math:`F\left(q(\mu)\,\middle|\mu'\right)` are given by Equations (57)
and (65) of :xref:`arXiv:1007.1727` for upper-limit-like test
statistic :math:`q \in \{q_{\mu}, \tilde{q}_{\mu}\}`.
Only returned when ``return_tail_probs`` is ``True``.
.. note::
The definitions of the :math:`\mathrm{CL}_{s+b}` and
:math:`\mathrm{CL}_{b}` used are based on profile likelihood
ratio test statistics.
This procedure is common in the LHC-era, but differs from
procedures used in the LEP and Tevatron eras, as briefly
discussed in :math:`\S` 3.8 of :xref:`arXiv:1007.1727`.
- :math:`\mathrm{CL}_{s,\mathrm{exp}}`: The expected :math:`\mathrm{CL}_{s}`
value corresponding to the test statistic under the background
only hypothesis :math:`\left(\mu=0\right)`.
Only returned when ``return_expected`` is ``True``.
- :math:`\mathrm{CL}_{s,\mathrm{exp}}` band: The set of expected
:math:`\mathrm{CL}_{s}` values corresponding to the median
significance of variations of the signal strength from the
background only hypothesis :math:`\left(\mu=0\right)` at
:math:`(-2,-1,0,1,2)\sigma`.
That is, the :math:`p`-values that satisfy Equation (89) of
:xref:`arXiv:1007.1727`
.. math::
\mathrm{band}_{N\sigma} = \mu' + \sigma\,\Phi^{-1}\left(1-\alpha\right) \pm N\sigma
for :math:`\mu'=0` and :math:`N \in \left\{-2, -1, 0, 1, 2\right\}`.
These values define the boundaries of an uncertainty band sometimes
referred to as the "Brazil band".
Only returned when ``return_expected_set`` is ``True``.
"""
init_pars = init_pars or pdf.config.suggested_init()
par_bounds = par_bounds or pdf.config.suggested_bounds()
fixed_params = fixed_params or pdf.config.suggested_fixed()
calc = AsymptoticCalculator(
data, pdf, init_pars, par_bounds, fixed_params, qtilde=qtilde
)
teststat = calc.teststatistic(poi_test)
sig_plus_bkg_distribution, b_only_distribution = calc.distributions(poi_test)
CLsb = sig_plus_bkg_distribution.pvalue(teststat)
CLb = b_only_distribution.pvalue(teststat)
CLs = CLsb / CLb
tensorlib, _ = get_backend()
# Ensure that all CL values are 0-d tensors
CLsb, CLb, CLs = (
tensorlib.astensor(CLsb),
tensorlib.astensor(CLb),
tensorlib.astensor(CLs),
)
_returns = [CLs]
if kwargs.get('return_tail_probs'):
_returns.append([CLsb, CLb])
if kwargs.get('return_expected_set'):
CLs_exp = []
for n_sigma in [2, 1, 0, -1, -2]:
expected_bonly_teststat = b_only_distribution.expected_value(n_sigma)
CLs = sig_plus_bkg_distribution.pvalue(
expected_bonly_teststat
) / b_only_distribution.pvalue(expected_bonly_teststat)
CLs_exp.append(tensorlib.astensor(CLs))
if kwargs.get('return_expected'):
_returns.append(CLs_exp[2])
_returns.append(CLs_exp)
elif kwargs.get('return_expected'):
n_sigma = 0
expected_bonly_teststat = b_only_distribution.expected_value(n_sigma)
CLs = sig_plus_bkg_distribution.pvalue(
expected_bonly_teststat
) / b_only_distribution.pvalue(expected_bonly_teststat)
_returns.append(tensorlib.astensor(CLs))
# Enforce a consistent return type of the observed CLs
return tuple(_returns) if len(_returns) > 1 else _returns[0]
__all__ = ['qmu', 'hypotest']