"""Various routines for calculating pulsation test statistics on event data and helper functions.
author: M. Kerr <matthew.kerr@gmail.com>
"""
import numpy as np
from numpy import exp, arange, log
from scipy.special import erfc, gamma
from scipy.stats import chi2, norm
__all__ = [
"sig2sigma",
"sigma2sig",
"sigma_trials",
"z2m",
"z2mw",
"cosm",
"sf_z2m",
"best_m",
"em_four",
"em_lc",
"hm",
"hmw",
"sf_hm",
"h2sig",
"sf_h20_dj1989",
"sf_h20_dj2010",
"sig2h20",
"sf_stackedh",
]
TWOPI = np.pi * 2
def vec(func):
    """Return an elementwise (vectorized) wrapper of *func*, keeping its docstring."""
    return np.vectorize(func, doc=func.__doc__)
def to_array(x, dtype=None):
    """Coerce *x* to a numpy array with at least one dimension.

    Scalars become length-1 1-d arrays so downstream code can index/mask
    uniformly; arrays pass through unchanged (aside from dtype coercion).
    """
    arr = np.asarray(x, dtype=dtype)
    if arr.ndim == 0:
        arr = np.asarray([arr])
    return arr
def from_array(x):
    """Inverse of to_array: unwrap a length-1 1-d array back to a scalar."""
    if x.ndim == 1 and x.shape == (1,):
        return x[0]
    return x
def sig2sigma(sig, two_tailed=True, logprob=False):
    """Convert tail probability to "sigma" units.

    Find the value of the argument for the normal distribution beyond which the
    integrated tail probability is sig. Note that the default is to interpret
    this number as the two-tailed value, as this is the quantity that goes to 0
    when sig goes to 1 and is also what most people mean by "sigma", i.e. a
    chance probability of 0.32 will return a value of 1 sigma.

    Two-tailed:
        \\int_{-sigma}^{+sigma} norm(x) dx = sig
    One-tailed:
        \\int_{-inf}^{sigma} norm(x) dx = sig

    Note that if sig >= 0.5, the one-tailed result will be negative.

    Parameters
    ----------
    sig : float or array-like
        the chance probability
    two_tailed : bool, optional
        interpret sig as two-tailed or one-tailed probability in converting
    logprob : bool, optional
        if True, the argument is the natural logarithm of the probability

    Returns
    -------
    float or ndarray
        the significance in sigma units (scalar in, scalar out)
    """
    sig = to_array(sig, dtype=float)
    if logprob:
        # keep the log form around: the asymptotic branch below works in
        # log space to avoid underflow for extremely small probabilities
        logsig = sig.copy()
        sig = np.exp(sig)
    results = np.empty_like(sig)
    # validate the input range (log-probabilities must be <= 0)
    if logprob:
        if np.any(logsig > 0):
            raise ValueError("Probability must be between 0 and 1.")
    elif np.any((sig > 1) | (sig <= 0)):
        raise ValueError("Probability must be between 0 and 1.")
    if not two_tailed:
        # convert the one-tailed probability to its two-tailed equivalent
        sig *= 2
        if logprob:
            logsig += np.log(2)
    # Use the exact value for "large" probabilities; for those where numerical
    # precision is a problem, use the asymptotic expansion.
    mask = sig > 1e-300
    results[mask] = norm.isf(sig[mask] * 0.5)
    mask = ~mask
    # asymptotic expansion of the inverse survival function for tiny sig;
    # x0 is the leading-order solution, then one correction term is applied
    if logprob:
        x0 = (-2 * (logsig[mask] + np.log(np.pi**0.5))) ** 0.5
    else:
        x0 = (-2 * np.log(sig[mask] * (np.pi) ** 0.5)) ** 0.5
    results[mask] = x0 - np.log(x0) / (1 + 2 * x0)
    return from_array(results)
def sigma2sig(sigma, two_tailed=True):
    """Convert "sigma" units to a chance probability.

    Return the integral of the normal distribution from sigma to infinity,
    or twice that quantity when two_tailed is set.

    Parameters
    ----------
    sigma : float
        argument for the normal distribution
    two_tailed : bool, optional
        if True (default), return twice the one-sided tail integral
    """
    # erfc handles the extreme tail to machine precision, so no special
    # asymptotic handling is needed here
    if two_tailed:
        return erfc(sigma / np.sqrt(2))
    return 1 - 0.5 * erfc(-sigma / np.sqrt(2))
def sigma_trials(sigma, trials):
    """Correct a sigma value for a trials factor.

    NOTE(review): the asymptotic branch for sigma >= 20 needs to be checked.
    """
    if sigma >= 20:
        # asymptotic expansion for very significant values
        return (sigma**2 - 2 * np.log(trials)) ** 0.5
    prob = sigma2sig(sigma) * trials
    if prob >= 1:
        return 0
    return sig2sigma(prob)
def z2m(phases, m=2):
    """Return the Z^2_m test for each harmonic up to the specified m.

    See de Jager et al. 1989 for the definition.

    Parameters
    ----------
    phases : array-like
        pulse phases on [0, 1)
    m : int, optional
        maximum harmonic to compute

    Returns
    -------
    ndarray
        cumulative Z^2_k for k = 1..m
    """
    phases = np.asarray(phases) * (2 * np.pi)  # phase in radians
    n = len(phases)
    harmonics = np.arange(1, m + 1)
    if n < 5e3:
        # Vectorized form: faster for 100s-1000s of phases, but requires
        # ~20x the memory of the loop alternative. Fix: the (m x n) outer
        # product is computed once and shared by cos and sin (the original
        # built it twice).
        trig = np.outer(harmonics, phases)
        s = np.cos(trig).sum(axis=1) ** 2 + np.sin(trig).sum(axis=1) ** 2
    else:
        cos_sums = np.asarray([np.cos(k * phases).sum() for k in harmonics])
        sin_sums = np.asarray([np.sin(k * phases).sum() for k in harmonics])
        s = cos_sums**2 + sin_sums**2
    return (2.0 / n) * np.cumsum(s)
def z2mw(phases, weights, m=2):
    """Return the weighted Z^2_m test for each harmonic up to the specified m.

    Each sine/cosine term is weighted by the provided per-event weights.
    In the case that they are well-distributed or assumed to be fixed, the
    CLT applies and the statistic remains calibrated. Nice!

    Parameters
    ----------
    phases : array-like
        pulse phases on [0, 1)
    weights : array-like
        per-event weights; coerced to a float ndarray, so plain lists work
    m : int, optional
        maximum harmonic to compute

    Returns
    -------
    ndarray
        cumulative weighted Z^2_k for k = 1..m
    """
    phases = np.asarray(phases) * (2 * np.pi)  # phase in radians
    # robustness fix: the weights**2 normalization below requires an ndarray,
    # so accept any sequence type for weights
    weights = np.asarray(weights, dtype=float)
    s = (
        np.asarray([(np.cos(k * phases) * weights).sum() for k in range(1, m + 1)])
    ) ** 2 + (
        np.asarray([(np.sin(k * phases) * weights).sum() for k in range(1, m + 1)])
    ) ** 2
    return np.cumsum(s) * (2.0 / (weights**2).sum())
def cosm(phases, m=2):
    """Return the cosine test for each harmonic up to the specified m.

    See de Jager et al. 1994 for the definition.
    """
    ph = np.asarray(phases) * (2 * np.pi)  # phase in radians
    n = len(ph)
    harmonics = np.arange(1, m + 1)
    if n < 5e3:
        # outer-product form: faster for moderate n at the cost of memory
        s = np.cos(np.outer(harmonics, ph)).sum(axis=1)
    else:
        s = np.asarray([np.cos(k * ph).sum() for k in harmonics])
    return (2.0 / n) * np.cumsum(s)
def sf_z2m(ts, m=2):
    """Return the survival function (chance probability) of the Z^2_m test.

    Uses the asymptotic calibration: chi^2 with 2m degrees of freedom.

    Parameters
    ----------
    ts : float or array-like
        result of the Z^2_m test
    m : int, optional
        number of harmonics used in the test
    """
    dof = 2 * m
    return chi2.sf(ts, dof)
def best_m(phases, weights=None, m=100, c=4):
    """Return the harmonic order that maximizes the penalized Z^2_m statistic.

    Equivalent to the argmax harmonic of the H statistic; the penalty per
    additional harmonic is now a parameter (default 4, the previously
    hard-coded value), consistent with the c argument of hm/hmw.

    Parameters
    ----------
    phases : array-like
        pulse phases on [0, 1)
    weights : array-like or None, optional
        per-event weights; uniform weights are used if None
    m : int, optional
        maximum harmonic to search
    c : float, optional
        penalty per additional harmonic (de Jager et al. 1989 convention)
    """
    if weights is None:
        weights = np.ones_like(phases)
    z = z2mw(phases, weights, m=m)
    return np.arange(1, m + 1)[np.argmax(z - c * np.arange(0, m))]
def em_four(phases, m=2, weights=None):
    """Return the empirical Fourier coefficients up to the mth harmonic.

    These are derived from the empirical trigonometric moments:
    a_k = (1/n) sum_i w_i cos(k phi_i), b_k = (1/n) sum_i w_i sin(k phi_i),
    with n the number of events (or the sum of the weights).

    Parameters
    ----------
    phases : array-like
        pulse phases on [0, 1)
    m : int, optional
        maximum harmonic
    weights : array-like or None, optional
        per-event weights; coerced to a float ndarray so plain lists work

    Returns
    -------
    (aks, bks)
        pair of ndarrays holding the cosine/sine coefficients for k = 1..m
    """
    phases = np.asarray(phases) * (2 * np.pi)  # phase in radians
    if weights is None:
        n = len(phases)
        weights = 1.0  # scalar broadcasts as uniform unit weights
    else:
        # robustness fix: accept any sequence of weights
        weights = np.asarray(weights, dtype=float)
        n = weights.sum()
    aks = (1.0 / n) * np.asarray(
        [(weights * np.cos(k * phases)).sum() for k in range(1, m + 1)]
    )
    bks = (1.0 / n) * np.asarray(
        [(weights * np.sin(k * phases)).sum() for k in range(1, m + 1)]
    )
    return aks, bks
def em_lc(coeffs, dom):
    """Evaluate the light curve at the provided phases (0 to 1).

    coeffs is an (aks, bks) pair of Fourier coefficients, e.g. as
    estimated by em_four.
    """
    rad = np.asarray(dom) * (2 * np.pi)
    aks, bks = coeffs
    vals = np.ones_like(rad)
    for k, (a, b) in enumerate(zip(aks, bks), start=1):
        vals += 2 * (a * np.cos(k * rad) + b * np.sin(k * rad))
    return vals
def hm(phases, m=20, c=4):
    """Calculate the H statistic (de Jager et al. 1989) for given phases.

    H_m = max(Z^2_k - c*(k-1)), 1 <= k <= m

    m is the maximum harmonic searched; c is the penalty per additional
    harmonic.
    """
    ph = np.asarray(phases) * (2 * np.pi)  # phase in radians
    cos_sums = np.asarray([np.cos(k * ph).sum() for k in range(1, m + 1)])
    sin_sums = np.asarray([np.sin(k * ph).sum() for k in range(1, m + 1)])
    z2k = (2.0 / len(ph)) * np.cumsum(cos_sums**2 + sin_sums**2)
    return (z2k - c * np.arange(0, m)).max()
def hmw(phases, weights, m=20, c=4):
    """Calculate the weighted H statistic (de Jager et al. 1989).

    Each sine/cosine term is weighted by the given weights. The
    normalization (by the sum of squared weights) is corrected such that
    the CLT still applies, i.e., it maintains the same calibration as the
    unweighted version.

    Parameters
    ----------
    phases : array-like
        pulse phases on [0, 1)
    weights : array-like
        per-event weights; coerced to a float ndarray so plain lists work
    m : int, optional
        maximum harmonic searched
    c : float, optional
        penalty per additional harmonic
    """
    phases = np.asarray(phases) * (2 * np.pi)  # phase in radians
    # robustness fix: the weights**2 normalization below requires an ndarray,
    # so accept any sequence type for weights
    weights = np.asarray(weights, dtype=float)
    s = (
        np.asarray([(weights * np.cos(k * phases)).sum() for k in range(1, m + 1)])
    ) ** 2 + (
        np.asarray([(weights * np.sin(k * phases)).sum() for k in range(1, m + 1)])
    ) ** 2
    return ((2.0 / (weights**2).sum()) * np.cumsum(s) - c * np.arange(0, m)).max()
# @vec
def sf_hm(h, m=20, c=4, logprob=False):
    """Return (analytic, asymptotic) survival function (1-F(h)) for the generalized H-test.

    For more details see:
    - docstrings for hm, hmw
    - M. Kerr dissertation (arXiv:1101.6072)
    - Kerr, ApJ 732, 38, 2011 (arXiv:1103.2128)

    Parameters
    ----------
    h : float
        value of the H statistic (scalar; the commented-out @vec decorator
        would provide elementwise operation)
    m : int, optional
        maximum harmonic searched
    c : float, optional
        penalty per additional harmonic
    logprob : bool, optional
        if True, return the natural logarithm of the probability

    Returns
    -------
    float
        the survival probability 1-F(h), or its natural logarithm if
        logprob is set
    """
    if h < 1e-16:
        return 1.0
    # gamma(x+1) generalizes the factorial to array arguments
    fact = lambda x: gamma(x + 1)
    # first, calculate the integrals of unity for all needed orders
    ints = np.empty(m)
    for i in range(m):
        # sv runs i, i-1, ..., 1 (empty for i == 0)
        sv = i - arange(0, i)  # summation vector
        # leading term, computed in log space for numerical stability
        ints[i] = exp(i * log(h + i * c) - log(fact(i)))
        # subtract the recursive contributions from lower orders
        ints[i] -= (ints[:i] * exp(sv * log(sv * c) - log(fact(sv)))).sum()
    # next, develop the integrals in the power series
    alpha = 0.5 * exp(-0.5 * c)
    return (
        -0.5 * h + np.log((alpha ** arange(0, m) * ints).sum())
        if logprob
        else exp(-0.5 * h) * (alpha ** arange(0, m) * ints).sum()
    )
def h2sig(h):
    """Shortcut: convert an H-test value directly to sigma units."""
    # work in log space so extremely significant H values do not underflow
    log_p = sf_hm(h, logprob=True)
    return sig2sigma(log_p, logprob=True)
@vec
def sf_h20_dj1989(h):
    """Convert the H-test statistic to a chance probability according to the
    formula of de Jager et al. 1989 -- NB the quadratic term is NOT correct."""
    if h <= 23:
        return 0.9999755 * np.exp(-0.39802 * h)
    if h > 50:
        return 4e-8
    return 1.210597 * np.exp(-0.45901 * h + 0.0022900 * h**2)
def sf_h20_dj2010(h):
    """Approximate asymptotic calibration for the H test (de Jager et al. 2010)."""
    return np.exp(h * -0.4)
def sig2h20(sig):
    """Invert the approximate de Jager et al. 2010 calibration: probability -> H."""
    return np.log(sig) / -0.4
def sf_stackedh(k, h, l=0.398405):
    """Return the chance probability for a stacked H test.

    Assumes the null distribution of H is exponential with scale l and
    that k sub-integrations yield a total TS of h. See, e.g.,
    de Jager & Busching 2010.
    """
    c = l * h
    # Poisson-like partial sum: sum_{i < k} c^i / i!  (gamma(i+1) == i!)
    partial = sum(c**i / gamma(i + 1) for i in range(k))
    return np.exp(-c) * partial