"""Various routines for calculating pulsation test statistics on event data and helper functions.
author: M. Kerr <matthew.kerr@gmail.com>
"""
import numpy as np
from numpy import exp, arange, log
from scipy.special import erfc, gamma
from scipy.stats import chi2, norm
__all__ = [
"sig2sigma",
"sigma2sig",
"sigma_trials",
"z2m",
"z2mw",
"cosm",
"sf_z2m",
"best_m",
"em_four",
"em_lc",
"hm",
"hmw",
"sf_hm",
"h2sig",
"sf_h20_dj1989",
"sf_h20_dj2010",
"sig2h20",
"sf_stackedh",
]
TWOPI = np.pi * 2
def vec(func):
    """Return an elementwise (vectorized) wrapper of *func*, keeping its docstring."""
    return np.vectorize(func, doc=func.__doc__)
def to_array(x, dtype=None):
    """Coerce *x* to a numpy array with at least one dimension.

    Scalars become length-1 1-d arrays so downstream code can index/mask
    uniformly; arrays pass through unchanged (aside from dtype coercion).
    """
    arr = np.asarray(x, dtype=dtype)
    if arr.ndim == 0:
        arr = np.asarray([arr])
    return arr
def from_array(x):
    """Inverse of to_array: unwrap a length-1 1-d array back to a scalar."""
    if x.ndim == 1 and x.shape == (1,):
        return x[0]
    return x
def sig2sigma(sig, two_tailed=True, logprob=False):
    """Convert tail probability to "sigma" units.

    Find the value of the argument for the normal distribution beyond which the
    integrated tail probability is sig. Note that the default is to interpret
    this number as the two-tailed value, as this is the quantity that goes to 0
    when sig goes to 1 and is also what most people mean by "sigma", i.e. a
    chance probability of 0.32 will return a value of 1 sigma.

    Two-tailed:
        \\int_{-sigma}^{+sigma} norm(x) dx = sig
    One-tailed:
        \\int_{-inf}^{sigma} norm(x) dx = sig

    Note that if sig >= 0.5, the one-tailed result will be negative.

    Parameters
    ----------
    sig : float or array-like
        the chance probability
    two_tailed : bool, optional
        interpret sig as two-tailed or one-tailed probability in converting
    logprob : bool, optional
        if True, the argument is the natural logarithm of the probability

    Returns
    -------
    float or ndarray
        the significance in sigma units (scalar in, scalar out)
    """
    sig = to_array(sig, dtype=float)
    if logprob:
        # keep the log form around: the asymptotic branch below works in
        # log space to avoid underflow for extremely small probabilities
        logsig = sig.copy()
        sig = np.exp(sig)
    results = np.empty_like(sig)
    # validate the input range (log-probabilities must be <= 0)
    if logprob:
        if np.any(logsig > 0):
            raise ValueError("Probability must be between 0 and 1.")
    elif np.any((sig > 1) | (sig <= 0)):
        raise ValueError("Probability must be between 0 and 1.")
    if not two_tailed:
        # convert the one-tailed probability to its two-tailed equivalent
        sig *= 2
        if logprob:
            logsig += np.log(2)
    # Use the exact value for "large" probabilities; for those where numerical
    # precision is a problem, use the asymptotic expansion.
    mask = sig > 1e-300
    results[mask] = norm.isf(sig[mask] * 0.5)
    mask = ~mask
    # asymptotic expansion of the inverse survival function for tiny sig;
    # x0 is the leading-order solution, then one correction term is applied
    if logprob:
        x0 = (-2 * (logsig[mask] + np.log(np.pi**0.5))) ** 0.5
    else:
        x0 = (-2 * np.log(sig[mask] * (np.pi) ** 0.5)) ** 0.5
    results[mask] = x0 - np.log(x0) / (1 + 2 * x0)
    return from_array(results)
def sigma2sig(sigma, two_tailed=True):
    """Convert "sigma" units to a chance probability.

    Return the integral of the normal distribution from sigma to infinity,
    or twice that quantity when two_tailed is set.

    Parameters
    ----------
    sigma : float
        argument for the normal distribution
    two_tailed : bool, optional
        if True (default), return twice the one-sided tail integral
    """
    # erfc handles the extreme tail to machine precision, so no special
    # asymptotic handling is needed here
    if two_tailed:
        return erfc(sigma / np.sqrt(2))
    return 1 - 0.5 * erfc(-sigma / np.sqrt(2))
def sigma_trials(sigma, trials):
    """Correct a sigma value for a trials factor.

    NOTE(review): the asymptotic branch for sigma >= 20 needs to be checked.
    """
    if sigma >= 20:
        # asymptotic expansion for very significant values
        return (sigma**2 - 2 * np.log(trials)) ** 0.5
    prob = sigma2sig(sigma) * trials
    if prob >= 1:
        return 0
    return sig2sigma(prob)
def z2m(phases, m=2):
    """Return the Z^2_m test for each harmonic up to the specified m.

    See de Jager et al. 1989 for the definition.

    Parameters
    ----------
    phases : array-like
        pulse phases on [0, 1)
    m : int, optional
        maximum harmonic to compute

    Returns
    -------
    ndarray
        cumulative Z^2_k for k = 1..m
    """
    phases = np.asarray(phases) * (2 * np.pi)  # phase in radians
    n = len(phases)
    harmonics = np.arange(1, m + 1)
    if n < 5e3:
        # Vectorized form: faster for 100s-1000s of phases, but requires
        # ~20x the memory of the loop alternative. Fix: the (m x n) outer
        # product is computed once and shared by cos and sin (the original
        # built it twice).
        trig = np.outer(harmonics, phases)
        s = np.cos(trig).sum(axis=1) ** 2 + np.sin(trig).sum(axis=1) ** 2
    else:
        cos_sums = np.asarray([np.cos(k * phases).sum() for k in harmonics])
        sin_sums = np.asarray([np.sin(k * phases).sum() for k in harmonics])
        s = cos_sums**2 + sin_sums**2
    return (2.0 / n) * np.cumsum(s)
def z2mw(phases, weights, m=2):
    """Return the weighted Z^2_m test for each harmonic up to the specified m.

    Each sine/cosine term is weighted by the provided per-event weights.
    In the case that they are well-distributed or assumed to be fixed, the
    CLT applies and the statistic remains calibrated. Nice!

    Parameters
    ----------
    phases : array-like
        pulse phases on [0, 1)
    weights : array-like
        per-event weights; coerced to a float ndarray, so plain lists work
    m : int, optional
        maximum harmonic to compute

    Returns
    -------
    ndarray
        cumulative weighted Z^2_k for k = 1..m
    """
    phases = np.asarray(phases) * (2 * np.pi)  # phase in radians
    # robustness fix: the weights**2 normalization below requires an ndarray,
    # so accept any sequence type for weights
    weights = np.asarray(weights, dtype=float)
    s = (
        np.asarray([(np.cos(k * phases) * weights).sum() for k in range(1, m + 1)])
    ) ** 2 + (
        np.asarray([(np.sin(k * phases) * weights).sum() for k in range(1, m + 1)])
    ) ** 2
    return np.cumsum(s) * (2.0 / (weights**2).sum())
def cosm(phases, m=2):
    """Return the cosine test for each harmonic up to the specified m.

    See de Jager et al. 1994 for the definition.
    """
    ph = np.asarray(phases) * (2 * np.pi)  # phase in radians
    n = len(ph)
    harmonics = np.arange(1, m + 1)
    if n < 5e3:
        # outer-product form: faster for moderate n at the cost of memory
        s = np.cos(np.outer(harmonics, ph)).sum(axis=1)
    else:
        s = np.asarray([np.cos(k * ph).sum() for k in harmonics])
    return (2.0 / n) * np.cumsum(s)
def sf_z2m(ts, m=2):
    """Return the survival function (chance probability) of the Z^2_m test.

    Uses the asymptotic calibration: chi^2 with 2m degrees of freedom.

    Parameters
    ----------
    ts : float or array-like
        result of the Z^2_m test
    m : int, optional
        number of harmonics used in the test
    """
    dof = 2 * m
    return chi2.sf(ts, dof)
def best_m(phases, weights=None, m=100, c=4):
    """Return the harmonic order that maximizes the penalized Z^2_m statistic.

    Equivalent to the argmax harmonic of the H statistic; the penalty per
    additional harmonic is now a parameter (default 4, the previously
    hard-coded value), consistent with the c argument of hm/hmw.

    Parameters
    ----------
    phases : array-like
        pulse phases on [0, 1)
    weights : array-like or None, optional
        per-event weights; uniform weights are used if None
    m : int, optional
        maximum harmonic to search
    c : float, optional
        penalty per additional harmonic (de Jager et al. 1989 convention)
    """
    if weights is None:
        weights = np.ones_like(phases)
    z = z2mw(phases, weights, m=m)
    return np.arange(1, m + 1)[np.argmax(z - c * np.arange(0, m))]
def em_four(phases, m=2, weights=None):
    """Return the empirical Fourier coefficients up to the mth harmonic.

    These are derived from the empirical trigonometric moments:
    a_k = (1/n) sum_i w_i cos(k phi_i), b_k = (1/n) sum_i w_i sin(k phi_i),
    with n the number of events (or the sum of the weights).

    Parameters
    ----------
    phases : array-like
        pulse phases on [0, 1)
    m : int, optional
        maximum harmonic
    weights : array-like or None, optional
        per-event weights; coerced to a float ndarray so plain lists work

    Returns
    -------
    (aks, bks)
        pair of ndarrays holding the cosine/sine coefficients for k = 1..m
    """
    phases = np.asarray(phases) * (2 * np.pi)  # phase in radians
    if weights is None:
        n = len(phases)
        weights = 1.0  # scalar broadcasts as uniform unit weights
    else:
        # robustness fix: accept any sequence of weights
        weights = np.asarray(weights, dtype=float)
        n = weights.sum()
    aks = (1.0 / n) * np.asarray(
        [(weights * np.cos(k * phases)).sum() for k in range(1, m + 1)]
    )
    bks = (1.0 / n) * np.asarray(
        [(weights * np.sin(k * phases)).sum() for k in range(1, m + 1)]
    )
    return aks, bks
def em_lc(coeffs, dom):
    """Evaluate the light curve at the provided phases (0 to 1).

    coeffs is an (aks, bks) pair of Fourier coefficients, e.g. as
    estimated by em_four.
    """
    rad = np.asarray(dom) * (2 * np.pi)
    aks, bks = coeffs
    vals = np.ones_like(rad)
    for k, (a, b) in enumerate(zip(aks, bks), start=1):
        vals += 2 * (a * np.cos(k * rad) + b * np.sin(k * rad))
    return vals
def hm(phases, m=20, c=4):
    """Calculate the H statistic (de Jager et al. 1989) for given phases.

    H_m = max(Z^2_k - c*(k-1)), 1 <= k <= m

    m is the maximum harmonic searched; c is the penalty per additional
    harmonic.
    """
    ph = np.asarray(phases) * (2 * np.pi)  # phase in radians
    cos_sums = np.asarray([np.cos(k * ph).sum() for k in range(1, m + 1)])
    sin_sums = np.asarray([np.sin(k * ph).sum() for k in range(1, m + 1)])
    z2k = (2.0 / len(ph)) * np.cumsum(cos_sums**2 + sin_sums**2)
    return (z2k - c * np.arange(0, m)).max()
def hmw(phases, weights, m=20, c=4):
    """Calculate the weighted H statistic (de Jager et al. 1989).

    Each sine/cosine term is weighted by the given weights. The
    normalization (by the sum of squared weights) is corrected such that
    the CLT still applies, i.e., it maintains the same calibration as the
    unweighted version.

    Parameters
    ----------
    phases : array-like
        pulse phases on [0, 1)
    weights : array-like
        per-event weights; coerced to a float ndarray so plain lists work
    m : int, optional
        maximum harmonic searched
    c : float, optional
        penalty per additional harmonic
    """
    phases = np.asarray(phases) * (2 * np.pi)  # phase in radians
    # robustness fix: the weights**2 normalization below requires an ndarray,
    # so accept any sequence type for weights
    weights = np.asarray(weights, dtype=float)
    s = (
        np.asarray([(weights * np.cos(k * phases)).sum() for k in range(1, m + 1)])
    ) ** 2 + (
        np.asarray([(weights * np.sin(k * phases)).sum() for k in range(1, m + 1)])
    ) ** 2
    return ((2.0 / (weights**2).sum()) * np.cumsum(s) - c * np.arange(0, m)).max()
# @vec
def sf_hm(h, m=20, c=4, logprob=False):
    """Return (analytic, asymptotic) survival function (1-F(h)) for the generalized H-test.

    For more details see:
    - docstrings for hm, hmw
    - M. Kerr dissertation (arXiv:1101.6072)
    - Kerr, ApJ 732, 38, 2011 (arXiv:1103.2128)

    Parameters
    ----------
    h : float
        value of the H statistic (scalar; the commented-out @vec decorator
        would provide elementwise operation)
    m : int, optional
        maximum harmonic searched
    c : float, optional
        penalty per additional harmonic
    logprob : bool, optional
        if True, return the natural logarithm of the probability

    Returns
    -------
    float
        the survival probability 1-F(h), or its natural logarithm if
        logprob is set
    """
    if h < 1e-16:
        return 1.0
    # gamma(x+1) generalizes the factorial to array arguments
    fact = lambda x: gamma(x + 1)
    # first, calculate the integrals of unity for all needed orders
    ints = np.empty(m)
    for i in range(m):
        # sv runs i, i-1, ..., 1 (empty for i == 0)
        sv = i - arange(0, i)  # summation vector
        # leading term, computed in log space for numerical stability
        ints[i] = exp(i * log(h + i * c) - log(fact(i)))
        # subtract the recursive contributions from lower orders
        ints[i] -= (ints[:i] * exp(sv * log(sv * c) - log(fact(sv)))).sum()
    # next, develop the integrals in the power series
    alpha = 0.5 * exp(-0.5 * c)
    return (
        -0.5 * h + np.log((alpha ** arange(0, m) * ints).sum())
        if logprob
        else exp(-0.5 * h) * (alpha ** arange(0, m) * ints).sum()
    )
def h2sig(h):
    """Shortcut: convert an H-test value directly to sigma units."""
    # work in log space so extremely significant H values do not underflow
    log_p = sf_hm(h, logprob=True)
    return sig2sigma(log_p, logprob=True)
@vec
def sf_h20_dj1989(h):
    """Convert the H-test statistic to a chance probability according to the
    formula of de Jager et al. 1989 -- NB the quadratic term is NOT correct."""
    if h <= 23:
        return 0.9999755 * np.exp(-0.39802 * h)
    if h > 50:
        return 4e-8
    return 1.210597 * np.exp(-0.45901 * h + 0.0022900 * h**2)
def sf_h20_dj2010(h):
    """Approximate asymptotic calibration for the H test (de Jager et al. 2010)."""
    return np.exp(h * -0.4)
def sig2h20(sig):
    """Invert the approximate de Jager et al. 2010 calibration: probability -> H."""
    return np.log(sig) / -0.4
def sf_stackedh(k, h, l=0.398405):
    """Return the chance probability for a stacked H test.

    Assumes the null distribution of H is exponential with scale l and
    that k sub-integrations yield a total TS of h. See, e.g.,
    de Jager & Busching 2010.
    """
    c = l * h
    # Poisson-like partial sum: sum_{i < k} c^i / i!  (gamma(i+1) == i!)
    partial = sum(c**i / gamma(i + 1) for i in range(k))
    return np.exp(-c) * partial