Source code for pint.scripts.event_optimize

#!/usr/bin/env python -W ignore::FutureWarning -W ignore::UserWarning -W ignore::DeprecationWarning
import argparse
import sys
import os

import matplotlib.pyplot as plt
import numpy as np
import scipy.optimize as op
from astropy.coordinates import SkyCoord
from scipy.stats import norm, uniform
import pint.logging
from loguru import logger as log

pint.logging.setup(level=pint.logging.script_level)

import pint.fermi_toas as fermi
import pint.models
import pint.plot_utils as plot_utils
import pint.toa as toa
from pint.eventstats import hm, hmw
from pint.fitter import Fitter
from pint.models.priors import Prior
from pint.observatory.satellite_obs import get_satellite_observatory


__all__ = ["read_gaussfitfile", "marginalize_over_phase", "main"]
# log.setLevel('DEBUG')
# np.seterr(all='raise')

# initialization values
# Should probably figure a way to make these not global variables
maxpost = -9e99
numcalls = 0


class custom_timing(
    pint.models.spindown.Spindown, pint.models.astrometry.AstrometryEcliptic
):
    def __init__(self, parfile):
        super().__init__()
        self.read_parfile(parfile)


def read_gaussfitfile(gaussfitfile, proflen):
    """Read a Gaussian-fit file as created by the output of pygaussfit.py.

    Parameters
    ----------
    gaussfitfile : str
        Name of the input file.
    proflen : int
        The number of bins to include in the resulting template.

    Returns
    -------
    np.array
        A template of length ``proflen``.
    """
    phass = []
    ampls = []
    fwhms = []
    for line in open(gaussfitfile):
        if line.lstrip().startswith("phas"):
            phass.append(float(line.split()[2]))
        if line.lstrip().startswith("ampl"):
            ampls.append(float(line.split()[2]))
        if line.lstrip().startswith("fwhm"):
            fwhms.append(float(line.split()[2]))
    if not (len(phass) == len(ampls) == len(fwhms)):
        log.warning(
            f"Number of phases, amplitudes, and FWHMs are not the same in '{gaussfitfile}'!"
        )
        return 0.0
    phass = np.asarray(phass)
    ampls = np.asarray(ampls)
    fwhms = np.asarray(fwhms)
    # Now sort them all according to decreasing amplitude
    new_order = np.argsort(ampls)
    new_order = new_order[::-1]
    ampls = np.take(ampls, new_order)
    phass = np.take(phass, new_order)
    fwhms = np.take(fwhms, new_order)
    # Now put the biggest gaussian at phase = 0.0
    phass = phass - phass[0]
    phass %= 1
    template = np.zeros(proflen, dtype="d")
    for ii in range(len(ampls)):
        template += ampls[ii] * gaussian_profile(proflen, phass[ii], fwhms[ii])
    return template


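# A minimal usage sketch (the file name below is illustrative, not part of
# PINT): build a 256-bin template from a pygaussfit.py output file and
# normalize it to unit mean, as main() does below.
#
#     gtemplate = read_gaussfitfile("PSRJ0030+0451.gaussians", 256)
#     gtemplate /= gtemplate.mean()
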
def gaussian_profile(N, phase, fwhm):
    """Return a gaussian pulse profile with 'N' bins and an integrated 'flux' of 1 unit.

    Parameters
    ----------
    N : int
        the number of points in the profile
    phase : float
        the pulse phase (0-1)
    fwhm : float
        the gaussian pulse's full width at half-max

    Note
    ----
    The FWHM of a gaussian is approx 2.35482 sigma.
    """
    sigma = fwhm / 2.35482
    mean = phase % 1.0
    phsval = np.arange(N, dtype="d") / float(N)
    if mean < 0.5:
        phsval = np.where(phsval > (mean + 0.5), phsval - 1.0, phsval)
    else:
        phsval = np.where(phsval < (mean - 0.5), phsval + 1.0, phsval)
    try:
        zs = (phsval - mean) / sigma
        okzinds = np.compress(np.fabs(zs) < 20.0, np.arange(N))
        okzs = np.take(zs, okzinds)
        retval = np.zeros(N, "d")
        np.put(
            retval, okzinds, np.exp(-0.5 * (okzs) ** 2.0) / (sigma * np.sqrt(2 * np.pi))
        )
        return retval
    except OverflowError:
        log.warning("Problem in gaussian prof: mean = %f sigma = %f" % (mean, sigma))
        return np.zeros(N, "d")


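# Sanity-check sketch (made-up numbers): the profile is a unit-flux gaussian,
# so the Riemann sum of the bins times the bin width 1/N should be close to 1.
#
#     prof = gaussian_profile(256, phase=0.3, fwhm=0.05)
#     assert abs(prof.sum() / 256 - 1.0) < 1e-2
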
def measure_phase(profile, template, rotate_prof=True):
    """
    measure_phase(profile, template):
        Call FFTFIT on the profile and template to determine the
        following parameters: shift, eshift, snr, esnr, b, errb, ngood
        (returned as a tuple).  These are defined as in Taylor's talk at
        the Royal Society.
    """
    import fftfit

    c, amp, pha = fftfit.cprof(template)
    pha1 = pha[0]
    if rotate_prof:
        pha = np.fmod(pha - np.arange(1, len(pha) + 1) * pha1, 2.0 * np.pi)
    shift, eshift, snr, esnr, b, errb, ngood = fftfit.fftfit(profile, amp, pha)
    return shift, eshift, snr, esnr, b, errb, ngood


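# Note: fftfit is an optional external extension (e.g. the FFTFIT wrapper
# distributed with PRESTO); it is imported lazily above so this module still
# imports cleanly when fftfit is not installed.
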
def profile_likelihood(phs, *otherargs):
    """
    A single likelihood calc for matching phases to a template.
    Likelihood is calculated as per eqn 2 in Pletsch & Clark 2015.
    """
    xvals, phases, template, weights = otherargs
    phss = phases.astype(np.float64) + phs
    phss %= 1
    probs = np.interp(phss, xvals, template, right=template[0])
    if weights is None:
        return np.log(probs).sum()
    else:
        return np.log(weights * probs + 1.0 - weights).sum()


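# The weighted branch above implements eqn 2 of Pletsch & Clark (2015): each
# photon contributes w_i * f(phi_i + phs) + (1 - w_i), where f is the template
# normalized to unit mean and w_i is the probability that the photon came from
# the pulsar.  A toy sketch with made-up numbers:
#
#     xvals = np.linspace(0.0, 1.0, 256, endpoint=False)
#     template = 1.0 + 0.5 * np.cos(2 * np.pi * xvals)  # unit-mean toy template
#     phases = np.array([0.1, 0.5, 0.9])
#     weights = np.array([0.9, 0.5, 0.1])
#     lnlike = profile_likelihood(0.0, xvals, phases, template, weights)
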
def neg_prof_like(phs, *otherargs):
    return -profile_likelihood(phs, *otherargs)


def marginalize_over_phase(
    phases,
    template,
    weights=None,
    resolution=1.0 / 1024,
    minimize=True,
    fftfit=False,
    showplot=False,
    lophs=0.0,
    hiphs=1.0,
):
    """Find the best-fit pulse phase for a profile comprised of combined photon phases.

    A maximum likelihood technique is used.  The shift and the max log
    likelihood are returned.  You probably want to use "minimize" rather
    than "fftfit" unless you are only sampling very close to your known min.
    """
    ltemp = len(template)
    xtemp = np.arange(ltemp) * 1.0 / ltemp
    if minimize:
        phs, like = marginalize_over_phase(
            phases,
            template,
            weights,
            resolution=1.0 / 64,
            minimize=False,
            showplot=showplot,
        )
        phs = 1.0 - phs / ltemp
        hwidth = 0.03
        lophs, hiphs = phs - hwidth, phs + hwidth
        result = op.minimize(
            neg_prof_like,
            [phs],
            args=(xtemp, phases, template, weights),
            bounds=[[lophs, hiphs]],
        )
        return ltemp - result["x"] * ltemp, -result["fun"]
    if fftfit:
        deltabin = 3
        h, x = np.histogram(
            phases.astype(np.float64), ltemp, range=[0.0, 1.0], weights=weights
        )
        s, es, snr, esnr, b, errb, ngood = measure_phase(h, template, rotate_prof=False)
        # s is in bins based on the template size
        lophs = (ltemp - s - deltabin) / float(ltemp)  # bins below
        if lophs < 0.0:
            lophs += 1.0
        hiphs = lophs + 2.0 * deltabin / float(ltemp)  # bins above
    dphss = np.arange(lophs, hiphs, resolution)
    trials = phases.astype(np.float64) + dphss[:, np.newaxis]
    # ensure that all the phases are within 0-1
    trials[trials > 1.0] -= 1.0
    probs = np.interp(trials, xtemp, template, right=template[0])
    if weights is None:
        lnlikes = (np.log(probs)).sum(axis=1)
    else:
        lnlikes = (np.log(weights * probs + 1.0 - weights)).sum(axis=1)
    if showplot:
        plt.plot(dphss, lnlikes)
        plt.xlabel("Pulse Phase")
        plt.ylabel("Log likelihood")
        plt.show()
    return ltemp - dphss[lnlikes.argmax()] * ltemp, lnlikes.max()


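# Typical call, mirroring main() below: recover the best shift (returned in
# template bins) and the maximum log likelihood for a set of photon phases.
#
#     maxbin, like_start = marginalize_over_phase(
#         phss, gtemplate, weights=weights, minimize=True, showplot=False
#     )
#     start_phase = 1.0 - maxbin[0] / float(len(gtemplate))
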
def get_fit_keyvals(model, phs=0.0, phserr=0.1):
    """Read the model to determine fitted keys and their values and errors from the par file"""
    fitkeys = [p for p in model.params if not getattr(model, p).frozen]
    fitvals = []
    fiterrs = []
    for p in fitkeys:
        fitvals.append(getattr(model, p).value)
        fiterrs.append(getattr(model, p).uncertainty_value)
    # The last entry in each of the fit lists is our absolute PHASE term
    # Hopefully this will become a full PINT model param soon.
    fitkeys.append("PHASE")
    fitvals.append(phs)
    fiterrs.append(phserr)
    return fitkeys, np.asarray(fitvals), np.asarray(fiterrs)


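# Sketch of the returned structure (actual keys depend on the par file): the
# unfrozen parameters in model order, with the absolute PHASE term appended.
#
#     fitkeys, fitvals, fiterrs = get_fit_keyvals(modelin, phs=0.0, phserr=0.03)
#     # fitkeys -> e.g. ["F0", "F1", ..., "PHASE"]
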
def run_sampler_autocorr(sampler, pos, nsteps, burnin, csteps=100, crit1=10):
    """Run the sampler and check for chain convergence.

    Parameters
    ----------
    sampler
        The emcee EnsembleSampler
    pos
        The initial positions of the walkers
    nsteps : int
        The number of integration steps
    burnin : int
        The number of burn-in steps
    csteps : int
        The interval at which the autocorrelation time is computed
    crit1 : int
        The ratio of chain length to autocorrelation time required for convergence

    Returns
    -------
    The list of mean autocorrelation times (the sampler is advanced in place)

    Note
    ----
    The function checks for convergence of the chains every specified number of steps.
    The criteria for convergence are:

    1. the chain has to be longer than the specified ratio times the estimated autocorrelation time
    2. the change in the estimated autocorrelation time is less than 1%
    """
    autocorr = []
    old_tau = np.inf
    converged1 = False
    converged2 = False
    for sample in sampler.sample(pos, iterations=nsteps, progress=True):
        if not converged1:
            # Check if the iteration is past the burn-in, then check for
            # convergence with a 10% tau-change criterion
            if sampler.iteration >= burnin and sampler.iteration % csteps == 0:
                tau = sampler.get_autocorr_time(tol=0, quiet=True)
                if np.any(np.isnan(tau)):
                    continue
                else:
                    x = np.mean(tau)
                    autocorr.append(x)
                    converged1 = np.all(tau * crit1 < sampler.iteration)
                    converged1 &= np.all(np.abs(old_tau - tau) / tau < 0.1)
                    # log.info("The mean estimated integrated autocorrelation step is: " + str(x))
                    old_tau = tau
                    if converged1:
                        log.info(
                            "10 % convergence reached with a mean estimated integrated step: "
                            + str(x)
                        )
                    else:
                        continue
            else:
                continue
        else:
            if not converged2:
                # Check for convergence every 25 steps instead of 100, with a
                # 1% tau-change criterion
                if sampler.iteration % int(csteps / 4) == 0:
                    tau = sampler.get_autocorr_time(tol=0, quiet=True)
                    if np.any(np.isnan(tau)):
                        continue
                    else:
                        x = np.mean(tau)
                        autocorr.append(x)
                        converged2 = np.all(tau * crit1 < sampler.iteration)
                        converged2 &= np.all(np.abs(old_tau - tau) / tau < 0.01)
                        # log.info("The mean estimated integrated autocorrelation step is: " + str(x))
                        old_tau = tau
                        converge_step = sampler.iteration
                else:
                    continue
            if converged2 and (sampler.iteration - burnin) >= 1000:
                log.info(f"Convergence reached at {converge_step}")
                break
            else:
                continue
    return autocorr


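# A minimal sketch of driving this convergence loop (the sampler construction
# mirrors the calls made in main() below):
#
#     sampler = emcee.EnsembleSampler(
#         nwalkers, ndim, ftr.lnposterior, blobs_dtype=dtype, backend=backend
#     )
#     autocorr = run_sampler_autocorr(sampler, pos, nsteps, burnin)
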
class emcee_fitter(Fitter):
    def __init__(
        self, toas=None, model=None, template=None, weights=None, phs=0.5, phserr=0.03
    ):
        # super().__init__(model=model, toas=toas)
        self.toas = toas
        self.model = model
        self.template = template
        if template is not None:
            self.ltemp = len(template)
            self.xtemp = np.arange(self.ltemp) * 1.0 / self.ltemp
        self.weights = weights
        self.fitkeys, self.fitvals, self.fiterrs = get_fit_keyvals(
            self.model, phs, phserr
        )
        self.n_fit_params = len(self.fitvals)

    def get_event_phases(self):
        """
        Return pulse phases based on the current model
        """
        phss = self.model.phase(self.toas).frac
        return phss.value % 1

    def lnprior(self, theta):
        """
        The log prior evaluated at the parameter values specified
        """
        lnsum = 0.0
        for val, key in zip(theta[:-1], self.fitkeys[:-1]):
            lnsum += getattr(self.model, key).prior_pdf(val, logpdf=True)
        # Add the phase term
        return -np.inf if theta[-1] > 1.0 or theta[-1] < 0.0 else lnsum

    def lnposterior(self, theta):
        """
        The log posterior (priors * likelihood)
        """
        global maxpost, numcalls, ftr
        self.set_params(dict(zip(self.fitkeys[:-1], theta[:-1])))
        numcalls += 1
        if numcalls % (nwalkers * nsteps / 100) == 0:
            log.info("~%d%% complete" % (numcalls / (nwalkers * nsteps / 100)))
        # Evaluate the prior FIRST, then don't even bother computing
        # the posterior if the prior is not finite
        lnprior = self.lnprior(theta)
        if not np.isfinite(lnprior):
            return -np.inf, -np.inf, -np.inf
        # Call PINT to compute the phases
        phases = self.get_event_phases()
        lnlikelihood = profile_likelihood(
            theta[-1], self.xtemp, phases, self.template, self.weights
        )
        lnpost = lnprior + lnlikelihood
        if lnpost > maxpost:
            log.info("New max: %f" % lnpost)
            for name, val in zip(ftr.fitkeys, theta):
                log.info("  %8s: %25.15g" % (name, val))
            maxpost = lnpost
            self.maxpost_fitvals = theta
        return lnpost, lnprior, lnlikelihood

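    # Note: the extra (lnprior, lnlikelihood) return values are stored by emcee
    # as blobs; main() declares blobs_dtype=[("lnprior", float),
    # ("lnlikelihood", float)] and retrieves them later via sampler.get_blobs().
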
    def minimize_func(self, theta):
        """
        Returns -log(likelihood) so that we can use scipy.optimize.minimize
        """
        # first scale the params based on the errors
        ntheta = (theta[:-1] * self.fiterrs[:-1]) + self.fitvals[:-1]
        self.set_params(dict(zip(self.fitkeys[:-1], ntheta)))
        if not np.isfinite(self.lnprior(ntheta)):
            return np.inf
        phases = self.get_event_phases()
        lnlikelihood = profile_likelihood(
            theta[-1], self.xtemp, phases, self.template, self.weights
        )
        return -lnlikelihood

    def phaseogram(
        self, weights=None, bins=100, rotate=0.0, size=5, alpha=0.25, plotfile=None
    ):
        """
        Make a nice 2-panel phaseogram for the current model
        """
        mjds = self.toas.table["tdbld"].data
        phss = self.get_event_phases()
        plot_utils.phaseogram(
            mjds,
            phss,
            weights=self.weights,
            bins=bins,
            rotate=rotate,
            size=size,
            alpha=alpha,
            plotfile=plotfile,
        )

    def prof_vs_weights(self, nbins=50, use_weights=False):
        """
        Show binned profiles (and H-test values) as a function
        of the minimum weight used.  nbins is only for the plots.
        """
        global ftr
        f, ax = plt.subplots(3, 3, sharex=True)
        phss = ftr.get_event_phases()
        htests = []
        weights = np.linspace(0.0, 0.95, 20)
        for ii, minwgt in enumerate(weights):
            good = ftr.weights > minwgt
            nphotons = np.sum(good)
            wgts = ftr.weights[good] if use_weights else None
            if nphotons <= 0:
                hval = 0
            else:
                hval = hmw(phss[good], weights=wgts) if use_weights else hm(phss[good])
            htests.append(hval)
            if ii > 0 and ii % 2 == 0 and ii < 20:
                r, c = ((ii - 2) // 2) // 3, ((ii - 2) // 2) % 3
                ax[r][c].hist(
                    phss[good],
                    nbins,
                    range=[0, 1],
                    weights=wgts,
                    color="k",
                    histtype="step",
                )
                ax[r][c].set_title(
                    "%.1f / %.1f / %.0f" % (minwgt, hval, nphotons), fontsize=11
                )
                if c == 0:
                    ax[r][c].set_ylabel("Htest")
                if r == 2:
                    ax[r][c].set_xlabel("Phase")
        f.suptitle(
            f"{self.model.PSR.value}: Minwgt / H-test / Approx # events",
            fontweight="bold",
        )
        if use_weights:
            plt.savefig(f"{ftr.model.PSR.value}_profs_v_wgtcut.png")
        else:
            plt.savefig(f"{ftr.model.PSR.value}_profs_v_wgtcut_unweighted.png")
        plt.close()
        plt.plot(weights, htests, "k")
        plt.xlabel("Min Weight")
        plt.ylabel("H-test")
        plt.title(self.model.PSR.value)
        if use_weights:
            plt.savefig(f"{ftr.model.PSR.value}_htest_v_wgtcut.png")
        else:
            plt.savefig(f"{ftr.model.PSR.value}_htest_v_wgtcut_unweighted.png")
        plt.close()

    def plot_priors(self, chains, burnin, bins=100, scale=False):
        plot_utils.plot_priors(
            self.model,
            chains,
            self.maxpost_fitvals,
            self.fitvals,
            burnin=burnin,
            bins=bins,
            scale=scale,
        )


def main(argv=None):
    parser = argparse.ArgumentParser(
        description="PINT tool for MCMC optimization of timing models using event data.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("eventfile", help="event file to use")
    parser.add_argument("parfile", help="par file to read model from")
    parser.add_argument("gaussianfile", help="gaussian file that defines template")
    parser.add_argument("--ft2", help="Path to FT2 file.", default=None)
    parser.add_argument(
        "--weightcol",
        help="name of weight column (or 'CALC' to have them computed)",
        default=None,
    )
    parser.add_argument(
        "--nwalkers", help="Number of MCMC walkers", type=int, default=200
    )
    parser.add_argument(
        "--burnin",
        help="Number of MCMC steps for burn in",
        type=int,
        default=100,
    )
    parser.add_argument(
        "--nsteps",
        help="Number of MCMC steps to compute",
        type=int,
        default=1000,
    )
    parser.add_argument(
        "--minMJD", help="Earliest MJD to use", type=float, default=54680.0
    )
    parser.add_argument(
        "--maxMJD", help="Latest MJD to use", type=float, default=57250.0
    )
    parser.add_argument(
        "--phs", help="Starting phase offset [0-1] (def is to measure)", type=float
    )
    parser.add_argument(
        "--phserr", help="Error on starting phase", type=float, default=0.03
    )
    parser.add_argument(
        "--minWeight",
        help="Minimum weight to include",
        type=float,
        default=0.05,
    )
    parser.add_argument(
        "--wgtexp",
        help="Raise computed weights to this power (or 0.0 to disable any rescaling of weights)",
        type=float,
        default=0.0,
    )
    parser.add_argument(
        "--testWeights",
        help="Make plots to evaluate weight cuts?",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--doOpt",
        help="Run initial scipy opt before MCMC?",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--initerrfact",
        help="Multiply par file errors by this factor when initializing walker starting values",
        type=float,
        default=0.1,
    )
    parser.add_argument(
        "--priorerrfact",
        help="Multiply par file errors by this factor when setting gaussian prior widths",
        type=float,
        default=10.0,
    )
    parser.add_argument(
        "--usepickle",
        help="Read events from pickle file, if available?",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--log-level",
        type=str,
        choices=pint.logging.levels,
        default=pint.logging.script_level,
        help="Logging level",
        dest="loglevel",
    )
    parser.add_argument(
        "-v", "--verbosity", default=0, action="count", help="Increase output verbosity"
    )
    parser.add_argument(
        "-q", "--quiet", default=0, action="count", help="Decrease output verbosity"
    )
    parser.add_argument(
        "--multicore",
        default=False,
        action="store_true",
        help="Run event optimize on multiple cores",
    )
    parser.add_argument(
        "--ncores",
        type=int,
        default=8,
        help="The number of cores for parallel processing",
    )
    parser.add_argument(
        "--backend",
        help="Save chains to an h5 file",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--filepath",
        type=str,
        help="File path to save all output files to",
    )
    parser.add_argument(
        "--basename",
        type=str,
        help="Base name for all output files",
    )
    parser.add_argument(
        "--clobber",
        help="Overwrite previous output files",
        default=False,
        action="store_true",
    )
    parser.add_argument(
        "--no-autocorr",
        help="Turn the autocorrelation check function off",
        default=False,
        action="store_true",
        dest="noautocorr",
    )

    args = parser.parse_args(argv)
    pint.logging.setup(
        level=pint.logging.get_level(args.loglevel, args.verbosity, args.quiet)
    )

    global nwalkers, nsteps, ftr

    eventfile = args.eventfile
    parfile = args.parfile
    gaussianfile = args.gaussianfile
    weightcol = args.weightcol

    if args.ft2 is not None:
        # Instantiate Fermi observatory once so it gets added to the observatory registry
        get_satellite_observatory("Fermi", args.ft2)

    nwalkers = args.nwalkers
    burnin = args.burnin
    nsteps = args.nsteps
    if burnin >= nsteps:
        log.error("burnin must be < nsteps")
        sys.exit(1)

    nbins = 256  # For likelihood calculation based on gaussians file
    outprof_nbins = 256  # in the text file, for pygaussfit.py, for instance

    minMJD = args.minMJD
    maxMJD = args.maxMJD  # Usually set by coverage of IERS file

    minWeight = args.minWeight
    do_opt_first = args.doOpt
    wgtexp = args.wgtexp
    ncores = args.ncores

    # Read in initial model
    modelin = pint.models.get_model(parfile)

    # File name setup and clobber file check
    filepath = args.filepath or os.getcwd()
    basename = args.basename or modelin.PSR.value
    filename = os.path.join(filepath, basename)

    check_file = os.path.isfile(
        filename + "_pre.png"
    )  # Checks to see if the first generated phaseogram file exists
    if check_file:
        if args.clobber:
            log.warning("Clobber flag is on: Preexisting files will be overwritten")
        else:
            log.warning(
                "Clobber flag is not on: Preexisting files will not be overwritten. "
                "Change the basename or filepath to avoid overwriting previous results"
            )
            raise Exception

    # The custom_timing version below is to manually construct the TimingModel
    # class, which allows it to be pickled. This is needed for parallelizing
    # the emcee call over a number of threads.  So far, it isn't quite working
    # so it is disabled.  The code above constructs the TimingModel class
    # dynamically, as usual.
    # modelin = custom_timing(parfile)

    # Remove the dispersion delay as it is unnecessary
    # modelin.delay_funcs['L1'].remove(modelin.dispersion_delay)

    # Set the target coords for automatic weighting if necessary
    if "ELONG" in modelin.params:
        tc = SkyCoord(
            modelin.ELONG.quantity,
            modelin.ELAT.quantity,
            frame="barycentrictrueecliptic",
        )
    else:
        tc = SkyCoord(modelin.RAJ.quantity, modelin.DECJ.quantity, frame="icrs")

    target = tc if weightcol == "CALC" else None

    # TODO: make this properly handle long double
    ts = None
    if args.usepickle:
        try:
            ts = toa.load_pickle(eventfile)
        except IOError:
            pass
    if ts is None:
        ts = fermi.get_Fermi_TOAs(
            eventfile,
            weightcolumn=weightcol,
            targetcoord=target,
            minweight=minWeight,
            minmjd=minMJD,
            maxmjd=maxMJD,
            ephem="DE421",
            planets=False,
        )
        log.info("There are %d events we will use" % len(ts))
        ts.filename = eventfile
        # FIXME: writes to the TOA directory unconditionally
        try:
            toa.save_pickle(ts)
        except IOError:
            pass

    if weightcol is not None:
        if weightcol == "CALC":
            weights = np.asarray([float(x["weight"]) for x in ts.table["flags"]])
            log.info(
                "Original weights have min / max weights %.3f / %.3f"
                % (weights.min(), weights.max())
            )
            # Rescale the weights, if requested (by having wgtexp != 0.0)
            if wgtexp != 0.0:
                weights **= wgtexp
                wmx, wmn = weights.max(), weights.min()
                # make the highest weight = 1, but keep min weight the same
                weights = wmn + ((weights - wmn) * (1.0 - wmn) / (wmx - wmn))
            for ii, x in enumerate(ts.table["flags"]):
                x["weight"] = str(weights[ii])
        weights = np.asarray([float(x["weight"]) for x in ts.table["flags"]])
        log.info(
            "There are %d events, with min / max weights %.3f / %.3f"
            % (len(weights), weights.min(), weights.max())
        )
    else:
        weights = None
        log.info("There are %d events, no weights are being used." % ts.ntoas)

    # Now load in the gaussian template and normalize it
    gtemplate = read_gaussfitfile(gaussianfile, nbins)
    gtemplate /= gtemplate.mean()

    # Set the priors on the parameters in the model, before
    # instantiating the emcee_fitter
    # Currently, this adds a gaussian prior on each parameter
    # with width equal to the par file uncertainty * priorerrfact,
    # and then puts in some special cases.
    # *** This should be replaced/supplemented with a way to specify
    # more general priors on parameters that need certain bounds
    phs = 0.0 if args.phs is None else args.phs
    fitkeys, fitvals, fiterrs = get_fit_keyvals(modelin, phs=phs, phserr=args.phserr)

    for key, v, e in zip(fitkeys[:-1], fitvals[:-1], fiterrs[:-1]):
        if key == "SINI" or key == "E" or key == "ECC":
            getattr(modelin, key).prior = Prior(uniform(0.0, 1.0))
        elif key == "PX":
            getattr(modelin, key).prior = Prior(uniform(0.0, 10.0))
        elif key.startswith("GLPH"):
            getattr(modelin, key).prior = Prior(uniform(-0.5, 1.0))
        else:
            getattr(modelin, key).prior = Prior(
                norm(loc=float(v), scale=float(e * args.priorerrfact))
            )

    # Now define the requirements for emcee
    ftr = emcee_fitter(ts, modelin, gtemplate, weights, phs, args.phserr)

    # Use this if you want to see the effect of setting minWeight
    if args.testWeights:
        log.info("Checking H-test vs weights")
        ftr.prof_vs_weights(use_weights=True)
        ftr.prof_vs_weights(use_weights=False)
        sys.exit()

    # Now compute the photon phases and see if we see a pulse
    phss = ftr.get_event_phases()
    maxbin, like_start = marginalize_over_phase(
        phss, gtemplate, weights=ftr.weights, minimize=True, showplot=False
    )
    log.info("Starting pulse likelihood: %f" % like_start)
    if args.phs is None:
        fitvals[-1] = 1.0 - maxbin[0] / float(len(gtemplate))
        if fitvals[-1] > 1.0:
            fitvals[-1] -= 1.0
        if fitvals[-1] < 0.0:
            fitvals[-1] += 1.0
        log.info("Starting pulse phase: %f" % fitvals[-1])
    else:
        log.warning(
            "Measured starting pulse phase is %f, but using %f"
            % (1.0 - maxbin / float(len(gtemplate)), args.phs)
        )
        fitvals[-1] = args.phs
    ftr.fitvals[-1] = fitvals[-1]
    ftr.phaseogram(plotfile=filename + "_pre.png")
    plt.close()
    # ftr.phaseogram()

    # Write out the starting pulse profile
    vs, xs = np.histogram(
        ftr.get_event_phases(), outprof_nbins, range=[0, 1], weights=ftr.weights
    )
    f = open(filename + "_prof_pre.txt", "w")
    for x, v in zip(xs, vs):
        f.write("%.5f %12.5f\n" % (x, v))
    f.close()

    # Try normal optimization first to see how it goes
    if do_opt_first:
        result = op.minimize(ftr.minimize_func, np.zeros_like(ftr.fitvals))
        newfitvals = np.asarray(result["x"]) * ftr.fiterrs + ftr.fitvals
        like_optmin = -result["fun"]
        log.info("Optimization likelihood: %f" % like_optmin)
        ftr.set_params(dict(zip(ftr.fitkeys, newfitvals)))
        ftr.phaseogram()
    else:
        like_optmin = -np.inf

    # Set up the initial conditions for the emcee walkers.  Use the
    # scipy.optimize newfitvals instead if they are better
    ndim = ftr.n_fit_params
    if like_start > like_optmin:
        # Keep the starting deviations small...
        pos = [
            ftr.fitvals + ftr.fiterrs * args.initerrfact * np.random.randn(ndim)
            for ii in range(nwalkers)
        ]
        # Set starting params
        for param in ["GLPH_1", "GLEP_1", "SINI", "M2", "E", "ECC", "PX", "A1"]:
            if param in ftr.fitkeys:
                idx = ftr.fitkeys.index(param)
                if param == "GLPH_1":
                    svals = np.random.uniform(-0.5, 0.5, nwalkers)
                elif param == "GLEP_1":
                    svals = np.random.uniform(minMJD + 100, maxMJD - 100, nwalkers)
                    # svals = 55422.0 + np.random.randn(nwalkers)
                elif param == "SINI":
                    svals = np.random.uniform(0.0, 1.0, nwalkers)
                elif param == "M2":
                    svals = np.random.uniform(0.1, 0.6, nwalkers)
                elif param in ["E", "ECC", "PX", "A1"]:
                    # Ensure all positive
                    svals = np.fabs(
                        ftr.fitvals[idx] + ftr.fiterrs[idx] * np.random.randn(nwalkers)
                    )
                    if param in ["E", "ECC"]:
                        svals[svals > 1.0] = 1.0 - (svals[svals > 1.0] - 1.0)
                for ii in range(nwalkers):
                    pos[ii][idx] = svals[ii]
    else:
        pos = [
            newfitvals + ftr.fiterrs * args.initerrfact * np.random.randn(ndim)
            for i in range(nwalkers)
        ]

    # Set the 0th walker to have the initial pre-fit solution
    # This way, one walker should always be in a good position
    pos[0] = ftr.fitvals

    import emcee

    # Setting up a backend to save the chains into an h5 file
    if args.backend:
        try:
            backend = emcee.backends.HDFBackend(filename + "_chains.h5")
            backend.reset(nwalkers, ndim)
        except ImportError:
            log.warning("h5py package not installed. Backend set to None")
            backend = None
    else:
        backend = None

    dtype = [("lnprior", float), ("lnlikelihood", float)]

    # Following are for parallel processing tests...
    if args.multicore:
        try:
            import pathos.multiprocessing as mp

            def unwrapped_lnpost(theta):
                return ftr.lnposterior(theta)

            with mp.ProcessPool(nodes=ncores) as pool:
                sampler = emcee.EnsembleSampler(
                    nwalkers,
                    ndim,
                    unwrapped_lnpost,
                    blobs_dtype=dtype,
                    pool=pool,
                    backend=backend,
                )
                if args.noautocorr:
                    sampler.run_mcmc(pos, nsteps, progress=True)
                else:
                    autocorr = run_sampler_autocorr(sampler, pos, nsteps, burnin)
            pool.close()
            pool.join()
        except ImportError:
            log.info("Pathos module not available, using single core")
            sampler = emcee.EnsembleSampler(
                nwalkers, ndim, ftr.lnposterior, blobs_dtype=dtype, backend=backend
            )
            if args.noautocorr:
                sampler.run_mcmc(pos, nsteps, progress=True)
            else:
                autocorr = run_sampler_autocorr(sampler, pos, nsteps, burnin)
    else:
        sampler = emcee.EnsembleSampler(
            nwalkers, ndim, ftr.lnposterior, blobs_dtype=dtype, backend=backend
        )
        if args.noautocorr:
            sampler.run_mcmc(pos, nsteps, progress=True)
        else:
            autocorr = run_sampler_autocorr(sampler, pos, nsteps, burnin)

    def chains_to_dict(names, sampler):
        samples = np.transpose(sampler.get_chain(), (1, 0, 2))
        chains = [samples[:, :, ii].T for ii in range(len(names))]
        return dict(zip(names, chains))

    def plot_chains(chain_dict, file=False):
        npts = len(chain_dict)
        fig, axes = plt.subplots(npts, 1, sharex=True, figsize=(8, 9))
        for ii, name in enumerate(chain_dict.keys()):
            axes[ii].plot(chain_dict[name], color="k", alpha=0.3)
            axes[ii].set_ylabel(name)
        axes[npts - 1].set_xlabel("Step Number")
        fig.tight_layout()
        if file:
            fig.savefig(file)
            plt.close()
        else:
            plt.show()
            plt.close()

    chains = chains_to_dict(ftr.fitkeys, sampler)
    plot_chains(chains, file=filename + "_chains.png")

    # Make the triangle plot.
    samples = np.transpose(sampler.get_chain(discard=burnin), (1, 0, 2)).reshape(
        (-1, ndim)
    )

    blobs = sampler.get_blobs()
    lnprior_samps = blobs["lnprior"]
    lnlikelihood_samps = blobs["lnlikelihood"]
    lnpost_samps = lnprior_samps + lnlikelihood_samps
    ind = np.unravel_index(
        np.argmax(lnpost_samps[:][burnin:]), lnpost_samps[:][burnin:].shape
    )
    ftr.maxpost_fitvals = [chains[ii][burnin:][ind] for ii in ftr.fitkeys]

    try:
        import corner

        fig = corner.corner(
            samples,
            labels=ftr.fitkeys,
            bins=50,
            truths=ftr.maxpost_fitvals,
            plot_contours=True,
        )
        fig.savefig(filename + "_triangle.png")
        plt.close()
    except ImportError:
        pass

    # Plot the scaled prior probability alongside the initial gaussian
    # probability distribution and the histogrammed samples
    ftr.plot_priors(chains, burnin, scale=True)
    plt.savefig(filename + "_priors.png")
    plt.close()

    # Make a phaseogram with the 50th percentile values
    # ftr.set_params(dict(zip(ftr.fitkeys, np.percentile(samples, 50, axis=0))))
    # Make a phaseogram with the best MCMC result
    ftr.set_params(dict(zip(ftr.fitkeys[:-1], ftr.maxpost_fitvals[:-1])))
    ftr.phaseogram(plotfile=filename + "_post.png")
    plt.close()

    # Write out the output pulse profile
    vs, xs = np.histogram(
        ftr.get_event_phases(), outprof_nbins, range=[0, 1], weights=ftr.weights
    )
    f = open(filename + "_prof_post.txt", "w")
    for x, v in zip(xs, vs):
        f.write("%.5f %12.5f\n" % (x, v))
    f.close()

    # Write out the par file for the best MCMC parameter est
    centered_samples = [
        samples[:, i] - ftr.maxpost_fitvals[i] for i in range(len(ftr.fitkeys))
    ]
    errors = [
        np.percentile(np.abs(centered_samples[i]), 68) for i in range(len(ftr.fitkeys))
    ]
    ftr.set_param_uncertainties(dict(zip(ftr.fitkeys[:-1], errors[:-1])))

    f = open(filename + "_post.par", "w")
    f.write(ftr.model.as_parfile())
    f.close()

    # Print the best MCMC values and ranges
    ranges = map(
        lambda v: (v[1], v[2] - v[1], v[1] - v[0]),
        zip(*np.percentile(samples, [16, 50, 84], axis=0)),
    )
    log.info("Post-MCMC values (50th percentile +/- (16th/84th percentile):")
    for name, vals in zip(ftr.fitkeys, ranges):
        log.info("%8s:" % name + "%25.15g (+ %12.5g / - %12.5g)" % vals)

    # Put the same stuff in a file
    f = open(filename + "_results.txt", "w")
    f.write("Post-MCMC values (50th percentile +/- (16th/84th percentile):\n")
    for name, vals in zip(ftr.fitkeys, ranges):
        f.write("%8s:" % name + " %25.15g (+ %12.5g / - %12.5g)\n" % vals)
    f.write("\nMaximum likelihood par file:\n")
    f.write(ftr.model.as_parfile())
    f.close()

    import pickle

    pickle.dump(samples, open(filename + "_samples.pickle", "wb"))
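
# Hypothetical command line (file names are placeholders, not real data):
#
#     event_optimize events_ft1.fits pulsar.par template.gaussians \
#         --weightcol=CALC --minWeight=0.9 --nwalkers=100 --nsteps=500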