# TODO: print the accepted sample count for each rank
from simplemc.cosmo.Derivedparam import AllDerived
import scipy.linalg as la
import numpy as np  # numpy provides the array helpers formerly aliased in scipy (sp.array, etc.)
import copy
import random
import sys
from numpy import loadtxt
class MCMCAnalyzer:
"""
MCMC sampler (Metropolis-Hastings).
This is the MCMC module. It spits out chains that are compatible with CosmoM
it calculates cov matrix during burn-in.
optional temperature makes it sample at a higher temperature but note that
this guy, as opposed to cosmomc, reweights the weights on the fly.
Parameters
-----------
like : Likelihood object
Object of a Likelihood class.
outfile : str
Output file.
skip : int
Burn-in.
nsamp : int
Number of mcmc samples.
temp : float
Temperature
cov : numpy.array
Covariance matrix. Default: None.
addDerived : bool
In order to ad derived parameters such as age of the universe and Omega_{Lambda}.
GRstop : float
Gelman-Rubin criteria.
"""
    def __init__(self, like, outfile, skip=5000, nsamp=100000, temp=1.0,
                 cov=None, chain_num=None, addDerived=False, GRstop=0.01, checkGR=500):
        try:
            from mpi4py import MPI
            self.comm = MPI.COMM_WORLD
            name = MPI.Get_processor_name()
            # under MPI, each process runs one chain labelled by its rank
            self.chain_num = self.comm.rank + 1
            print("Hello, World! "
                  "I am process %d of %d on %s" %
                  (self.comm.rank, self.comm.size, name))
        except ImportError:
            self.chain_num = 1
            print("Running only 1 chain without MPI.")
self.like = like
self.outfile = outfile
self.nsamp = nsamp
self.skip = skip
self.temp = float(temp) # temperature
self.cpars = like.freeParameters()
self.N = len(self.cpars)
self.derived = addDerived
self.checkgr = checkGR
minvals, maxvals = [], []
for lb, hb in [p.bounds for p in self.cpars]:
minvals.append(lb)
maxvals.append(hb)
        self.minvals = np.array(minvals)
        self.maxvals = np.array(maxvals)
print("Bounds:", self.minvals, self.maxvals)
if (like.name() == "Composite"):
self.sublikenames = like.compositeNames()
self.composite = True
else:
self.composite = False
        if cov is None:
            # make the initial proposal covariance from the diagonal parameter "errors"
            errs = [0.01 * p.error ** 2 for p in self.cpars]
            self.init_pcov(np.diag(errs))
        else:
            self.init_pcov(cov)
if self.derived: self.AD = AllDerived()
self.GRcondition = GRstop
self.RunChain()
def init_pcov(self, mat):
self.chol = la.cholesky(mat)
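    # init_pcov stores the upper-triangular Cholesky factor U of the proposal
    # covariance C (scipy.linalg.cholesky returns the upper factor by default),
    # so a step a @ U with a ~ N(0, I) has covariance U^T U = C; see draw_pcov.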
def RunChain(self):
self.openFiles()
self.cloglike, self.cloglikes = self.getLikes()
        # Set up logofs based on the first loglike, which should be the same
        # for all chains. That didn't work well in practice, so use zero
        # instead: our likelihoods never become very large.
        self.logofs = 0
        # current weight
        self.cw = 0
        # counter of accepted samples
        self.co = 0
        # accumulators for the burn-in mean and covariance
        self.swx = 0
        self.meanx = np.zeros(self.N)
        self.meanxx = np.zeros((self.N, self.N))
        # max loglike
        self.maxloglike = -1e30
        # are we done?
        self.done = False
        # parameter history kept for the convergence diagnostic
        self.lpars = []
        # use only the last fraction of the chain for convergence checks
        self.percen = 0.4
        self.gr = None
print("Starting chain...")
        while not self.done:
            ppars, numout = self.GetProposal()
            # proposals falling outside the prior count as rejected samples
            self.cw += numout
            self.like.updateParams(ppars)
            ploglike, ploglikes = self.getLikes()
            if np.isnan(ploglike):
                print("\nSomething bad has happened, nan in loglike, assuming zero log")
                ploglike = -1e50
            if ploglike > self.cloglike:
                accept = True
            else:
                accept = (np.exp((ploglike - self.cloglike) / self.temp)
                          > random.uniform(0., 1.))
            if accept:
                self.ProcessAccepted(ppars, ploglike, ploglikes)
            else:
                self.cw += 1
            print("Accepted: {:d} | loglike: {:3.4f} | "
                  "Gelman-Rubin: {}".format(self.co, self.cloglike, self.gr), end='\r')
            sys.stdout.flush()
            if self.co > 0 and self.co % self.checkgr == 0:
                try:
                    chains = self.comm.gather(self.lpars, root=0)
                    if self.comm.rank == 0:
                        self.gr = self.GRDRun(chains)
                        if np.all(self.gr < self.GRcondition):
                            condition = 1
                            self.closeFiles()
                        else:
                            condition = 0
                    else:
                        condition = None
                    recvmsg = self.comm.bcast(condition, root=0)
                    if recvmsg == 1:
                        print('\n---- Gelman-Rubin achieved ----')
                        self.closeFiles()
                        return True
                except AttributeError:
                    # self.comm does not exist without mpi4py; split this
                    # single chain in two inside GRDRun
                    self.gr = self.GRDRun([self.lpars])
                    if np.all(self.gr < self.GRcondition):
                        print('\n---- Gelman-Rubin achieved ----')
                        self.closeFiles()
                        return True
    def GRDRun(self, chains):
        """
        An implementation of the Gelman-Rubin diagnostic.
        If the number of chains is 1, the chain is split in two halves
        and the diagnostic is computed on the two parts.

        Parameters
        ----------
        chains : list
            List with the chains on which to perform the GR diagnostic.

        Returns
        -------
        result : numpy.array
            Gelman-Rubin diagnostic, |1 - sqrt(R/W)|, per parameter.
        """
        mean_chain = []
        var_chain = []
        if len(chains) == 1:
            lchain = len(chains[0]) // 2
            chains = [chains[0][:lchain], chains[0][lchain:]]
        else:
            clen = [len(chain) for chain in chains]
            if len(set(clen)) == 1:
                lchain = clen[0]
            else:
                # use the same number of steps from every chain
                lchain = min(clen)
        try:
            for chain in chains:
                mean_chain.append(np.mean(chain[-lchain:], axis=0))
                var_chain.append(np.var(chain[-lchain:], axis=0))
        except Exception:
            # not enough samples yet; report no convergence
            return 1
        M = np.mean(mean_chain, axis=0)
        W = np.mean(var_chain, axis=0)
        B = sum([(b - M) ** 2 for b in mean_chain])
        B = lchain / (len(chains) - 1.) * B
        R = (1. - 1. / lchain) * W + B / lchain
        result = np.array(np.absolute(1 - np.sqrt(R / W)))
        return result
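    # Worked example: for two chains of length n with per-parameter means
    # m1, m2 and variances v1, v2, this gives W = (v1 + v2)/2, M = (m1 + m2)/2,
    # B = n*((m1 - M)^2 + (m2 - M)^2), R = (1 - 1/n)*W + B/n, and the returned
    # |1 - sqrt(R/W)| approaches 0 as the chains converge to the same target.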
    def openFiles(self):
        """
        Open the files used to save the samples and the maxlike point.
        Also add the derived parameters if the addDerived option is True.
        """
outfile = self.outfile
formstr = '%g ' + '%g '*(self.N+1)
if self.derived:
formstr += '%g '*(len(self.AD.list))
if (self.composite):
formstr += '%g '*(len(self.sublikenames)+1)
formstr += '\n'
        if self.chain_num is None:
            self.cfname = outfile + ".txt"
            mlfname = outfile + ".maxlike"
        else:
            self.cfname = outfile + "_%i.txt" % (self.chain_num)
            mlfname = outfile + "_%i.maxlike" % (self.chain_num)
self.fout = open(self.cfname, 'w')
self.mlfout = open(mlfname, 'w')
self.formstr = formstr
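        # Each chain row is: weight, -loglike, one value per free parameter,
        # then the derived parameters (if addDerived) and, for composite
        # likelihoods, len(self.sublikenames)+1 extra log-likelihood columns.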
    def closeFiles(self):
        # close the chain file so buffered rows are flushed before reading it back
        self.fout.close()
        chain = loadtxt(self.cfname)
        self.weights = chain[:, 0]
        self.samples = chain[:, 2:self.N + 2]
        self.loglikes = chain[:, 1]
        self.mlfout.close()
    def getLikes(self):
        """
        Get the log-likelihood values from the data in use.
        """
        if self.composite:
            cloglikes = self.like.compositeLogLikes_wprior()
            cloglike = cloglikes.sum()
        else:
            cloglikes = []
            cloglike = self.like.loglike_wprior()
        return cloglike, cloglikes
    def GetProposal(self):
        """
        Generate a proposal point for the MCMC step, redrawing until the
        point falls inside the prior bounds; the number of out-of-bounds
        draws is returned alongside it.
        """
        vec = np.zeros(self.N)
        numreject = 0
        while True:
            ppars = copy.deepcopy(self.cpars)
            step = self.draw_pcov()
            for i, p in enumerate(ppars):
                p.value += step[i]
                vec[i] = p.value
            if all(vec > self.minvals) and all(vec < self.maxvals):
                return ppars, numreject
            numreject += 1
    def draw_pcov(self):
        # draw a correlated Gaussian step using the Cholesky factor
        a = np.array([random.gauss(0., 1.) for _ in range(self.N)])
        return np.dot(a, self.chol)
    def ProcessAccepted(self, ppars, ploglike, ploglikes):
        self.co += 1
        vec = [p.value for p in self.cpars]
        # keep a trimmed parameter history for the convergence diagnostic,
        # retaining roughly the last self.percen fraction of the chain
        self.lpars.append(vec)
        if self.co % 10 == 0:
            del self.lpars[:int((1 - self.percen) * 10)]
        if self.co > self.skip:
            # rescale the weight when sampling at temperature != 1
            wers = self.cw * np.exp((self.cloglike - self.logofs)
                                    * (self.temp - 1.0) / self.temp)
            tmp = [wers, -self.cloglike] + vec
            if self.derived:
                tmp += [pd.value for pd in self.AD.listDerived(self.like)]
            if self.composite:
                outstr = self.formstr % tuple(tmp + self.cloglikes.tolist())
            else:
                outstr = self.formstr % tuple(tmp)
            self.fout.write(outstr)
            # flush the file on a regular basis
            if self.co % 100 == 0:
                self.fout.flush()
            if self.cloglike > self.maxloglike:
                self.maxloglike = self.cloglike
                self.mlfout.seek(0)
                self.mlfout.write(outstr)
                self.mlfout.flush()
            if self.co > self.nsamp:
                print('Number of steps achieved')
                self.done = True
                self.closeFiles()
        elif self.co < self.skip:
            # accumulate weighted moments to estimate the covariance during burn-in
            self.swx += self.cw
            v = np.array(vec)
            self.meanx += v * self.cw
            self.meanxx += np.outer(v, v) * self.cw
            if self.cw > 30:
                # long stretches without acceptance: shrink the proposal
                print("\nStill burning in, weight too large")
                self.chol *= 0.9
        else:  # co == skip
            # turn the accumulated moments into the weighted sample covariance
            self.meanx /= self.swx
            self.meanxx /= self.swx
            self.meanxx -= np.outer(self.meanx, self.meanx)
            print("\nRe-initializing covariance matrix after burn-in")
            print(self.meanxx)
            print()
            self.init_pcov(self.meanxx)
self.cw = 1
self.cpars = ppars
self.cloglike = ploglike
if self.composite:
self.cloglikes = ploglikes
    def get_results(self):
        """Return the samples, weights, loglikes and last GR diagnostic."""
        return {'samples': self.samples, 'weights': self.weights,
                'loglikes': self.loglikes, 'gr_diagnostic': self.gr}