#include <fstream>
#include <iostream>
#include <string>
#include <cmath>
using namespace std;

#include "PosteriorR4s.h"
#include "McRateUtils.h"
using namespace McRateUtils;


#include "tree.h"
#include "treeInterface.h"
#include "sequenceContainer1G.h"
#include "likelihoodComputation.h"
#include "errorMsg.h"
#include "gammaDistribution.h"
#include "datMatrixHolder.h"
#include "talRandom.h"
#include "someUtil.h"
#include "bestAlpha.h"

using namespace std;


// Value substituted for a category rate that is exactly 0.0, so that later
// computations never work with a zero rate (avoids dividing by zero).
#define ZERO_DIST 0.000001
// NOTE(review): MIN_PROB and MAX_RATE are not referenced in the visible part
// of this file - confirm whether they are still needed.
#define MIN_PROB 0.05
#define MAX_RATE 40.0


//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////


// Sets up the calculator from the tree, the alphabet, the sequence container
// and the stochastic process it will work on.
// No prior distribution exists at this point: m_pPriorDist stays NULL until
// findRates() allocates one.
PosteriorR4s::PosteriorR4s(const tree& inTree,
						   const alphabet* pAlph,
						   const sequenceContainer1G & seqContainer,
						   const stochasticProcess* pSp)
	: m_tree(inTree),
	  m_seqContainer(seqContainer),
	  m_pAlph(pAlph)
{
	m_pPriorDist = NULL;
	m_pSp = pSp;
}



// Releases the prior distribution held in m_pPriorDist, if there is one.
PosteriorR4s::~PosteriorR4s()
{
	// Deleting a null pointer is a no-op, so no explicit check is required.
	delete m_pPriorDist;
	m_pPriorDist = NULL;
}


// Finds the rates when alpha is not known in advance.
// The alpha of the gamma prior is first estimated with bestAlphaFixedTree on
// the stored tree and sequences; the work is then delegated to the overload
// that takes alpha explicitly.
//   bScaleRates - forwarded unchanged to the alpha-taking overload
//   categories  - number of categories of the gamma distribution
//   bAllStat    - forwarded unchanged to the alpha-taking overload
// Returns the vector of expected rates produced by that overload.
Vdouble PosteriorR4s::findRates(bool bScaleRates, int categories, bool bAllStat/*false*/)
{
	//find alpha
	// NOTE(review): 0.01 is presumably just the starting alpha and 15.0 the
	// upper bound of the search - confirm against bestAlpha.h.
	gammaDistribution gammaDist(0.01, categories);
	stochasticProcess sp(&gammaDist, m_pSp->getPijAccelerator());
	cerr << "finding alpha..." << endl;
	bestAlphaFixedTree bestAlpha(m_tree, m_seqContainer, sp, NULL, 15.0);
	const MDOUBLE alpha = bestAlpha.getBestAlpha();
	// Terminate the line so the progress output that follows starts on its own line.
	cerr << "alpha is: " << alpha << endl;
	return findRates(bScaleRates, alpha, categories, bAllStat);
}



// Computes the posterior rate of every position under a gamma prior with the
// given alpha and number of categories.
//   bScaleRates - when true, m_expRates is passed through
//                 McRateUtils::scaleVec(m_expRates, 1.0) and the factor it
//                 returns is also applied to the standard deviations
//   alpha       - alpha of the gamma prior (asserted to be non-zero)
//   categories  - number of categories of the prior (asserted to be non-zero)
//   bAllStat    - ignored: it is overwritten with true, so the per-position
//                 statistics are always computed
// Returns a copy of m_expRates.
Vdouble PosteriorR4s::findRates(bool bScaleRates, MDOUBLE alpha, const int categories, bool bAllStat)
{
	// The caller's choice is overridden on purpose or by accident; either way
	// printRates() reads the vectors that only calcStat() fills.
	bAllStat = true;

	// Drop a prior left over from an earlier call.
	if (m_pPriorDist != NULL)
	{
		delete m_pPriorDist;
		m_pPriorDist = NULL;
	}

	assert ((alpha != 0) && (categories != 0));
	m_pPriorDist = new gammaDistribution(alpha, categories);

	computeRate4site(categories, bAllStat);

	// 0.0 tells calcStat() that no scaling was applied.
	MDOUBLE scaleFactor = 0.0;
	if (bScaleRates)
	{
		scaleFactor = McRateUtils::scaleVec(m_expRates, 1.0);
		// the standard deviations get the same factor as the rates
		size_t idx = 0;
		while (idx < m_stdVec.size())
		{
			m_stdVec[idx] *= scaleFactor;
			++idx;
		}
	}

	if (bAllStat)
	{
		calcStat(categories, scaleFactor);
	}

	return m_expRates;
}



/////////////
//computeRate4site: calculates the posterior rates:
//the rate for each position is the expectation of that rate over the range [0.0, maxRate]
//for each position: 
//E(R|Data) = sigma{ (P(Data|Ri)*P(Ri)*Ri) / sigma{(P(Data|Ri)*P(Ri))}} = 
//1/sigma{(P(Data|Ri)*P(Ri))} * sigma{ (P(Data|Ri)*P(Ri)*Ri) }
//if bFullRange = false: the Ri's are the mean of the priorDistribution categories
//if bFullRange = true: go over the whole range using rate_interval steps
//if bAllStat = true: calculate all statistics for each position: MAP, 25 + 75 percentiles
void PosteriorR4s::computeRate4site(const int numOfCategories, bool bAllStat)
{
	if (m_pPriorDist == NULL)
		errorMsg::reportError("no prior distribution in PosteriorR4s::computeRate4site");

	clearDS(bAllStat);

	if (bAllStat == true)
	{
		m_rateDist.resize(m_seqContainer.seqLen());
		for (int z=0; z< m_rateDist.size();++z)
			m_rateDist[z].resize(m_pPriorDist->categories(), 0.0);
	}

	//calc using only the categories of the prior
	for (int pos = 0 ; pos<m_seqContainer.seqLen(); ++pos)
	{
		Vdouble L_vec(m_pPriorDist->categories());
		MDOUBLE totalProb = 1.0, exp, exp_xSqr; 
		computeLVec(pos, 1, m_pPriorDist->categories(), m_pPriorDist, L_vec); 

		//calc probability of each Ri
		MDOUBLE rate, prob_Ri, prior_Ri;
		totalProb = exp = exp_xSqr = 0.0; 
		for (int ri = 0; ri<m_pPriorDist->categories(); ++ri)
		{
			rate = m_pPriorDist->rates(ri);
			if (rate == 0.0)
				rate = ZERO_DIST;
			
			prior_Ri = m_pPriorDist->ratesProb(ri);
			prob_Ri = L_vec[ri] * prior_Ri;

			if (bAllStat == true)
				m_rateDist[pos][ri] = prob_Ri;

			totalProb += prob_Ri;
			exp += prob_Ri * rate;
			exp_xSqr += prob_Ri * pow(rate, 2);
		}
		
		exp /= totalProb;
		exp_xSqr /= totalProb;
		MDOUBLE std = sqrt(exp_xSqr - pow(exp, 2));

		if (bAllStat == true)
		{
			for (int rc = 0; rc<m_pPriorDist->categories(); ++rc)
			{
				m_rateDist[pos][rc] /= totalProb;
			}
		}
		
		m_expRates[pos] = exp;
		m_stdVec[pos] = std;
		if ((pos % 100) == 0)
			cerr<<"bayesian position "<<pos<<endl; 
		else
			cerr<<".";
	}
}




// Fills LVec with the value of likelihoodComputation::getLofPos() for
// position pos, evaluated at the rate of each category of pDist in the
// 1-based, inclusive range [fromCategory, toCategory]. Category k is written
// to LVec[k-1]; LVec is grown when it is too short for toCategory entries.
// A category rate of exactly 0.0 is replaced by ZERO_DIST.
// Errors are reported through errorMsg::reportError when fromCategory is
// smaller than 1 or toCategory exceeds the number of categories of pDist.
void PosteriorR4s::computeLVec(const int pos, const int fromCategory, const int toCategory,  const distribution* pDist, Vdouble& LVec)
{
	// fromCategory is 1-based: anything below 1 would index element -1.
	if (fromCategory < 1)
	{
		errorMsg::reportError(" fromCategory must be at least 1 in PosteriorR4s::computeLVec", 1);
		return; // only reached if reportError() returns
	}

	if (toCategory > pDist->categories() )
	{
		errorMsg::reportError(" tocategory exceed possible number in distribution", 1);
		return; // only reached if reportError() returns
	}

	// Compare as unsigned only once toCategory is known to be positive, so a
	// negative value cannot turn into a huge size.
	if (toCategory > 0 && static_cast<size_t>(toCategory) > LVec.size())
		LVec.resize(toCategory);

	for (int ri = fromCategory-1; ri<toCategory; ++ri)
	{
		MDOUBLE rate = pDist->rates(ri);
		if (rate == 0.0)
			rate = ZERO_DIST;
		LVec[ri] = likelihoodComputation::getLofPos(pos, m_tree, m_seqContainer, *m_pSp, rate);
	}
}




// Writes the results of the last findRates() call to outFile.
// Output: a header (time stamp via printTime, the prior and its alpha), then
// one tab-separated line per position holding the 1-based position, the
// character of sequence 0 of the container at that position, the expected
// rate, the MAP rate, the m_25Vec-m_75Vec interval and the standard
// deviation, and finally the average and standard deviation of the expected
// rates.
// An error is reported, and nothing is written, when no prior distribution
// exists yet or when a statistics vector is shorter than m_expRates.
void PosteriorR4s::printRates(ofstream& outFile)
{
	if (m_pPriorDist == NULL)
	{
		errorMsg::reportError("no prior distribution in PosteriorR4s::printRates");
		return; // only reached if reportError() returns
	}

	// Every vector below is indexed with the positions of m_expRates.
	const size_t numPositions = m_expRates.size();
	if ((m_mapRates.size() < numPositions) || (m_25Vec.size() < numPositions) ||
		(m_75Vec.size() < numPositions) || (m_stdVec.size() < numPositions))
	{
		errorMsg::reportError("statistics vectors are shorter than the rates vector in PosteriorR4s::printRates");
		return; // only reached if reportError() returns
	}

	printTime(outFile);
	outFile<<"# rates were created with PosteriorR4s"<<endl;

	outFile<<"# prior distribution is GAMMA"<<endl;
	outFile<<"# alpha is: "<<((gammaDistribution*) (m_pPriorDist))->getAlpha() <<endl;

	outFile<<endl;
	outFile<<"#POS"<<"\t"<<"SEQ"<<"\t"<<"SCORE"<<"\t"<<"MAX"<<"\t"<<"QQ-INTERVAL"<<"\t"<<"std"<<endl;
	const sequence* pSeq = &(m_seqContainer[0]);

	for (int i=0; i < static_cast<int>(numPositions); ++i)
	{
		outFile<<i+1;
		outFile<< "\t"<< pSeq->getAlphabet()->fromInt((*pSeq)[i]);
		outFile<< "\t"<< m_expRates[i];
		outFile<< "\t"<< m_mapRates[i];
		outFile<< "\t"<< m_25Vec[i];
		outFile<< "-"<< m_75Vec[i];
		outFile<< "\t"<< m_stdVec[i];
		outFile<<endl;
	}

	MDOUBLE ave = computeAverage(m_expRates);
	MDOUBLE stdDev = computeStd(m_expRates);
	// Snap values within 1e-9 of 0 (average) or 1 (standard deviation) to the
	// exact number so that the summary lines print cleanly.
	if (((ave<1e-9)) && (ave>(-(1e-9))))
		ave=0;
	if ((stdDev>(1-(1e-9))) && (stdDev< (1.0+(1e-9))))
		stdDev=1.0;
	outFile<<"# Average = "<<ave<<endl;
	outFile<<"# Standard Deviation = "<<stdDev<<endl;
}






// Returns the alpha of the current prior, which is treated as a
// gammaDistribution.
// m_pPriorDist is dereferenced without a null check, so a prior must already
// have been created by findRates().
MDOUBLE PosteriorR4s::getAlpha()
{
	gammaDistribution* pGammaPrior = (gammaDistribution*)m_pPriorDist;
	return pGammaPrior->getAlpha();
}

// Resets the result containers before a new computation:
// m_expRates and m_stdVec end up with one zero entry per alignment position,
// and m_rateDist is emptied when bAllStat is set.
void PosteriorR4s::clearDS(bool bAllStat)
{
	// assign() discards the old contents and leaves seqLen() zeros behind.
	m_expRates.assign(m_seqContainer.seqLen(), 0.0);
	m_stdVec.assign(m_seqContainer.seqLen(), 0.0);

	if (bAllStat)
		m_rateDist.clear();
}


void PosteriorR4s::calcStat(const int numCategories, MDOUBLE scaleFactor)
{
	if (m_rateDist.empty())
		errorMsg::reportError("rate distribution is empty in function PosteriorR4s::calcStat()");

	m_25Vec.resize(m_rateDist.size());
	m_75Vec.resize(m_rateDist.size());
	m_mapRates.resize(m_rateDist.size());

	for (int pos = 0; pos < m_rateDist.size(); ++pos)
	{
		MDOUBLE totalProb = 0.0;
		MDOUBLE maxProb = 0.0;
		int maxRate = -1;
		MDOUBLE probRi, prevRate, rate = 0.0;
		bool p25 = false;
		bool p75 = false;
		for (int ri = 0; ri < m_rateDist[pos].size(); ++ri)
		{
			prevRate = rate;
			probRi = m_rateDist[pos][ri];
			totalProb += probRi;
			rate = m_pPriorDist->rates(ri);
			
			if (maxProb < probRi)
			{
				maxProb = probRi;
				m_mapRates[pos] = rate;
			}

			if (p25 == false)
			{
				if (totalProb > 0.25)
				{
					m_25Vec[pos] = prevRate;
					p25 = true;
				}
			}

			if (p75 == false)
			{
				if (totalProb > 0.75)
				{
					m_75Vec[pos] = rate;
					p75 = true;
				}
			}
		}

		if (scaleFactor != 0.0)
		{
			m_mapRates[pos] *= scaleFactor;
			m_25Vec[pos] *= scaleFactor;
			m_75Vec[pos] *= scaleFactor;
		}
	}
}
