#include "LGT3State.h"
#include "threeStateModel.h"
#include "likelihoodComputation.h"
#include "stochasticProcess.h"
#include "pijAccelerator.h"
#include "tree.h"
#include "someUtil.h"
#include "treeUtil.h"
#include "threeStateAlphabet.h"
#include "recognizeFormat.h"
#include <fstream>
#include <string>
using namespace std;

// Forward declarations of the analysis helpers defined further down in this file.

// Removes the leaves named in a list file from a tree and writes the pruned tree to a new file.
void removeOrganisms();
// Roots a tree at node "N12" and prints the summed dis2father() of the nodes collected from node "N11".
void sumBranchLengths ();
// Per-branch likelihood comparison on the tree; see the definition for its current state.
void loopLRT();
// Adds to newSc one sequence per leaf of tr, each holding position 0 of that leaf's sequence in origSc.
void buildScFromTree(tree &tr,sequenceContainer &origSc, map<string, int> &name2idSc, sequenceContainer &newSc);
//void testDown();



// Prunes a list of organisms from a species tree.
//
// Loads the tree from treeFile, removes every leaf whose name is listed in
// missingListFile, and writes the resulting tree to newTreeFile. All three
// paths are hard-coded below.
//
// Names that match no node in the tree are reported on stderr and skipped.
// If the name list cannot be opened, or the output file cannot be created,
// the failure is reported on stderr and the function returns without
// writing a tree (previously an unreadable list silently produced an
// unpruned copy of the input tree).
void removeOrganisms(){
	const std::string treeFile = "D:\\My Documents\\projects\\Thy\\species trees\\yang_et-al\\Life_u_1_nj.tree.RECODED.PARSED.ph";
	const std::string newTreeFile = "D:\\My Documents\\projects\\Thy\\species trees\\yang_et-al\\Life_u_1_nj.tree.RECODED.PARSED.REMOVEDph";
	const std::string missingListFile = "D:\\My Documents\\projects\\Thy\\species trees\\yang_et-al\\missinglist.CLEAN.txt";
	tree my_tr(treeFile);

	std::ifstream m_in(missingListFile.c_str());
	if (!m_in) {
		std::cerr<<"cannot open "<<missingListFile<<std::endl;
		return;
	}
	// presumably one entry per name in the file -- confirm against putFileIntoVectorStringArray
	std::vector<std::string> names;
	putFileIntoVectorStringArray(m_in,names);

	for (std::size_t i=0; i<names.size(); ++i){
		tree::nodeP thisNode = my_tr.findNodeByName(names[i]);
		if (thisNode) {
			my_tr.removeLeaf(thisNode);
		}
		else {
			// Not fatal: keep going so every unknown name is listed in one run.
			std::cerr<<"cannot find "<<names[i]<<std::endl;
		}
	}

	std::ofstream o(newTreeFile.c_str());
	if (!o) {
		std::cerr<<"cannot open "<<newTreeFile<<" for writing"<<std::endl;
		return;
	}
	my_tr.output(o);
}

void sumBranchLengths (){
	string treeFile = "D:\\My Documents\\projects\\Thy\\species trees\\reconstruction\\ciccarelli\\Ciccarelli.parsed.wBL.noBP.removed.ph";
	tree tr(treeFile);
	tree::nodeP myroot = tr.findNodeByName("N12"); //returns NULL if not found
	tr.rootAt(myroot);
	vector<tree::nodeP> myClade;
	tree::nodeP myDomainRoot = tr.findNodeByName("N11");  // bacteria or archaea
	tr.getFromNodeToLeaves(myClade,myDomainRoot);
	MDOUBLE sumBLlengths=0;
	for (int i=0;i < myClade.size();++i) {
		cout<<myClade[i]->name()<<endl;
		if (myClade[i] == myDomainRoot) continue;
		sumBLlengths+=myClade[i]->dis2father();
	}
	cout<<"total branch lengths are:"<<sumBLlengths<<endl;

}

// loop over all branches and calculate LRT between optimization given parameters of all tree and
// optimization of trees separately
void loopLRT() {
	string treeFile = "D:\\My Documents\\projects\\Thy\\species trees\\reconstruction\\ciccarelli\\submission\\Ciccarelli.parsed.wBL.noBP.removed.ph";
	string seqsFile = "D:\\My Documents\\projects\\Thy\\species trees\\reconstruction\\ciccarelli\\submission\\Ciccarelli.parsed.Seqs.txt";
	int minNodes = 5;
	
	threeStateAlphabet alph;
	ifstream in(seqsFile.c_str());
	sequenceContainer sc = recognizeFormat::read(in,&alph);


	map<string, int> name2idSc;
	for (int j = 0; j<sc.numberOfSeqs(); ++j){
		name2idSc[sc[j].name()] = sc[j].id();
	}

	tree fullTree(treeFile.c_str());
	

	// run on full dataset:
	string root = "N12";
	fullTree.rootAt(fullTree.findNodeByName(root)); 
//	cout<<"RUNNING FULL TREE"<<endl;
//	assess3stateLGT arFull(fullTree,sc,&root);

	// get optimized params for fixing later
	MDOUBLE mu1 = 0.366898;//arFull.getMu1();
	MDOUBLE mu2 = 1.25557;//arFull.getMu2();
	MDOUBLE mu3 = 3.41183;//arFull.getMu3();
	MDOUBLE mu4 = 4.89679;//arFull.getMu4();
assess3stateLGT arFull(fullTree,sc,false,&root,&mu1,&mu2,&mu3,&mu4);
exit(0);

	vector<tree::nodeP> nodesVec;
	fullTree.getAllNodes(nodesVec, fullTree.getRoot());
	map<string, MDOUBLE> node2LLDiff;
	for (int n = 0; n < nodesVec.size(); ++n)
	{

		tree tree1, tree2; 
		tree::nodeP node = nodesVec[n];
		if (node->isRoot())
			continue;
		string nodeName = node->name();
		cout<<"RUNNING NODE  "<<nodeName<<endl;
		cutTreeToTwo(fullTree, nodeName, tree1, tree2);
		
		if ((tree1.getLeavesNum() < minNodes) || (tree2.getLeavesNum() < minNodes))
			continue;
		//build sequence container
		sequenceContainer scTree1;
		buildScFromTree(tree1,sc,name2idSc,scTree1);
		sequenceContainer scTree2;
		buildScFromTree(tree2,sc,name2idSc,scTree2);

		string resFile = "subTree1."+nodeName+".opt.res";
		string logFile = "subTree1."+nodeName+".opt.log";
		assess3stateLGT arTree1(tree1,scTree1,false,NULL,NULL,NULL,NULL,NULL,&resFile,&logFile);
		MDOUBLE tree1OptLikelihood = arTree1.getLikelihood();

		resFile = "subTree2."+nodeName+".opt.res";
		logFile = "subTree2."+nodeName+".opt.log";
		assess3stateLGT arTree2(tree2,scTree2,false,NULL,NULL,NULL,NULL,NULL,&resFile,&logFile);
		MDOUBLE tree2OptLikelihood = arTree2.getLikelihood();
		MDOUBLE sumOpt = tree1OptLikelihood + tree2OptLikelihood;

		resFile = "subTree1."+nodeName+".fixed.res";
		logFile = "subTree1."+nodeName+".fixed.log";
		assess3stateLGT arTree1Fixed(tree1,scTree1,false,NULL,&mu1,&mu2,&mu3,&mu4,&resFile,&logFile);
		MDOUBLE tree1FixedLikelihood = arTree1Fixed.getLikelihood();

		resFile = "subTree2."+nodeName+".fixed.res";
		logFile = "subTree2."+nodeName+".fixed.log";
		assess3stateLGT arTree2Fixed(tree2,scTree2,false,NULL,&mu1,&mu2,&mu3,&mu4,&resFile,&logFile);
		MDOUBLE tree2FixedLikelihood = arTree2Fixed.getLikelihood();
		MDOUBLE sumFixed = tree1FixedLikelihood + tree2FixedLikelihood;
		
		node2LLDiff[nodeName] = sumOpt - sumFixed; 
	}

	map<string,MDOUBLE>::iterator iter;   
	string str_node2LLDiff = "node2LLdiff";
	ofstream ooo(str_node2LLDiff.c_str());
	for( iter = node2LLDiff.begin(); iter != node2LLDiff.end(); iter++ ) {
		ooo << "node name: " << iter->first << ", likelihood diff: " << iter->second << endl;
	}
}

// Populates newSc with one sequence per leaf of tr.
//
// For every leaf under tr's root, the leaf name is looked up in name2idSc to
// get the id of the matching sequence in origSc. Only position 0 of that
// sequence is copied: its integer value is turned into text with
// int2string() and wrapped in a new sequence that carries the leaf name, an
// empty remark, the leaf's index in the leaf list as id, and origSc's
// alphabet. A leaf name missing from name2idSc is passed to
// errorMsg::reportError().
void buildScFromTree(tree &tr,sequenceContainer &origSc, map<string, int> &name2idSc, sequenceContainer &newSc){
	std::vector<tree::nodeP> leaves;
	tr.getAllLeaves(leaves, tr.getRoot());
	std::cout<<"building new sequence container, root is "<<tr.getRoot()->name()<<std::endl;

	for (int leafIdx = 0; leafIdx<leaves.size(); ++leafIdx){
		std::string leafName = leaves[leafIdx]->name();

		// Resolve the leaf to its sequence id; -1 is the value used on a miss.
		int seqId = -1;
		std::map<std::string,int>::iterator hit = name2idSc.find(leafName);
		if( hit != name2idSc.end() ) {
			seqId = hit->second;
		}
		else {
			std::string complaint = "error in loopLRT, cannot find ";
			complaint+=leaves[leafIdx]->name();
			complaint+=" in sequence container";
			errorMsg::reportError(complaint);
		}

		// Only the first position of the original sequence is carried over.
		int firstState = origSc[seqId][0];
		sequence leafSeq (int2string(firstState),leafName,"",leafIdx,origSc.getAlphabet());
		newSc.add(leafSeq);
	}
}

/*
void testDown(){
	//string treeFile = "D:\\My Documents\\tests\\tree.txt";
	string treeFile = "D:\\My Documents\\projects\\Thy\\species trees\\reconstruction\\ciccarelli\\submission\\Ciccarelli.parsed.wBL.noBP.removed.ph";
	//string seqsFile = "D:\\My Documents\\tests\\1pos.01.fa";//
	string seqsFile="D:\\My Documents\\projects\\Thy\\species trees\\reconstruction\\ciccarelli\\submission\\Ciccarelli.parsed.Seqs.txt";

	threeStateAlphabet alph;
	ifstream in(seqsFile.c_str());
	sequenceContainer sc = recognizeFormat::read(in,&alph);

	tree fullTree(treeFile.c_str());
	string root = "N12";
	fullTree.rootAt(fullTree.findNodeByName(root));

	MDOUBLE mu1 = 0.366898;
	MDOUBLE mu2 = 1.25557;
	MDOUBLE mu3 = 3.41183;
	MDOUBLE mu4 = 4.89679;

	assess3stateLGT arFull(fullTree,sc,NULL,&mu1,&mu2,&mu3,&mu4);

	cout<<endl<<endl<<"likelihood from up is "<<arFull.getLikelihood()<<endl;
	cout<<"likelihood from down is "<<arFull.getLikelihoodFromDown()<<endl;

}
*/
