// optimize.h
// Interface of a SARSA optimizer

#ifndef _OPTIMIZE_H
#define _OPTIMIZE_H

#include "common.h"
#include "policy.h"


/*
 * COptimizer -- SARSA optimizer (see the file header).
 *
 * Holds the tunable learning parameters (epsilon, lambda, alpha), the
 * action-selection mode, two cycle counters and a pointer to the CPolicy
 * that all policy I/O is forwarded to.  The constructor, destructor,
 * optimize*() and select*Action() members are defined out of line.
 *
 * NOTE(review): the class stores a raw CPolicy pointer and declares a
 * destructor but no copy constructor / copy assignment.  Whether copying
 * an optimizer is safe depends on what the out-of-line destructor does
 * with pPolicy -- confirm before passing instances by value.
 */
class COptimizer {
public:
	// Construction / destruction (bodies live in the implementation file).
	// A negative nAlpha (the default, -1) makes alpha() fall back to a
	// visit-count based rate instead of a fixed one.
	COptimizer(float nEpsilon = 0.1,
	           float nLambda = 1.,
	           float nAlpha = -1.,
	           eSelectType actionSelect = EGreedy);
	~COptimizer();

	// Optimization entry points (defined elsewhere).
	// presumably: run one cycle and return the chosen action / run
	// nCycles cycles in a row -- TODO confirm against the implementation.
	CAction optimizeSingleCycle();
	void optimizeMultiCycle(int nCycles);

	// ---- Parameter getters -------------------------------------------
	float getLambda() const
	{
		return nLambda;
	}

	float getEpsilon() const
	{
		return nEpsilon;
	}

	float getAlpha() const
	{
		return nAlpha;
	}

	// ---- Parameter setters -------------------------------------------
	// Each setter stores the new value and then reports it via INFO_MSG.
	void setLambda(float f)
	{
		nLambda = f;
		INFO_MSG(("COptimizer::Lambda value was changed to %f\n", nLambda));
	}

	void setEpsilon(float f)
	{
		nEpsilon = f;
		INFO_MSG(("COptimizer::Epsilon value was changed to %f\n", nEpsilon));
	}

	void setAlpha(float f)
	{
		nAlpha = f;
		INFO_MSG(("COptimizer::Alpha value was changed to %f\n", nAlpha));
	}

	void setActionSelect(eSelectType a)
	{
		actionSelect = a;
		// The selection mode is logged as its integer value.
		INFO_MSG(("COptimizer::Action select method was changed to %d\n",
		          static_cast<int>(actionSelect)));
	}

	// ---- Cycle counters ----------------------------------------------
	int getCycles() const
	{
		return nCycles;
	}

	int getSignificantCycles() const
	{
		return nSignificantCycles;
	}

	// Defined elsewhere; presumably returns the best known action for
	// state S -- TODO confirm against the implementation.
	CAction selectOptimalAction(CState &S);

	// ---- Policy pass-throughs ----------------------------------------
	// Thin wrappers that forward to the CPolicy held in pPolicy.
	// pPolicy is dereferenced without a null check.

	// Returns whatever CPolicy::load() reports for sFileName.
	bool loadPolicy(const char *sFileName)
	{
		return pPolicy->load(sFileName);
	}

	// Returns whatever CPolicy::save() reports for sFileName.
	bool savePolicy(const char *sFileName)
	{
		return pPolicy->save(sFileName);
	}

	// Asks the policy to display itself on fOut.
	void displayPolicy(FILE *fOut) const
	{
		pPolicy->display(fOut);
	}

private:
	// Step size used for the (S, A) pair.
	//   nAlpha <  0 : 1 / (visit count the policy reports for (S, A))
	//   otherwise   : the fixed nAlpha value
	// NOTE(review): no guard against a visit count of zero -- confirm
	// that callers only ask for alpha() after the pair has been visited.
	double alpha(CState &S, CAction &A)
	{
		if (nAlpha < 0) {
			return 1. / static_cast<double>(pPolicy->getVisits(S, A));
		}
		return nAlpha;
	}

	// Defined elsewhere; presumably picks an action according to
	// actionSelect -- TODO confirm against the implementation.
	CAction selectAction(CState &S);

	CPolicy *pPolicy;          // policy used by the load/save/display/alpha helpers
	float nLambda;             // exposed through getLambda()/setLambda()
	float nEpsilon;            // exposed through getEpsilon()/setEpsilon()
	float nAlpha;              // fixed step size; negative => see alpha()
	tStock nMaxAction;         // not referenced by any inline member in this header
	eSelectType actionSelect;  // current action-selection mode

	int nCycles;               // reported by getCycles()
	int nSignificantCycles;    // reported by getSignificantCycles()
};

#endif
