#include "player.h"

// NOTE(review): this file uses strcmp, cout/cerr and istrstream but only
// includes "player.h" here; the system headers (and any using-declarations)
// are presumably pulled in through it -- confirm.
//
// NOTE(review): both classes below are introduced with a bare `template`
// keyword and RL_Player / RL_PlayerFactory are referred to without template
// arguments.  The template parameter lists appear to have been lost before
// this file reached review and must be restored from the original; they are
// left exactly as received rather than guessed.

// Any RL player looks as follows: a thin adapter that forwards the Player
// calls to a player implementation object.
template class RL_Player : public Player {

    PlayerImp* _imp;    // implementation every call is forwarded to

public:

    // Starts a run: hands the first observation to the implementation,
    // notifies it that a run begins and returns the action it chooses.
    Action start_run (Observation* startObserv)
    {
        _imp->set_state(startObserv);
        _imp->start_run();
        return _imp->choose_action();
    }

    // One step inside a run: passes on the current observation and the reward
    // obtained for the previous action, then returns the next chosen action.
    Action do_step (Observation* currObserv, double lastReward)
    {
        _imp->set_state(currObserv);
        _imp->get_step_reward(lastReward);
        return _imp->choose_action();
    }

    // Ends a run: passes on the final observation and the last reward, then
    // lets the implementation finish the run.  No action is chosen.
    void end_run (Observation* currObserv, double lastReward)
    {
        _imp->set_state(currObserv);
        _imp->get_step_reward(lastReward);
        _imp->finish_run();
    }

    // Forwards the statistics printout to the implementation.
    void print_stats ()
    {
        _imp->print_stats();
    }

    // Wraps the given implementation.  The pointer is stored as is; this
    // class never deletes it.
    RL_Player (PlayerImp* imp) : _imp(imp) {}
};

// Standard player factory for creating RL players.
//
// Static data member templates are not implemented, so the singleton instance
// is kept in this untyped file-scope pointer instead.
// NOTE(review): the pointer is shared by everything that calls Instance(); if
// the factory really is a template, all of its instantiations share (and
// cast) the same object -- confirm only one instantiation is ever used.
void* _factory = 0;

template class RL_PlayerFactory : public PlayerFactory {

    // Capacity, terminating NUL included, of every token buffer used while
    // parsing the creation parameters.  It has to hold the longest accepted
    // keyword ("Q_Learning" needs 11 bytes) with room to spare.
    enum { TOKEN_CAPACITY = 32 };

    // Reads one whitespace-delimited token into buf.  The buffer is zeroed
    // first, so it holds an empty string when nothing could be read, and the
    // field width is set so that at most capacity-1 characters are stored.
    static void read_token (istrstream& in, char* buf, int capacity)
    {
        for (int i = 0; i < capacity; i++)
            buf[i] = 0;
        in.width(capacity);     // bounds the extraction; reset by operator>>
        in >> buf;
    }

    // Maps an algorithm name to its AlgorithmKind; ALG_UNK for unknown names.
    AlgorithmKind what_alg (const char* algStr)
    {
        // for now Sarsa, TD0 & Q_Learning are implemented
        if (!strcmp(algStr, "Sarsa"))      return ALG_SARSA;
        if (!strcmp(algStr, "TD0"))        return ALG_TD0;
        if (!strcmp(algStr, "Q_Learning")) return ALG_QLRN;
        return ALG_UNK;
    }

    // Tells whether the algorithm specifies an "on_line" (eps-greedy) policy.
    // For now Sarsa is the only such algorithm.
    bool is_online (AlgorithmKind algKind)
    {
        return algKind == ALG_SARSA;
    }

public:

    // Returns the factory singleton, creating it on first use.
    static RL_PlayerFactory* Instance()
    {
        if (!_factory)
            _factory = (void*) new RL_PlayerFactory();
        return (RL_PlayerFactory*) _factory;
    }

    // Creates a player from a whitespace-separated parameter string:
    //
    //     RL <player> <algorithm> <lambda> <policy name | eps>
    //
    // <player>    implementation name handed to implementChoise
    // <algorithm> Sarsa, TD0 or Q_Learning
    // <lambda>    number in [0,1]
    // last field  eps in [0,1] for on-line algorithms, a policy name otherwise
    //
    // Returns a newly allocated RL_Player, or 0 after printing a diagnostic
    // when the string does not describe a player that can be created.
    RL_Player* CreatePlayer (const char* params)
    {
        cout << "Create New Player!\n";

        // A null string is treated like an empty one and rejected below.
        if (!params)
            params = "";
        istrstream ist(params);

        char pl_kind[TOKEN_CAPACITY];
        read_token(ist, pl_kind, TOKEN_CAPACITY);
        if (strcmp(pl_kind, "RL")) {
            // We know how to create RL players only.
            cerr << "Not RL player creation is specified";
            return 0;
        }
        cout << "RL PLAYER!\n";

        char playerStr[TOKEN_CAPACITY];
        read_token(ist, playerStr, TOKEN_CAPACITY);
        cout << playerStr << "\n";

        // The specific implementation choice is provided by generated code;
        // the implementation is specified by name.
        implementChoise* choise = implementChoise::Instance();
        PlayerImpBuilder* builder = choise->Choose_Impementation(playerStr);
        if (!builder) {
            cout << "Unknown player kind is specified!\n";
            return 0;
        }

        char algStr[TOKEN_CAPACITY];
        read_token(ist, algStr, TOKEN_CAPACITY);
        AlgorithmKind algKind = what_alg(algStr);
        if (algKind == ALG_UNK) {
            cerr << "Not implemented RL algorithm kind is specified!\n";
            return 0;
        }

        // A missing or malformed number must not be mistaken for a value.
        double lambda = 0;
        ist >> lambda;
        if (ist.fail()) {
            cerr << "Lambda parameter is missing or is not a number!\n";
            return 0;
        }
        cout << "Lambda parameter: " << lambda << "\n";
        if (lambda > 1 || lambda < 0) {
            cerr << "Not valid value (not in [0,1] interval!\n";
            return 0;
        }

        // The same test decides both which last field is parsed and which
        // builder calls consume it, so the two can never disagree.
        if (!is_online(algKind)) {
            char policyName[TOKEN_CAPACITY];
            read_token(ist, policyName, TOKEN_CAPACITY);
            cout << policyName << "(policy name)\n";

            builder->create_algorithm(algKind, lambda);
            builder->create_policy(policyName);
        } else {
            double eps = 0;
            ist >> eps;
            if (ist.fail()) {
                cerr << "Eps parameter is missing or is not a number!\n";
                return 0;
            }
            cout << eps << "(eps)\n";
            if (eps > 1 || eps < 0) {
                cerr << "Not valid value (not in [0,1] interval!\n";
                return 0;
            }

            builder->create_online_algorithm(algKind, lambda, eps);
        }

        PlayerImp* imp = builder->get_player_imp();
        if (!imp) {
            cerr << "Can't create player implementation!\n";
            return 0;
        }

        // Wrap the very pointer that was just checked.
        return new RL_Player(imp);
    }
};