#pragma once
#include <time.h>
#include "GameState.h"
#include "AbstractOrder.h"
#include "ActionGenerator.h"
#include "EvaluationFunction.h"

// Tree-search driver built around a tree of gameNode_t nodes and two bandit
// selection policies (eGreedy and UCB). Judging by the class name this is a
// UCT-style Monte Carlo tree search -- NOTE(review): confirm against the .cpp.
class UCT
{
public:
	// One node of the search tree.
	struct gameNode_t {
		// Action associated with each child (one entry per child).
		std::vector<playerActions_t> actions;

		// Statistics accumulated for this node.
		double totalEvaluation;
		double totalVisits;

		// Link to the parent node; used to backpropagate the statistics.
		gameNode_t* parent;

		// Links to the child nodes; used to delete the tree.
		std::vector<gameNode_t*> children;

		// Game state stored in this node.
		GameState gs;

		// Depth value of this node (presumably its distance from the root).
		double depth;

		// Player to move:  1 = max,  0 = min,  -1 = game over.
		int player;

		// Action generator owned by this node.
		ActionGenerator moveGenerator;

		// NOTE(review): exact meaning of "empty" is defined by the
		// implementation file -- confirm there.
		bool isEmpty;

		// NOTE(review): presumably identifies which player acts next when
		// both players move simultaneously -- confirm.
		__int8 nextPlayerInSimultaneousNode;
	};

	// Builds a searcher from its limits and evaluation function.
	//   maxDepth          - depth limit for the search
	//   ef                - evaluation function (stored as a raw pointer;
	//                       ownership is not visible from this header)
	//   maxSimulations    - simulation budget
	//   maxSimulationTime - time budget (units not visible here -- TODO confirm)
	UCT(int maxDepth, EvaluationFunction* ef, int maxSimulations, int maxSimulationTime);

	// Runs a search for `player` from game state `gs` and returns the
	// chosen player actions.
	playerActions_t start(bool player, GameState gs);

	// Search entry point with explicit max/min player flags, a budget `T`
	// and a `cutOffTime`; returns the chosen player actions.
	// NOTE(review): units of `T` and `cutOffTime` are not visible here.
	playerActions_t startUCT(GameState gs, bool maxplayer, bool minplayer, int T, int cutOffTime);

private:
	// Search limits and bookkeeping counters.
	int _maxDepth;
	int _maxDepthReached;
	int _maxDepthRolloutReached;
	long _totalBranching;
	int _numBranching;
	int _maxMissplacedUnits;

	// Evaluation function supplied to the constructor.
	EvaluationFunction* _ef;

	// Simulation budgets supplied to the constructor.
	int _maxSimulations;
	int _maxSimulationTime;

	// Timers. Only the overall timer is active; the per-phase timers
	// listed below are currently commented out.
	Timer timerUCT;
	// Timer timerTreePolicy;
	// Timer timerRolloutPolicy;
	// Timer timerRolloutPolicyActionGenerator;
	// Timer timerRolloutPolicyRandomAction;
	// Timer timerRolloutPolicyExecute;
	// Timer timerRolloutPolicyMoveForward;
	// Timer timerRolloutPolicyEvaluate;
	// Timer timerBackupPolicy;

	// Creates a node for state `gs`; `parent` defaults to NULL.
	gameNode_t* newGameNode(bool maxplayer, GameState gs, gameNode_t* parent = NULL);

	// Picks the best child of `currentNode` from the given player's view.
	gameNode_t* bestChild(gameNode_t* currentNode, bool maxplayer);

	// Returns the value of `node` as a double.
	double nodeValue(gameNode_t* node);

	// Simulates from state `gs`.
	// NOTE(review): meaning of `time` and `nextSimultaneous` lives in the .cpp.
	void simulate(GameState gs, int time, __int8 nextSimultaneous);

	// Deletes `node` (see gameNode_t::children, which exists to delete the tree).
	void deleteNode(gameNode_t* node);

	// Bandit policies.
	gameNode_t* eGreedy(gameNode_t* currentNode, bool maxplayer);
	gameNode_t* UCB(gameNode_t* currentNode, bool maxplayer);
};
