import random
from functools import lru_cache

from .game import Game


class Agent(object):
    def play(self, game):
        moves = game.moves()

        if len(moves) == 0:
            raise ValueError("invalid game state")
        if len(moves) == 1:
            return moves[0]

        return self.decide(game)


class DumbAgent(Agent):
    """dumb AI agent that plays randomly"""
    def decide(self, game):
        return random.choice(game.moves())


class SmartAgent(Agent):
    def decide(self, game):
        moves = game.moves()

        onestep = [a for a in moves if game.move(a).completed()[0]]
        if len(onestep) > 0:
            return onestep[0]

        return minmax_decision(game, game.turn)


cache = lru_cache(10000)  # < 3^9


def minmax_decision(state: Game, player: str):
    # assuming human player is not perfect and using avg instead of min

    def terminal_test(state):
        completed, winner, _ = state.completed()
        if winner:
            utility = (+1 if winner == player else -1)
        else:
            utility = 0
        return completed, utility

    @cache
    def max_value(state):
        completed, utility = terminal_test(state)
        if completed:
            return utility
        moves = state.moves()
        return max(avg_value(state.move(a)) for a in moves)

    @cache
    def avg_value(state):
        completed, utility = terminal_test(state)
        if completed:
            return utility
        moves = state.moves()
        return sum(max_value(state.move(a)) for a in moves) / len(moves)

    return max(state.moves(), key=lambda a: avg_value(state.move(a)))