Source code for game_ai

import numpy as np
from numpy.random import default_rng

# Random number generator used for all random choices in this module.
# (Previously this was created only inside the __main__ block, which made
# the module fail with a NameError when imported.)
rng = default_rng()

# import the game you want to play
import tictac as game
#import n_in_a_row as game


"""
A general AI for simple turn-based games.

The game to be played must be imported as a module named game.
The module must implement these functions:

* new_game()
* can_continue(state)
* check_winner(state)
* previous_player(player)
* next_player(player)
* all_reasonable_plays(state, player)
* ask_for_move(player, state)
* draw(state)
* make_move(play, player, state)
* copy_game(state)
* declare_winner(winner)

and define this variable:

* n_players

"""

# Tags for marking human and cpu players
HUMAN = "bio"
CPU = "cpu"


class MC_node:
    """
    A node in the Monte Carlo tree.

    Each node records the win and play statistics and links to the nodes
    that come before and after it in the tree.

    Args:
        player (int): player number for the player whose move led to this node
        state: current state of the game
        layer (int): counts how deep in the tree this node is
        play: the play action ("move") that leads to this node
        explore (float): for large values the algorithm is more eager to try non-optimal plays
    """

    # Creates a node object
    def __init__(self, player, state, layer, play, explore=1.0):
        self.player = player
        self.wins = 0
        self.plays = 0
        self.layer = layer
        self.play = play
        self.state = state
        self.branches = []
        self.parent = None
        self.winner = game.check_winner(state)
        self.explore_factor = explore

    # String representation for the node object
    def __str__(self):
        info = "MC node:\n"
        info += " player: " + str(self.player) + "\n"
        info += " layer: " + str(self.layer) + "\n"
        info += " play: " + str(self.play) + "\n"
        info += " state: \n" + str(self.state) + "\n"
        info += " wins / plays: " + str(self.wins) + " / " + str(self.plays) + "\n"
        info += " branches: " + str(len(self.branches))
        for b in self.branches:
            info += " " + str(b.play)
        return info + "\n"
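    # Example (sketch): the root node of a search tree is created with no
    # play and the *previous* player, since a node's player is the one
    # whose move led to it:
    #
    #   root = MC_node(game.previous_player(1), game.new_game(), layer=0, play=None)
    #   print(root)   # shows player, layer, play, wins/plays and branches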
    def has_branches(self):
        """
        Checks if more gameplay branches follow from this node.

        Returns:
            bool: True if more nodes follow this node in the MC tree.
        """
        return len(self.branches) > 0
    def is_leaf(self):
        """
        Checks if this is a leaf node.

        A node is said to be a leaf node if
        (a) the game ends at this node (there are no following nodes) **or**
        (b) at least one of the following nodes has 0 recorded plays

        I.e., a leaf is a node at the outer edge of the tree.

        If this node is not terminal but the following nodes have not yet
        been created, the function calls :meth:`MC_node.create_branches`
        to create nodes for all possible plays.

        Returns:
            bool: True if this is a leaf node.
        """
        # If there are no branches, this node may end the game.
        # Or maybe the branches just have not been created yet.
        if not self.has_branches():
            # If winner is 0, the game may have ended in a draw
            # or the following branches have not yet been created.
            if self.winner == 0:
                self.create_branches()
                # If there are no following plays, the game has ended in a draw.
                if len(self.branches) == 0:
                    return True
            else:
                # If winner is not 0, the game has been decided with someone winning.
                return True

        # If there are branches, the game is still ongoing.
        for b in self.branches:
            if b.plays == 0:
                return True

        return False
    def pick_unplayed_branch(self):
        """
        Randomly chooses one node out of the nodes that
        (a) follow this node in the MC tree **and**
        (b) have no recorded plays

        Returns:
            MC_node: the chosen node
        """
        # first list all the unplayed branches
        unplayed = []
        for b in range(len(self.branches)):
            if self.branches[b].plays == 0:
                unplayed.append(b)

        # then randomly pick one
        chosen = rng.choice(unplayed)
        return self.branches[chosen]
    def pick_played_branch(self):
        """
        Chooses the node that
        (a) follows this node in the MC tree **and**
        (b) has the highest play factor as given by :meth:`MC_node.bias_factor`.

        Returns:
            MC_node: the chosen node
        """
        # calculate the bias factors for all branches,
        # always saving the best factor and branch found so far
        best_branch = 0
        best_bias = self.branches[0].bias_factor()
        for b in range(1, len(self.branches)):
            bias = self.branches[b].bias_factor()
            # check if this is better than the previous best
            if bias > best_bias:
                best_bias = bias
                best_branch = b

        return self.branches[best_branch]
    def pick_best_play(self):
        """
        Chooses the play action (i.e., move) recorded in the node that
        (a) follows this node **and**
        (b) has the most recorded plays

        Returns:
            object: the most tried out play
        """
        # compare the number of playouts for all branches,
        # always saving the highest play count and branch found so far
        best_branch = 0
        most_plays = self.branches[0].plays
        for b in range(1, len(self.branches)):
            plays = self.branches[b].plays
            if plays > most_plays:
                most_plays = plays
                best_branch = b

        # return the play action from the most tried branch
        return self.branches[best_branch].play
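    # Design note: choosing the *most played* branch (the "robust child")
    # is a common alternative to choosing the highest win ratio. A branch
    # only accumulates many playouts if it kept winning the bias
    # comparison, so a high play count is itself evidence of strength,
    # and it is less noisy than a win ratio computed from a few samples.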
    def bias_factor(self):
        """
        Calculates the factor for choosing the next node for a node where
        all the following nodes have at least one recorded play.
        The higher this factor is, the more likely it is that this node
        is chosen.

        The factor is calculated as

        .. math ::
            \\frac{w}{n} + c \\sqrt{ \\frac{1}{n} \\ln(n_\\text{parent}) }

        where

        * :math:`w` = the recorded wins in all the playouts from this node
        * :math:`n` = the number of recorded playouts from this node
        * :math:`c` = the exploration factor
        * :math:`n_\\text{parent}` = the number of recorded playouts from the previous node

        The first term :math:`w / n` is the win ratio for the playouts
        from this node. If this node is likely to lead to victory, this
        ratio is close to 1 and the node gets chosen often.

        The second term is large if there are only a few plays from this
        node and many plays from the *other nodes* that branch off the
        same parent node (the competing play options). If this term were
        not included, any node whose initial play led to a loss would
        never be considered again by the algorithm. Since this may happen
        simply due to bad luck, it is important to give these nodes some
        additional plays.

        Returns:
            float: the bias factor for this node
        """
        w = self.wins
        n = self.plays
        c = self.explore_factor
        N = self.parent.plays
        return w / n + c * np.sqrt(np.log(N) / n)
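    # Worked example (hypothetical numbers): with w = 6 wins in n = 10
    # playouts, c = 1.0 and N = 50 parent playouts, the factor is
    #   6/10 + 1.0 * sqrt(ln(50) / 10) ≈ 0.6 + 0.63 ≈ 1.23
    # A sibling branch with 0 wins in 2 playouts would score
    #   0/2 + 1.0 * sqrt(ln(50) / 2) ≈ 0.0 + 1.40 ≈ 1.40
    # and would be explored next despite its losing record.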
    def create_branches(self):
        """
        Creates new MC tree nodes corresponding to all the play options
        available at the game state represented by this node.

        The new nodes are created as :class:`MC_node` objects and saved
        in the list self.branches of this node.
        """
        # ask the game for a list of all (reasonable) play actions (moves) in this situation
        the_plays = game.all_reasonable_plays(self.state, game.next_player(self.player))

        # loop over all the possible moves and create a new MC_node
        # representing the state of the game after that move
        for play in the_plays:
            # We must not change the current state of the game, so we
            # ask for a copy. Depending on the game, this may be just
            # a full copy of the current state, but it may also contain
            # some differences such as added randomization or obscured
            # information. (E.g. if there is a deck of cards, the AI
            # should not be allowed to know the order of the cards.)
            new_state = game.copy_game(self.state)

            # advance the game by taking the move
            game.make_move(play, game.next_player(self.player), new_state)

            # create the new MC tree node
            new_branch = MC_node(player=game.next_player(self.player),
                                 state=new_state,
                                 layer=self.layer + 1,
                                 play=play)

            # record that the new node branches from this node
            new_branch.parent = self

            # add the new branch to the list of branches in this node
            self.branches.append(new_branch)
    def explore_branches(self):
        """
        Travels through the MC tree, starting from this node, looking for
        a leaf node.

        At each node, the algorithm checks if the current node is a leaf
        node using :meth:`MC_node.is_leaf`. If it is, the current node is
        returned.

        If the currently examined node is not a leaf, the algorithm
        calculates bias factors for all the nodes following the current
        node and then picks the node with the highest factor using
        :meth:`MC_node.pick_played_branch`. This node then becomes the
        current node and the process is repeated.

        Eventually, the process will end up at a leaf node. At that
        point, the exploration stops and the found leaf is returned.

        Returns:
            MC_node: the found leaf node
        """
        # starting point is this node
        active_node = self

        # search as long as we haven't found a leaf
        # (if this node is already a leaf, the loop is skipped and
        # this node itself is returned)
        while not active_node.is_leaf():
            # move down the tree, always saving the node
            # we travel to in active_node
            active_node = active_node.pick_played_branch()

        # eventually, return the found leaf node
        return active_node
    def simulate(self):
        """
        Simulates the outcome of the game starting from the current node.

        The algorithm alternates between players, making each player
        randomly choose one of their available play options until the
        game ends.

        Returns:
            int: the player number of the winner or 0 if the game ended in a draw
        """
        # if the game has been decided, there's nothing to do
        if self.winner > 0:
            return self.winner

        # if the game is finished without a winner, it's a draw
        if not game.can_continue(self.state):
            return 0

        # The game is not finished, so we simulate the rest of the game
        # by randomly choosing moves for all players until it ends.
        # In order not to tamper with the real game state, make a copy of it.
        simulated_state = game.copy_game(self.state)
        player = game.next_player(self.player)

        while True:
            # Choose a random play. We pick by index so that plays of
            # any type (e.g. tuples) are returned unchanged.
            the_plays = game.all_reasonable_plays(simulated_state, player)
            random_play = the_plays[rng.choice(len(the_plays))]

            # advance the game
            game.make_move(random_play, player, simulated_state)

            # check if the game has now ended and return the winner if it has
            winner = game.check_winner(simulated_state)
            if winner > 0:
                return winner
            elif not game.can_continue(simulated_state):
                return 0
            else:
                # if the game is not finished, it's the next player's turn
                player = game.next_player(player)
    def backpropagate(self, winner):
        """
        Recursively records play statistics according to who won the
        simulated game.

        The number of plays from this node is increased by one regardless
        of the outcome of the game. If the player whose turn it is at
        this node won the game, the win count is also increased by one.
        If the game ended in a draw, the win count is increased by one
        half.

        Once the stats have been recorded for one node, the algorithm
        moves on to the parent node. The process is repeated until the
        root node (the node with no parent) is reached.

        Args:
            winner (int): number of the winning player or 0 if the result was a draw
        """
        # always record one playout
        self.plays += 1

        # Check if the player of this node won.
        # NOTE: self.player is the number of the player
        # whose action *led* to this node.
        # Therefore, if this node leads to victory, it's a good choice
        # for the player whose decision made it happen, so the node
        # should only be credited if that player won.
        if winner == 0:
            # for a draw, record 0.5 wins
            self.wins += 0.5
        elif self.player == winner:
            # for a win, record 1.0 wins
            self.wins += 1

        # move up the tree toward the root unless we are already there
        if self.parent is not None:
            self.parent.backpropagate(winner)
class AI:
    """
    A Monte Carlo tree search AI.

    The AI chooses its actions using a MC tree built out of MC_node objects.

    Args:
        player (int): the player number for this AI
        thinking_time (int): the number of iterations the AI is allowed when building the MC tree
    """

    def __init__(self, player, thinking_time=5000):
        self.player = player
        self.iterations = thinking_time
    def pick_move(self, state):
        """
        Given the state of the game, the AI chooses its next move.

        Args:
            state: the state of the game in the format defined by the game module

        Returns:
            object: the chosen play action (move) in the format defined by the game module
        """
        # Create the starting node for the MC search tree.
        # Note that the player recorded at a node is the player whose move
        # *led* to the node. For the current state of the game, that is
        # the previous player, not the AI player itself.
        root = MC_node(game.previous_player(self.player), state, layer=0, play=None)

        # empty line between the game and the AI progress bar
        print()

        # explore the MC tree as many times as allowed
        for i in range(self.iterations):
            # progress bar
            self.print_progress(i)

            # explore the existing tree until a leaf node is found
            leaf = root.explore_branches()

            # If the game has not finished at the leaf, pick one of the
            # possible moves for which no playouts have been recorded
            # and simulate the rest of the game after that.
            # Once the game has ended, backpropagate the results to
            # update the play statistics of the leaf node and all the
            # nodes leading to it.
            if leaf.winner == 0 and len(leaf.branches) > 0:
                unplayed = leaf.pick_unplayed_branch()
                result = unplayed.simulate()
                unplayed.backpropagate(result)
            else:
                # the game has already finished at this leaf
                result = leaf.winner
                leaf.backpropagate(result)

        # After the set number of iterations, the algorithm must make
        # a decision. Complete the progress bar first.
        self.print_progress(self.iterations)

        # The possible choices are the play actions corresponding to the
        # nodes in the first layer of the MC tree
        # (i.e., the nodes directly following the root).
        # Choose the best one out of these options and return it.
        return root.pick_best_play()
    def print_progress(self, step):
        """
        Draws a progress bar.

        Args:
            step (int): the number of iterations taken so far
        """
        total = self.iterations
        message = "["
        total_bar_length = 60
        percentage = int(step / total * 100)
        bar_fill = int(step / total * total_bar_length)
        for i in range(total_bar_length):
            if i < bar_fill:
                message += "|"
            else:
                message += " "
        message += "] " + str(percentage) + " %"

        if step < total:
            print(message, end="\r")
        else:
            print(message)
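# Example usage of the AI class (a sketch, assuming the integer player
# numbering used above; not executed on import):
#
#   state = game.new_game()
#   ai = AI(player=2, thinking_time=2000)
#   move = ai.pick_move(state)        # runs 2000 MCTS iterations
#   game.make_move(move, 2, state)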
def play_game(player_types, thinking_time=5000):
    """
    Plays a full game.

    Args:
        player_types (list): a list of players that may be either HUMAN or CPU
        thinking_time (int): the number of iterations allowed for the AI

    Returns:
        int: the player number of the winner or 0 for a draw
    """
    # record the number of players in the game
    game.n_players = len(player_types)

    # start the game
    game_state = game.new_game()
    finished = False

    # create the AIs
    ais = [None] * game.n_players
    for p in range(game.n_players):
        if player_types[p] == CPU:
            ais[p] = AI(p + 1, thinking_time)

    # visualize the starting state
    game.draw(game_state)

    # identify the starting player
    player_index = 0
    player_number = 1

    # let the game run
    while not finished:
        # ask for a move from either a human or the AI
        if player_types[player_index] == HUMAN:
            play = game.ask_for_move(player_number, game_state)
        elif player_types[player_index] == CPU:
            play = ais[player_index].pick_move(game_state)

        # play the chosen move
        game.make_move(play, player_number, game_state)

        # visualize the current state
        game.draw(game_state)

        # check if someone has won
        win = game.check_winner(game_state)
        if win > 0:
            # we have a winner
            finished = True
        elif not game.can_continue(game_state):
            # it's a draw
            finished = True
        else:
            # still going, next player's turn
            player_number = game.next_player(player_number)
            player_index = player_number - 1

    # visualize the end state
    game.draw(game_state)

    # return the winner's number
    return win
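# Example configurations (sketches): two humans playing each other,
# or two AIs playing each other:
#
#   play_game([HUMAN, HUMAN])
#   play_game([CPU, CPU], thinking_time=500)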
if __name__ == "__main__":
    # Play the game.
    # Choose the number of players and their types, and set the AI thinking time.
    winner = play_game([HUMAN, CPU], thinking_time=1000)

    # declare the winner
    game.declare_winner(winner)