import numpy as np
import os
from numpy.random import default_rng
# import the game you want to play
import tictac as game
#import n_in_a_row as game
"""
A general AI for simple turn based games.
The game to be played must be imported as a module named game.
The module must implement these functions:
* new_game()
* can_continue(state)
* check_winner(state)
* previous_player(player)
* next_player(player)
* get_all_reasonable_plays(state)
* draw(state)
* make_move(play, player, state)
* copy_game(state)
* declare_winner(winner)
and define these variables
* n_players
"""
# Tags for marking human and cpu players
HUMAN = "bio"
CPU = "cpu"
[docs]class MC_node:
"""
A node in the Monte Carlo tree.
Each node records the win and play statistics and links to
the nodes that come before and after it in the tree.
Args:
player (int): player number for the player whose move lead to this node
state: current state of the game
layer (int): counts how deep in the tree this node is
play: the play action ("move") that leads to this node
explore (float): for large values the algorithm is more eager to try non-optimal plays
"""
# Creates a node object
def __init__(self, player, state, layer, play, explore=1.0):
self.player = player
self.wins = 0
self.plays = 0
self.layer = layer
self.play = play
self.state = state
self.branches = []
self.parent = None
self.winner = game.check_winner(state)
self.explore_factor = explore
# String representation for the node object
def __str__(self):
info = "MC node:\n"
info += " player: "+str(self.player)+"\n"
info += " layer: "+str(self.layer)+"\n"
info += " play: "+str(self.play)+"\n"
info += " state: \n"+str(self.state)+"\n"
info += " wins / plays: "+str(self.wins)+" / "+str(self.plays)+"\n"
info += " branches: "+str(len(self.branches))
for b in self.branches:
info += " "+str(b.play)
return info+"\n"
[docs] def has_branches(self):
"""
Checks if more gameplay branches follow from this node.
Returns:
bool: True if more nodes follow this node in the MC tree.
"""
return len(self.branches) > 0
[docs] def is_leaf(self):
"""
Checks if this is a leaf node.
A node is said to be a leaf node if
(a) the game ends at this node (there are no following nodes) **or**
(b) at least one of the following nodes has 0 recorded plays
I.e., a leaf is a node at the outer edge of the tree.
If this node is not terminal but the following nodes have not yet been
created, the function calls :meth:`MC_node.create_branches` to create nodes
for all possible plays.
Returns:
bool: True if this is a leaf node.
"""
# If there are no branches, this node may end the game.
# Or maybe the branches just have not been created yet.
if not self.has_branches():
# If winner is 0, the game may have ended in a draw
# or the following branches have not yet been created.
if self.winner == 0:
self.create_branches()
# If there are no following plays, the game has ended in a draw.
if len(self.branches) == 0:
return True
else: # if winner is not 0, the game has been decided with someone winning.
return True
# If there are branches, the game is still ongoing.
for b in self.branches:
if b.plays == 0:
return True
return False
[docs] def pick_unplayed_branch(self):
"""
Randomly chooses one node out of the nodes that
(a) follow this node in the MC tree **and**
(b) have no recorded plays
Returns:
MC_node: the chosen node
"""
# first list all the unplayed branches
unplayed = []
for b in range(len(self.branches)):
if self.branches[b].plays == 0:
unplayed.append(b)
# then randomly pick one
chosen = random.choice(unplayed)
return self.branches[chosen]
[docs] def pick_played_branch(self):
"""
Chooses the node that
(a) follows this node in the MC tree **and**
(b) has the highest play factor as given by :meth:`MC_node.bias_factor`.
Returns:
MC_node: the chosen node
"""
# calculate the bias factors for all branches
# always save the best factor and branch
best_branch = 0
best_bias = self.branches[0].bias_factor()
for b in range(1,len(self.branches)):
bias = self.branches[b].bias_factor()
# check if this is better than the previous best
if bias > best_bias:
best_bias = bias
best_branch = b
return self.branches[best_branch]
[docs] def pick_best_play(self):
"""
Chooses the play action (i.e., move) recorded in the node that
(a) follows this node **and**
(b) has the most recorded plays
Returns:
object: the most tried out play
"""
# calculate the number of playouts for all branches
# always save the highest play count and branch
best_branch = 0
most_plays = self.branches[0].plays
for b in range(1,len(self.branches)):
plays = self.branches[b].plays
if plays > most_plays:
most_plays = plays
best_branch = b
# return the play action from the most tried branch
return self.branches[best_branch].play
[docs] def bias_factor(self):
"""
Calculates the factor for choosing the next node for a node
where all the following nodes have at least one recorded play.
The higher this factor is, the more likely it is that this node is chosen.
The factor is calculated as
.. math ::
\\frac{w}{n} + c \\sqrt{ \\frac{1}{n} \\ln(n_\\text{parent}) }
where
* :math:`w` = the recorded wins in all the playouts from this node
* :math:`n` = the number of recorded playouts from this node
* :math:`c` = the exploration factor
* :math:`n_\\text{parent}` = the number of recorded playouts from the previous node
The first term :math:`w / n` is the win ratio for the playouts from this node.
If this node is likely to lead to victory, this ratio is close to 1
and gets chosen often.
The second term is large, if there are only a few plays from this node and
many plays from the *other nodes* that branch off the same parent node (the competing
play options). If this factor was not included, any node whose initial play lead
to a loss would never be considered again by the algorithm. Since this may
happen simply due to bad luck, it is important to give these nodes some additional
plays.
Returns:
float: the bias factor for this node
"""
w = self.wins
n = self.plays
c = self.explore_factor
N = self.parent.plays
return w/n + c * np.sqrt( np.log(N) / n )
[docs] def create_branches(self):
"""
Creates new MC tree nodes corresponding to all the
play options available at the game state represented by this node.
The new nodes are created as :class:`MC_node` objects and
saved in the list self.branches of this node.
"""
# ask game for a list of all (reasonble) play actions (moves) in this situation
the_plays = game.all_reasonable_plays(self.state, game.next_player(self.player))
# loop over all the possible moves and create a new MC_node
# representing the state of the game after that move
for play in the_plays:
# We must not change the current state of the game, so we
# ask for a copy. Depending on the game, this may be just
# a full copy of the current state, but it may also contain
# some differences such as added randomization or obscured
# information. (E.g. if there is a deck of cards, the AI
# should not be allowed to know the order of the cards.)
new_state = game.copy_game(self.state)
# advance the game by taking the move
game.make_move(play, game.next_player(self.player), new_state)
# create the new MC tree node
new_branch = MC_node(player=game.next_player(self.player),
state=new_state,
layer=self.layer+1,
play=play)
# record that the new node branches from this node
new_branch.parent = self
# add the new branch to the list of branches in this node
self.branches.append(new_branch)
[docs] def explore_branches(self):
"""
Travels through the MC tree, starting from this node, looking for a leaf node.
At each node, the algorithm checks if the current node is a leaf node
using :meth:`MC_node.is_leaf`. If it is, the current node is returned.
If the currently examined node is not a leaf, the algorithm calculates
bias factors for all the nodes following the current node and then picks
the node with the highest factor usin :meth:`MC_node.pick_played_branch`.
This node then becomes the current node and the process is repeated.
Eventually, the process will end up at a leaf node
At that point, the exploration stops and the found leaf is returned.
.. note ::
This function is incomplete!
Returns:
MC_node: the found leaf node
"""
# starting point is this node
active_node = self
# if this is a leaf, just return this node
# todo
# search as long as we haven't found a leaf
while not active_node.is_leaf():
# Move down the tree as long as no leaf is found.
# Always save the node you travel to in active_node.
# todo
pass
# eventually, return the found leaf node
return active_node
[docs] def simulate(self):
"""
Simulates the outcome of the game starting from the current node.
The algorithm alternates between players making each player
randomly choose one of their available play options until the game ends.
Returns:
int: the player number of the winner or 0 if the game ended in a draw
"""
# if the game has been decided, there's nothing to do
if self.winner > 0:
return self.winner
# if the game is finished without a winner, it's a draw
if not game.can_continue(self.state):
return 0
# the game is not finished, so we will simulate the rest of the game
# by randomly choosing moves for all players until the game is finished
finished = False
# in order to not tamper with the game state, make a copy of it
simulated_state = game.copy_game(self.state)
player = game.next_player(self.player)
winner = 0
while not finished:
# choose a random play
the_plays = game.all_reasonable_plays(simulated_state, player)
random_play = random.choice(the_plays)
# advance the game
game.make_move(random_play, player, simulated_state)
# check if the game has now ended and return the winner if it has
winner = game.check_winner(simulated_state)
if winner > 0:
return winner
elif not game.can_continue(simulated_state):
return 0
else: # if the game is not finished, it's the next player's turn
player = game.next_player(player)
[docs] def backpropagate(self, winner):
"""
Recursively records play statistics according to who won the simulated game.
The number of plays from this node is increased by one regardless of
the outcome of the game.
If the player whose turn it is at this node won the game, the win
count is also increased by one. If the game ended in a draw,
the win count is increased by one half.
Once the stats have been recorded for one node,
the algorithm moves on to the parent node. The process is
repeated until the root node (the node with no parent) is reached.
Args:
winner (int): number of the winning player or 0 if the result was a draw
"""
# always record one playout
self.plays += 1
# Check if the player of this node won.
# NOTE: self.player is the number of the player
# whose action *lead* to this node.
# Therefore if this node leads to victory, it's
# a good choice for the player whose decision made it happen
# only if that player won.
if winner == 0:
# for a draw, record 0.5 wins
self.wins += 0.5
elif self.player == winner:
# for a win, record 1.0 wins
self.wins += 1
# move down the tree to the previous node unless we are at the root
if self.parent != None:
self.parent.backpropagate(winner)
[docs]class AI:
"""
A Monte Carlo tree search AI.
The AI chooses its actions using a MC tree built out of
MC_node objects.
Args:
player (int): the player number for this AI
thinking_time (int): the number of iterations the AI is allowed when building the MC tree
"""
def __init__(self, player, thinking_time = 5000):
self.player = player
self.iterations = thinking_time
[docs] def pick_move(self, state):
"""
Given the state of the game, the AI chooses its next move.
Args:
state: the state of the game in the format defined by the game module
Returns:
object: the chosen play action (move) in the format defined by the game module
"""
# Create the starting node for the MC search tree.
# Note that the player recorded at a node is the player whose move
# *lead* to the node. For the current state of the game, that is the
# previous player, not the AI player itself.
root = MC_node(game.previous_player(self.player), state, layer=0, play=None)
# empty line between the game and the AI progress bar
print()
# explore the MC tree as many times as allowed
for i in range(self.iterations):
# progress bar
self.print_progress(i)
# explore the existing tree until a leaf node is found
leaf = root.explore_branches()
# If the game has not finished at the leaf, pick one of the
# possible moves for which no playouts have been recorded
# and simulate the rest of the game afer that.
# Once the game has ended, backpropagate the results to update
# the play statistics of the leaf node and all the nodes leading
# to it.
if leaf.winner == 0 and len(leaf.branches) > 0:
unplayed = leaf.pick_unplayed_branch()
result = unplayed.simulate()
unplayed.backpropagate(result)
else: # the game has already finished
result = leaf.winner
leaf.backpropagate(result)
# after a set number of iterations, the algorithm must make a decision
# complete progress bar
self.print_progress(self.iterations)
# The possible choices are the play actions corresponding to the
# nodes in the first layer of the MC tree
# (i.e., the nodes directly following the root).
# Choose the best one out of these options and return it.
return root.pick_best_play()
[docs] def print_progress(self, step):
"""
Draws a progress bar.
Args:
step (int): the number of iterations taken so far
"""
total = self.iterations
message = "["
total_bar_length = 60
percentage = int(step / total * 100)
bar_fill = int(step / total * total_bar_length)
for i in range(total_bar_length):
if i < bar_fill:
message += "|"
else:
message += " "
message += "] "+str(percentage)+" %"
if step < total:
print(message, end="\r")
else:
print(message)
[docs]def play_game(player_types, thinking_time = 5000):
"""
Plays a full game.
Args:
player_types (list): a list of players that may be either HUMAN or AI
thinking_time (int): the number of iterations allowed for the AI
Returns
int: the player number of the winner or 0 for a draw
"""
# Record the number of players in game
game.n_players = len(player_types)
# Start the game
game_state = game.new_game()
finished = False
# create the AIs
ais = [None]*game.n_players
for p in range(game.n_players):
if player_types[p] == CPU:
ais[p] = AI(p+1, thinking_time)
# visualize the starting state
game.draw(game_state)
# identify the starting player
player_index = 0
player_number = 1
# let the game run
while not finished:
# ask for a move from either a human or the AI
if player_types[player_index] == HUMAN:
play = game.ask_for_move(player_number, game_state)
elif player_types[player_index] == CPU:
play = ais[player_index].pick_move(game_state)
# play the chosen move
game.make_move(play, player_number, game_state)
# visualize the current state
game.draw(game_state)
# check if someone has won
win = game.check_winner(game_state)
if win > 0: # we have a winner
finished = True
elif not game.can_continue(game_state): # it's a draw
finished = True
else: # still going, next player's turn
player_number = game.next_player(player_number)
player_index = player_number - 1
# visualize the end state
game.draw(game_state)
# return the winner's number
return win
if __name__ == "__main__":
# initialize the rng
random = default_rng()
# play the game
# choose number of players and their type, and set AI thinking time
winner = play_game([HUMAN,CPU], thinking_time = 1000)
# declare the winner
game.declare_winner(winner)