Source code for splendor.splendor.splendor_model

# pylint: skip-file

# INFORMATION ------------------------------------------------------------------------------------------------------- #

# Author:  Steven Spratley, extending code by Guang Ho and Michelle Blom
# Date:    04/01/2021
# Purpose: Implements "Splendor" for the COMP90054 competitive game environment

# IMPORTS ------------------------------------------------------------------------------------------------------------#


import copy
import itertools
import random

from splendor.template import GameRule, GameState

from .splendor_utils import *

# CLASS DEF ----------------------------------------------------------------------------------------------------------#


# Represents cards with a colour (str), unique code (str), resource costs (dict), deck ID (int), and points (int).
[docs] class Card: def __init__(self, colour, code, cost, deck_id, points): self.colour = colour self.code = code self.cost = cost self.deck_id = deck_id self.points = points def __str__(self): gem_string = "" for colour, number in self.cost.items(): gem_string += f'{", " if gem_string!="" else ""}{number} {colour}' return f"Tier {self.deck_id+1} {self.colour} card worth {self.points} points and costing {gem_string}" def __repr__(self): return self.code def __eq__( self, other ): # Equal in the ways that matter: code is identical, and points haven't been tampered with. return ( hasattr(other, "code") and other.code == self.code and self.points == other.points == CARDS[other.code][-1] )
# Represents game as agents playing on a board with cards, gems, and nobles.
[docs] class SplendorState(GameState): def __init__(self, num_agents): self.board = self.BoardState(num_agents) self.agents = [self.AgentState(i) for i in range(num_agents)] self.agent_to_move = 0 # def __repr__(self) -> str: # return super().__repr__() def __str__(self) -> str: output = "" output += str(self.board) for agent_state in self.agents: output += str(agent_state) return output
[docs] class BoardState: def __init__(self, num_agents): self.decks = [[], [], []] self.dealt = [[None] * 4 for i in range(3)] # All gem stacks start at (4,5,7) for games of (2,3,4) players respectively. Yellow seals always start at 5. n = [4, 5, 7][num_agents - 2] self.gems = { "black": n, "red": n, "yellow": 5, "green": n, "blue": n, "white": n, } # Deal out num_agents+1 of the 10 nobles at random. Nobles = (code, cost). self.nobles = random.sample(NOBLES, k=num_agents + 1) # Sort cards into three deck tiers. Deal four cards per tier. Decks are shuffled before each deal. for code, (colour, cost, deck_id, points) in CARDS.items(): deck_id -= 1 # Deck IDs are read in as (1-3), but should be zero-indexed instead. card = Card(colour, code, cost, deck_id, points) self.decks[deck_id].append(card) for deck in self.decks: random.shuffle(deck) for i in range(3): for j in range(4): self.dealt[i][j] = self.deal(i)
[docs] def deal(self, deck_id): if len(self.decks[deck_id]): # random.shuffle(self.decks[deck_id]) return self.decks[deck_id].pop() return None
[docs] def dealt_list(self): return [card for deck in self.dealt for card in deck if card]
def __str__(self) -> str: output = "" output += "\nAvailable Gems:\n" output += str(self.gems) output += "\nDealt Card List: \n" for card in self.dealt_list(): output += "\t" + str(card) + "\n" output += "\nNoble List \n" output += str(self.nobles) output += "\n" return output
# def __repr__(self) -> str: # return self.__str__
[docs] class AgentState: def __init__(self, _id): self.id = _id self.score = 0 self.gems = {c: 0 for c in COLOURS.values()} self.cards = {c: [] for c in COLOURS.values()} self.nobles = [] self.passed = False self.agent_trace = AgentTrace(_id) self.last_action = None def __str__(self) -> str: output = "" output += "Agent (%d): \n" % (self.id) output += "\tscore: %d,\n" % (self.score) output += "\tgems: %s\n" % (self.gems) output += "\tcards: %s\n" % (self.cards) output += "\tnobles: %s.\n" % (self.nobles) return output
# Implements game logic.
[docs] class SplendorGameRule(GameRule): def __init__(self, num_of_agent): super().__init__(num_of_agent) # No private information: agent states are available to other agents. While upcoming cards are random, the decks # are still provided in the gamestate for agents to use if they want, since they are shuffled before each deal. self.private_information = None # # for now the idea is to see whether the action is one of the legal action # def validAction(self, selected, all_legal_actions): # # return utils.ValidAction(m, actions)
[docs] def initialGameState(self): return SplendorState(self.num_of_agent)
[docs] def generatePredecessor(self, state, action, agent_id): agent = state.agents[agent_id] board = state.board agent.last_action = action # Record last action such that other agents can make use of this information. score = 0 if "card" in action: card = action["card"] if "collect" in action["type"] or action["type"] == "reserve": # Increment board gem stacks by collected_gems. Decrement player gem stacks by collected_gems. for colour, count in action["collected_gems"].items(): board.gems[colour] += count agent.gems[colour] -= count # Increment player gem stacks by returned_gems. Decrement board gem stacks by returned_gems. for colour, count in action["returned_gems"].items(): agent.gems[colour] += count board.gems[colour] -= count if action["type"] == "reserve": i, j = action["card_position"] # Card drawn from deck, will be returned to it bought_card = board.dealt[i][j] if bought_card is not None: board.decks[bought_card.deck_id].append(bought_card) # Remove card from reserved cards agent.cards["yellow"].remove(card) board.dealt[i][j] = card elif "buy" in action["type"]: # Increment player gem stacks by returned_gems. Decrement board gem stacks by returned_gems. for colour, count in action["returned_gems"].items(): agent.gems[colour] += count board.gems[colour] -= count # If buying one of the available cards on the board, set removed card slot to new dealt card. # Since the board may have None cards (empty slots that cannot be filled), check cards first. if "available" in action["type"]: i, j = action["card_position"] bought_card = board.dealt[i][j] if bought_card is not None: board.decks[bought_card.deck_id].append(bought_card) board.dealt[i][j] = card # Else, agent bought a reserved card. Return card to player's yellow stack. else: agent.cards["yellow"].append(card) agent.cards[card.colour].remove(card) score -= card.points if action["noble"] is not None: # Remove noble from board. Add noble to player's stack. Like cards, nobles aren't hashable due to possessing # dictionaries (i.e. resource costs). Therefore, locate and delete the noble via unique code. # Add noble's points to agent score. board.nobles.append(action["noble"]) agent.nobles.remove(action["noble"]) score -= 3 # Log this turn's action and any resultant score. Return updated gamestate. # agent.agent_trace.action_reward.remove((action, -score)) # Removing last one because I belive it is more reliable agent.agent_trace.action_reward.pop() agent.score += score # score is negative agent.passed = action["type"] == "pass" return state
[docs] def generateSuccessor(self, state, action, agent_id): agent, board = state.agents[agent_id], state.board agent.last_action = action # Record last action such that other agents can make use of this information. score = 0 if "card" in action: card = action["card"] if "collect" in action["type"] or action["type"] == "reserve": # Decrement board gem stacks by collected_gems. Increment player gem stacks by collected_gems. for colour, count in action["collected_gems"].items(): board.gems[colour] -= count agent.gems[colour] += count # Decrement player gem stacks by returned_gems. Increment board gem stacks by returned_gems. for colour, count in action["returned_gems"].items(): agent.gems[colour] -= count board.gems[colour] += count if action["type"] == "reserve": # Remove card from dealt cards by locating via unique code (cards aren't otherwise hashable). # Since we want to retain the positioning of dealt cards, set removed card slot to new dealt card. # Since the board may have None cards (empty slots that cannot be filled), check cards first. # Add card to player's yellow stack. for i in range(len(board.dealt[card.deck_id])): if ( board.dealt[card.deck_id][i] and board.dealt[card.deck_id][i].code == card.code ): board.dealt[card.deck_id][i] = board.deal(card.deck_id) agent.cards["yellow"].append(card) break elif "buy" in action["type"]: # Decrement player gem stacks by returned_gems. Increment board gem stacks by returned_gems. for colour, count in action["returned_gems"].items(): agent.gems[colour] -= count board.gems[colour] += count # If buying one of the available cards on the board, set removed card slot to new dealt card. # Since the board may have None cards (empty slots that cannot be filled), check cards first. if "available" in action["type"]: for i in range(len(board.dealt[card.deck_id])): if ( board.dealt[card.deck_id][i] and board.dealt[card.deck_id][i].code == card.code ): board.dealt[card.deck_id][i] = board.deal(card.deck_id) break # Else, agent is buying a reserved card. Remove card from player's yellow stack. else: for i in range(len(agent.cards["yellow"])): if agent.cards["yellow"][i].code == card.code: del agent.cards["yellow"][i] break # Add card to player's stack of matching colour, and increment agent's score accordingly. agent.cards[card.colour].append(card) score += card.points if action["noble"]: # Remove noble from board. Add noble to player's stack. Like cards, nobles aren't hashable due to possessing # dictionaries (i.e. resource costs). Therefore, locate and delete the noble via unique code. # Add noble's points to agent score. for i in range(len(board.nobles)): if board.nobles[i][0] == action["noble"][0]: del board.nobles[i] agent.nobles.append(action["noble"]) score += 3 break # Log this turn's action and any resultant score. Return updated gamestate. agent.agent_trace.action_reward.append((action, score)) agent.score += score agent.passed = action["type"] == "pass" return state
# Game ends if any agent possesses at least 15 points, and all agents have gone in this round. As a very rare edge # case, poor playing agents might encounter a game where none are able to proceed. Game also ends in this case.
[docs] def gameEnds(self): deadlock = 0 for agent in self.current_game_state.agents: deadlock += 1 if agent.passed else 0 if agent.score >= 15 and self.current_agent_index == 0: return True return deadlock == len(self.current_game_state.agents)
# Return final score for this agent. If victories are tied, tie-break on number of cards placed by incrementing points.
[docs] def calScore(self, game_state, agent_id): max_score = 0 details = [] bought_cards = lambda a: sum( [len(cards) for colour, cards in a.cards.items() if colour != "yellow"] ) for a in game_state.agents: details.append((a.id, bought_cards(a), a.score)) max_score = max(a.score, max_score) victors = [d for d in details if d[-1] == max_score] if len(victors) > 1 and agent_id in [d[0] for d in victors]: min_cards = min([d[1] for d in details]) if bought_cards(game_state.agents[agent_id]) == min_cards: # Add a half point if this agent was a tied victor, and had the fewest cards. return game_state.agents[agent_id].score + 0.5 return game_state.agents[agent_id].score
# Generate a list of gem combinations that can be returned, if agent exceeds limit with collected gems. # Agents are disallowed from returning gems of the same colour as those they've just picked up. Since collected_gems # is sampled exhaustively, this function simply needs to screen out colours in collected_gems, in order for agents # to be given all collected/returned combinations permissible.
[docs] def generate_return_combos(self, current_gems, collected_gems): total_gem_count = sum(current_gems.values()) + sum(collected_gems.values()) if total_gem_count > 10: return_combos = [] num_return = total_gem_count - 10 # Combine current and collected gems. Screen out gem colours that were just collected. total_gems = { i: current_gems.get(i, 0) + collected_gems.get(i, 0) for i in set(current_gems) } total_gems = { i[0]: i[1] for i in total_gems.items() if i[0] not in collected_gems.keys() }.items() # Form a total gems list (with elements == gem colours, and len == number of gems). total_gems_list = [] for colour, count in total_gems: for _ in range(count): total_gems_list.append(colour) # If, after screening, there aren't enough gems that can be returned, return an empty list, indicating that # the collected_gems combination is not viable. if len(total_gems_list) < num_return: return [] # Else, find all valid combinations of gems to return. for combo in set(itertools.combinations(total_gems_list, num_return)): returned_gems = {c: 0 for c in COLOURS.values()} for colour in combo: returned_gems[colour] += 1 # Filter out colours with zero gems, and append. return_combos.append( dict({i for i in returned_gems.items() if i[-1] > 0}) ) return return_combos return [ {} ] # If no gems need to be returned, return a list comprised of one empty combo.
# Checks to see whether an agent's purchased cards and collected gems can cover a given resource cost. # If it can, return the combination of gems to be returned, if any. If it can't, return False.
[docs] def resources_sufficient(self, agent, costs): wild = agent.gems["yellow"] return_combo = {c: 0 for c in COLOURS.values()} for colour, cost in costs.items(): # If a shortfall is found, see if the difference can be made with wild/seal/yellow gems. available = agent.gems[colour] + len(agent.cards[colour]) shortfall = max(cost - available, 0) # Shortfall shouldn't be negative. wild -= shortfall # If wilds are expended, the agent cannot make the purchase. if wild < 0: return False # Else, increment return_combo accordingly. Note that the agent should never return gems if it can afford # to pay using its card stacks, and should never return wilds if it can return coloured gems instead. # Although there may be strategic instances where holding on to coloured gems is beneficial (by virtue of # shorting players from resources), in this implementation, this edge case is not worth added complexity. gem_cost = max(cost - len(agent.cards[colour]), 0) # Gems owed. gem_shortfall = max(gem_cost - agent.gems[colour], 0) # Wilds required. return_combo[colour] = ( gem_cost - gem_shortfall ) # Coloured gems to be returned. return_combo["yellow"] += gem_shortfall # Wilds to be returned. # Filter out unnecessary colours and return dict specifying combination of gems. return dict({i for i in return_combo.items() if i[-1] > 0})
# Checks whether a particular noble is a candidate for visiting this agent.
[docs] def noble_visit(self, agent, noble): _, costs = noble for colour, cost in costs.items(): if not len(agent.cards[colour]) >= cost: return False return True
[docs] def getLegalActions(self, game_state, agent_id): actions = [] agent, board = game_state.agents[agent_id], game_state.board # A given turn consists of the following: # 1. Collect gems (up to 3 different) OR # Collect gems (2 same, if stack >=4) OR # Reserve one of 12 available cards OR # Buy one of 12 available cards OR # Buy a previously reserved card. # 2. Discard down to 10 gems if necessary. # 3. Obtain a noble if requirements are met. # Since the gamestate does not change during an agent's turn, all turn parts are able to be planned for at once. # Action fields: {'type', 'collected_gems', 'returned_gems', 'card', 'noble'} # Actions will always take the form of one of the following three templates: # {'type': 'collect_diff'/'collect_same', 'collected_gems': {gem counts}, 'returned_gems': {gem counts}, 'noble': noble} # {'type': 'reserve', 'card':card, 'collected_gems': {'yellow': 1/None}, 'returned_gems': {colour: 1/None}, 'noble': noble} # {'type': 'buy_available'/'buy_reserve', 'card': card, 'returned_gems': {gem counts}, 'noble': noble} # First, check if any nobles are waiting to visit from the last turn. Ensure each action to follow recognises # this, and in the exceedingly rare case that there are multiple nobles waiting (meaning that, at the last turn, # this agent had the choice of at least 3 nobles), multiply all generated actions by these nobles to allow the # agent to choose again. potential_nobles = [] for noble in board.nobles: if self.noble_visit(agent, noble): potential_nobles.append(noble) if len(potential_nobles) == 0: potential_nobles = [None] # Generate actions (collect up to 3 different gems). Work out all legal combinations. Theoretical max is 10. available_colours = [ colour for colour, number in board.gems.items() if colour != "yellow" and number > 0 ] num_holding_gem = sum(agent.gems.values()) if num_holding_gem <= 7: min_comb_len = min(3, len(available_colours)) elif num_holding_gem == 8: min_comb_len = min(2, len(available_colours)) else: min_comb_len = min(1, len(available_colours)) for combo_length in range(min_comb_len, min(len(available_colours), 3) + 1): for combo in itertools.combinations(available_colours, combo_length): collected_gems = {colour: 1 for colour in combo} # make sure there is no action that collect empty gem if not collected_gems == {}: # Find combos of gems to return, if any. Since the max to be returned can be 3, theoretical max # combinations will be 51, and max actions generated by the end of this stage will be 510. # Handling this branching factor properly will be crucial for agent performance. # If return_combos comes back False, then taking these gems is invalid and won't be added. return_combos = self.generate_return_combos( agent.gems, collected_gems ) for returned_gems in return_combos: for noble in potential_nobles: actions.append( { "type": "collect_diff", "collected_gems": collected_gems, "returned_gems": returned_gems, "noble": noble, } ) # Generate actions (collect 2 identical gems). Theoretical max is 5. available_colours = [ colour for colour, number in board.gems.items() if colour != "yellow" and number >= 4 ] for colour in available_colours: collected_gems = {colour: 2} # Like before, find combos to return, if any. Since the max to be returned is now 2, theoretical max # combinations will be 21, and max actions generated here will be 105. return_combos = self.generate_return_combos(agent.gems, collected_gems) for returned_gems in return_combos: for noble in potential_nobles: actions.append( { "type": "collect_same", "collected_gems": collected_gems, "returned_gems": returned_gems, "noble": noble, } ) # Generate actions (reserve card). Agent can reserve only if it possesses < 3 cards currently reserved. # With a reservation, the agent will receive one seal (yellow), if there are any left. Reservations are stored # and displayed under the agent's yellow stack, as they won't generate their true colour until fully purchased. # There is a possible 12 cards to be reserved, and if the agent goes over limit, there are max 6 gem colours # that can be returned, leading to a theoretical max of 72 actions here. if len(agent.cards["yellow"]) < 3: collected_gems = {"yellow": 1} if board.gems["yellow"] > 0 else {} return_combos = self.generate_return_combos(agent.gems, collected_gems) for returned_gems in return_combos: for row, deck in enumerate(board.dealt): for col, card in enumerate(deck): if card: for noble in potential_nobles: actions.append( { "type": "reserve", "card": card, "card_position": (row, col), "collected_gems": collected_gems, "returned_gems": returned_gems, "noble": noble, } ) # Generate actions (buy card). Agents can buy cards if they can cover its resource cost. Resources can come from # an agent's gem and card stacks. Card stacks represent gem factories, or 'permanent gems'; if there are 2 blue # cards already purchased, this acts like 2 extra blue gems to spend in a given turn. Gems are therefore only # returned if the stack of that colour is insufficient to cover the cost. # Agents are disallowed from purchasing > 7 cards of any one colour, for the purposes of a clean interface. # This is not expected to affect gameplay, as there is essentially zero strategic reason to exceed this limit. # Available cards consist of cards dealt onto the board, as well as cards previously reserved by this agent. # There is a max 15 actions that can be generated here (15 possible cards to be bought: 12 dealt + 3 reserved). # However, in the case that multiple nobles are made candidates for visiting with this move, this number will # be multiplied accordingly. This however, is a rare event. for row, deck in enumerate(board.dealt + [agent.cards["yellow"]]): for col, card in enumerate(deck): if not card or len(agent.cards[card.colour]) == 7: continue returned_gems = self.resources_sufficient( agent, card.cost ) # Check if this card is affordable. if ( type(returned_gems) == dict ): # If a dict was returned, this means the agent possesses sufficient resources. # Check to see if the acquisition of a new card has meant new nobles becoming candidates to visit. new_nobles = [] for noble in board.nobles: agent_post_action = copy.deepcopy(agent) # Give the card featured in this action to a copy of the agent. agent_post_action.cards[card.colour].append(card) # Use this copied agent to check whether this noble can visit. if self.noble_visit(agent_post_action, noble): new_nobles.append( noble ) # If so, add noble to the new list. if not new_nobles: new_nobles = [None] for noble in new_nobles: actions.append( { "type": ( "buy_reserve" if card in agent.cards["yellow"] else "buy_available" ), "card": card, "card_position": (row, col), "returned_gems": returned_gems, "noble": noble, } ) # Return list of actions. If there are no actions (almost impossible), all this player can do is pass. # A noble is still permitted to visit if conditions are met. if not actions: for noble in potential_nobles: actions.append({"type": "pass", "noble": noble}) return actions
# END FILE -----------------------------------------------------------------------------------------------------------#