# pylint: skip-file
# INFORMATION ------------------------------------------------------------------------------------------------------- #
# Author: Steven Spratley, extending code by Guang Ho and Michelle Blom
# Date: 04/01/2021
# Purpose: Implements "Splendor" for the COMP90054 competitive game environment
# IMPORTS ------------------------------------------------------------------------------------------------------------#
import copy
import itertools
import random
from splendor.template import GameRule, GameState
from .splendor_utils import *
# CLASS DEF ----------------------------------------------------------------------------------------------------------#
# Represents cards with a colour (str), unique code (str), resource costs (dict), deck ID (int), and points (int).
# [docs] -- Sphinx source-link marker left over from HTML extraction; not part of the module.
class Card:
    """A single Splendor development card.

    Attributes:
        colour: Colour of the card (str).
        code: Unique identifying code of the card (str).
        cost: Resource cost, as a dict mapping gem colour to amount.
        deck_id: Zero-indexed tier of the deck the card belongs to (int);
            ``__str__`` reports it as ``deck_id + 1``.
        points: Points the card is worth (int).
    """

    def __init__(self, colour, code, cost, deck_id, points):
        self.colour = colour
        self.code = code
        self.cost = cost
        self.deck_id = deck_id
        self.points = points

    def __str__(self):
        """Return a human-readable description, e.g. 'Tier 1 red card worth ...'."""
        gem_string = ", ".join(
            f"{number} {colour}" for colour, number in self.cost.items()
        )
        return (
            f"Tier {self.deck_id+1} {self.colour} card worth {self.points} points "
            f"and costing {gem_string}"
        )

    def __repr__(self):
        """Return the card's unique code."""
        return self.code

    def __eq__(self, other):
        """Compare cards in the ways that matter.

        Two cards are equal when their codes are identical and neither card's
        points differ from the canonical value recorded in ``CARDS`` (i.e. the
        points haven't been tampered with). Objects without a ``code``
        attribute are never equal to a card.
        """
        return (
            hasattr(other, "code")
            and other.code == self.code
            and self.points == other.points == CARDS[other.code][-1]
        )

    # Defining __eq__ without __hash__ already makes instances unhashable; this
    # states it explicitly. Cards carry a mutable cost dict, so the rest of the
    # module locates them by code rather than by hashing.
    __hash__ = None
# Represents game as agents playing on a board with cards, gems, and nobles.
# [docs] -- Sphinx source-link marker left over from HTML extraction; not part of the module.
class SplendorState(GameState):
    """Complete game state: one shared board plus one state object per agent.

    Attributes:
        board: ``BoardState`` holding decks, dealt cards, gem stacks and nobles.
        agents: List of ``AgentState`` objects, indexed by agent id.
        agent_to_move: Index of the agent whose turn it is; starts at 0.
    """

    def __init__(self, num_agents):
        self.board = self.BoardState(num_agents)
        self.agents = [self.AgentState(i) for i in range(num_agents)]
        self.agent_to_move = 0

    def __str__(self) -> str:
        """Return the board description followed by each agent's description."""
        return str(self.board) + "".join(
            str(agent_state) for agent_state in self.agents
        )

    class BoardState:
        """Shared board: three card decks, a 3x4 grid of dealt cards, gems, nobles.

        Attributes:
            decks: Three lists of undealt ``Card`` objects, one per tier.
            dealt: 3x4 grid of face-up cards; a slot is ``None`` when its deck
                has run out.
            gems: Dict mapping gem colour to the number left on the board.
            nobles: Nobles available on the board, each a ``(code, cost)`` pair.
        """

        def __init__(self, num_agents):
            self.decks = [[], [], []]
            # All gem stacks start at (4,5,7) for games of (2,3,4) players respectively.
            # Yellow seals always start at 5.
            # NOTE(review): num_agents < 2 wraps around via negative indexing and
            # num_agents > 4 raises IndexError -- confirm callers only pass 2-4.
            n = [4, 5, 7][num_agents - 2]
            self.gems = {
                "black": n,
                "red": n,
                "yellow": 5,
                "green": n,
                "blue": n,
                "white": n,
            }
            # Deal out num_agents+1 of the nobles at random. Nobles = (code, cost).
            self.nobles = random.sample(NOBLES, k=num_agents + 1)
            # Sort cards into three deck tiers.
            for code, (colour, cost, deck_id, points) in CARDS.items():
                deck_id -= 1  # Deck IDs are read in as (1-3), but should be zero-indexed instead.
                self.decks[deck_id].append(Card(colour, code, cost, deck_id, points))
            # Shuffle every deck once, then deal four cards per tier.
            for deck in self.decks:
                random.shuffle(deck)
            self.dealt = [[self.deal(tier) for _ in range(4)] for tier in range(3)]

        def deal(self, deck_id):
            """Pop and return the next card of deck ``deck_id``, or None if it is empty."""
            deck = self.decks[deck_id]
            return deck.pop() if deck else None

        def dealt_list(self):
            """Return all face-up cards as a flat list, skipping empty slots."""
            return [card for deck in self.dealt for card in deck if card]

        def __str__(self) -> str:
            """Return a multi-line summary of gems, dealt cards and nobles."""
            card_lines = "".join(
                "\t" + str(card) + "\n" for card in self.dealt_list()
            )
            return (
                "\nAvailable Gems:\n"
                + str(self.gems)
                + "\nDealt Card List: \n"
                + card_lines
                + "\nNoble List \n"
                + str(self.nobles)
                + "\n"
            )

    class AgentState:
        """Per-agent state: score, gems held, cards owned/reserved and nobles.

        Attributes:
            id: Agent id.
            score: Current score; starts at 0.
            gems: Dict mapping every colour in ``COLOURS.values()`` to gems held.
            cards: Dict mapping every colour in ``COLOURS.values()`` to a list
                of cards. The "yellow" list holds reserved (not yet bought) cards.
            nobles: Nobles that have visited this agent.
            passed: True when the agent's most recent action was a pass.
            agent_trace: ``AgentTrace`` recording (action, reward) pairs.
            last_action: Most recent action applied for this agent, or None.
        """

        def __init__(self, _id):
            self.id = _id
            self.score = 0
            self.gems = {c: 0 for c in COLOURS.values()}
            self.cards = {c: [] for c in COLOURS.values()}
            self.nobles = []
            self.passed = False
            self.agent_trace = AgentTrace(_id)
            self.last_action = None

        def __str__(self) -> str:
            """Return a multi-line summary of this agent's holdings."""
            return "".join(
                (
                    "Agent (%d): \n" % (self.id),
                    "\tscore: %d,\n" % (self.score),
                    "\tgems: %s\n" % (self.gems),
                    "\tcards: %s\n" % (self.cards),
                    "\tnobles: %s.\n" % (self.nobles),
                )
            )
# Implements game logic.
# [docs] -- Sphinx source-link marker left over from HTML extraction; not part of the module.
class SplendorGameRule(GameRule):
    """Game logic for Splendor: state transitions, end condition, scoring and
    legal-action generation.

    Actions are dicts that always take one of the following forms:
        {'type': 'collect_diff'/'collect_same', 'collected_gems': {gem counts},
         'returned_gems': {gem counts}, 'noble': noble}
        {'type': 'reserve', 'card': card, 'card_position': (row, col),
         'collected_gems': {'yellow': 1} or {}, 'returned_gems': {gem counts},
         'noble': noble}
        {'type': 'buy_available'/'buy_reserve', 'card': card,
         'card_position': (row, col), 'returned_gems': {gem counts}, 'noble': noble}
        {'type': 'pass', 'noble': noble}
    where ``noble`` is a ``(code, cost)`` pair or None.
    """

    def __init__(self, num_of_agent):
        super().__init__(num_of_agent)
        # No private information: agent states are available to other agents. While upcoming cards are random,
        # the decks are still provided in the gamestate for agents to use if they want.
        self.private_information = None

    # PRIVATE HELPERS ------------------------------------------------------------------------------------------------#

    @staticmethod
    def _move_gems(source, target, gems):
        """Move ``gems`` (colour -> count) out of stack ``source`` and into ``target``."""
        for colour, count in gems.items():
            source[colour] -= count
            target[colour] += count

    @staticmethod
    def _redeal_slot(board, card):
        """Replace ``card`` on the board with a freshly dealt card.

        The card is located by its unique code within its tier's row so that
        the positions of the other dealt cards are retained. The slot becomes
        None if the deck is empty. Returns True if the card was found.
        """
        row = board.dealt[card.deck_id]
        for index, dealt in enumerate(row):
            if dealt and dealt.code == card.code:
                row[index] = board.deal(card.deck_id)
                return True
        return False

    @staticmethod
    def _restore_slot(board, position, card):
        """Undo a re-deal: put ``card`` back at ``position`` on the board.

        Whatever card currently occupies the slot (if any) was drawn from the
        deck to replace ``card``, so it is pushed back onto its deck first.
        """
        i, j = position
        replacement = board.dealt[i][j]
        if replacement is not None:
            board.decks[replacement.deck_id].append(replacement)
        board.dealt[i][j] = card

    @staticmethod
    def _requirements_met(card_counts, noble):
        """Return True if ``card_counts`` (colour -> cards owned) covers the noble's cost."""
        _, costs = noble
        return all(card_counts[colour] >= cost for colour, cost in costs.items())

    # STATE TRANSITIONS ----------------------------------------------------------------------------------------------#

    def initialGameState(self):
        """Return a fresh ``SplendorState`` for ``self.num_of_agent`` agents."""
        return SplendorState(self.num_of_agent)

    def generatePredecessor(self, state, action, agent_id):
        """Undo ``action`` for agent ``agent_id``, mutating and returning ``state``.

        This reverses the effects of ``generateSuccessor``: gems flow back,
        cards return to the board/deck/reserve, a visiting noble returns to the
        board, the score is decremented and the last trace entry is dropped.

        Raises:
            IndexError: if the agent's trace is empty.
            ValueError: if the card or noble to take back is not held by the agent.
        """
        agent, board = state.agents[agent_id], state.board
        agent.last_action = action  # Record last action such that other agents can make use of this information.
        score = 0
        action_type = action["type"]
        card = action["card"] if "card" in action else None

        if "collect" in action_type or action_type == "reserve":
            # Collected gems go back to the board; returned gems come back to the agent.
            self._move_gems(agent.gems, board.gems, action["collected_gems"])
            self._move_gems(board.gems, agent.gems, action["returned_gems"])
            if action_type == "reserve":
                # Remove card from reserved cards and put it back on the board.
                agent.cards["yellow"].remove(card)
                self._restore_slot(board, action["card_position"], card)
        elif "buy" in action_type:
            # Gems paid for the card come back to the agent.
            self._move_gems(board.gems, agent.gems, action["returned_gems"])
            if "available" in action_type:
                self._restore_slot(board, action["card_position"], card)
            else:
                # Agent bought a reserved card. Return card to agent's yellow stack.
                agent.cards["yellow"].append(card)
            agent.cards[card.colour].remove(card)
            score -= card.points

        if action["noble"] is not None:
            # Return the noble to the board and take back its 3 points.
            board.nobles.append(action["noble"])
            agent.nobles.remove(action["noble"])
            score -= 3

        # Drop the trace entry logged for this action. The last entry is removed, rather than searching for
        # (action, score), because that is believed to be more reliable.
        agent.agent_trace.action_reward.pop()
        agent.score += score  # score is negative (or zero)
        agent.passed = action_type == "pass"
        return state

    def generateSuccessor(self, state, action, agent_id):
        """Apply ``action`` for agent ``agent_id``, mutating and returning ``state``.

        Gems are exchanged with the board, a reserved/bought card is moved to
        the agent (and its board slot re-dealt), a visiting noble is claimed
        for 3 points, and the (action, score) pair is appended to the trace.
        """
        agent, board = state.agents[agent_id], state.board
        agent.last_action = action  # Record last action such that other agents can make use of this information.
        score = 0
        action_type = action["type"]
        card = action["card"] if "card" in action else None

        if "collect" in action_type or action_type == "reserve":
            # Collected gems leave the board; returned gems go back to it.
            self._move_gems(board.gems, agent.gems, action["collected_gems"])
            self._move_gems(agent.gems, board.gems, action["returned_gems"])
            # A reserved card is only added to the agent's yellow stack if it was found on the board.
            if action_type == "reserve" and self._redeal_slot(board, card):
                agent.cards["yellow"].append(card)
        elif "buy" in action_type:
            # Gems paid for the card go back to the board.
            self._move_gems(agent.gems, board.gems, action["returned_gems"])
            if "available" in action_type:
                self._redeal_slot(board, card)
            else:
                # Agent is buying a reserved card. Remove it from the yellow stack, locating it by code.
                reserved = agent.cards["yellow"]
                for index, held in enumerate(reserved):
                    if held.code == card.code:
                        del reserved[index]
                        break
            # Add card to agent's stack of matching colour, and increment score accordingly.
            agent.cards[card.colour].append(card)
            score += card.points

        noble = action["noble"]
        if noble:
            # Nobles aren't hashable (they hold cost dicts), so locate the noble via its unique code.
            for index, candidate in enumerate(board.nobles):
                if candidate[0] == noble[0]:
                    del board.nobles[index]
                    agent.nobles.append(noble)
                    score += 3
                    break

        # Log this turn's action and any resultant score. Return updated gamestate.
        agent.agent_trace.action_reward.append((action, score))
        agent.score += score
        agent.passed = action_type == "pass"
        return state

    # END CONDITION AND SCORING --------------------------------------------------------------------------------------#

    def gameEnds(self):
        """Return True when the game is over.

        The game ends if any agent has at least 15 points and the round is
        complete (``self.current_agent_index == 0``). As a very rare edge case,
        it also ends when every agent's last action was a pass (deadlock).
        """
        agents = self.current_game_state.agents
        if any(agent.score >= 15 for agent in agents) and self.current_agent_index == 0:
            return True
        return all(agent.passed for agent in agents)

    def calScore(self, game_state, agent_id):
        """Return the final score of agent ``agent_id``.

        If several agents share the top score, the tie is broken on purchased
        cards: a tied victor holding the fewest purchased (non-reserved) cards
        among the tied victors receives an extra half point.
        """

        def bought_cards(a):
            # Reserved cards live in the "yellow" stack and do not count as purchased.
            return sum(
                len(cards) for colour, cards in a.cards.items() if colour != "yellow"
            )

        agents = game_state.agents
        this_agent = agents[agent_id]
        max_score = max([0] + [a.score for a in agents])
        victors = [a for a in agents if a.score == max_score]
        if len(victors) > 1 and agent_id in [a.id for a in victors]:
            # Compare only against the other tied victors: a trailing agent that
            # happens to own fewer cards must not cancel the tie-break.
            fewest = min(bought_cards(a) for a in victors)
            if bought_cards(this_agent) == fewest:
                return this_agent.score + 0.5
        return this_agent.score

    # ACTION GENERATION HELPERS --------------------------------------------------------------------------------------#

    def generate_return_combos(self, current_gems, collected_gems):
        """List the gem combinations an agent may return to stay within 10 gems.

        Agents are disallowed from returning gems of the same colour as those
        they've just picked up. Since collected_gems is sampled exhaustively by
        the caller, screening out those colours here is enough to offer every
        permissible collected/returned combination.

        Returns:
            ``[{}]`` if nothing needs to be returned; ``[]`` if the limit is
            exceeded but too few returnable gems exist (the collection is not
            viable); otherwise a list of distinct ``{colour: count}`` dicts.
        """
        total_gem_count = sum(current_gems.values()) + sum(collected_gems.values())
        if total_gem_count <= 10:
            return [{}]
        num_return = total_gem_count - 10
        # One list element per gem held, excluding colours that were just collected.
        returnable = [
            colour
            for colour, count in current_gems.items()
            if colour not in collected_gems
            for _ in range(count)
        ]
        if len(returnable) < num_return:
            return []
        # dict.fromkeys de-duplicates while keeping first-seen order, so the result
        # does not depend on string hash randomisation between runs.
        return_combos = []
        for combo in dict.fromkeys(itertools.combinations(returnable, num_return)):
            returned_gems = {}
            for colour in combo:
                returned_gems[colour] = returned_gems.get(colour, 0) + 1
            return_combos.append(returned_gems)
        return return_combos

    def resources_sufficient(self, agent, costs):
        """Check whether the agent's cards and gems can cover ``costs``.

        Returns:
            False if the agent cannot afford the cost; otherwise a dict
            ``{colour: count}`` of the gems to hand back (possibly empty when
            owned cards cover everything).
        """
        wild = agent.gems["yellow"]
        return_combo = {c: 0 for c in COLOURS.values()}
        for colour, cost in costs.items():
            owned_cards = len(agent.cards[colour])
            owned_gems = agent.gems[colour]
            # If a shortfall is found, see if the difference can be made with wild/seal/yellow gems.
            shortfall = max(cost - (owned_gems + owned_cards), 0)  # Shortfall shouldn't be negative.
            wild -= shortfall
            # If wilds are expended, the agent cannot make the purchase.
            if wild < 0:
                return False
            # The agent never returns gems if it can pay using its card stacks, and never returns wilds if it
            # can return coloured gems instead. Holding on to coloured gems may occasionally be strategic, but
            # that edge case is not worth the added complexity here.
            gem_cost = max(cost - owned_cards, 0)  # Gems owed.
            gem_shortfall = max(gem_cost - owned_gems, 0)  # Wilds required.
            return_combo[colour] = gem_cost - gem_shortfall  # Coloured gems to be returned.
            return_combo["yellow"] += gem_shortfall  # Wilds to be returned.
        # Filter out unnecessary colours and return dict specifying combination of gems.
        return {colour: count for colour, count in return_combo.items() if count > 0}

    def noble_visit(self, agent, noble):
        """Return True if ``noble`` is a candidate for visiting ``agent``,
        i.e. the agent owns enough cards of every colour the noble requires."""
        card_counts = {colour: len(cards) for colour, cards in agent.cards.items()}
        return self._requirements_met(card_counts, noble)

    # LEGAL ACTIONS --------------------------------------------------------------------------------------------------#

    def getLegalActions(self, game_state, agent_id):
        """Return every legal action for agent ``agent_id`` in ``game_state``.

        A given turn consists of the following:
          1. Collect gems (up to 3 different) OR
             Collect gems (2 same, if stack >=4) OR
             Reserve one of 12 available cards OR
             Buy one of 12 available cards OR
             Buy a previously reserved card.
          2. Discard down to 10 gems if necessary.
          3. Obtain a noble if requirements are met.
        Since the gamestate does not change during an agent's turn, all turn
        parts are planned at once and encoded in a single action dict. If no
        action is possible, the only legal action is a pass.
        """
        actions = []
        agent, board = game_state.agents[agent_id], game_state.board

        # First, check if any nobles are waiting to visit from the last turn. In the exceedingly rare case that
        # there are multiple nobles waiting, multiply all generated actions by these nobles to allow the agent
        # to choose again.
        potential_nobles = [
            noble for noble in board.nobles if self.noble_visit(agent, noble)
        ] or [None]

        # Generate actions (collect up to 3 different gems). Work out all legal combinations.
        available_colours = [
            colour
            for colour, number in board.gems.items()
            if colour != "yellow" and number > 0
        ]
        num_holding_gem = sum(agent.gems.values())
        if num_holding_gem <= 7:
            wanted = 3
        elif num_holding_gem == 8:
            wanted = 2
        else:
            wanted = 1
        min_comb_len = min(wanted, len(available_colours))
        for combo_length in range(min_comb_len, min(len(available_colours), 3) + 1):
            for combo in itertools.combinations(available_colours, combo_length):
                collected_gems = {colour: 1 for colour in combo}
                # Make sure there is no action that collects no gems at all.
                if not collected_gems:
                    continue
                # Find combos of gems to return, if any. An empty list means taking these gems is invalid.
                # Handling this branching factor properly will be crucial for agent performance.
                for returned_gems in self.generate_return_combos(
                    agent.gems, collected_gems
                ):
                    for noble in potential_nobles:
                        actions.append(
                            {
                                "type": "collect_diff",
                                "collected_gems": collected_gems,
                                "returned_gems": returned_gems,
                                "noble": noble,
                            }
                        )

        # Generate actions (collect 2 identical gems). Only stacks holding at least 4 gems qualify.
        available_colours = [
            colour
            for colour, number in board.gems.items()
            if colour != "yellow" and number >= 4
        ]
        for colour in available_colours:
            collected_gems = {colour: 2}
            for returned_gems in self.generate_return_combos(
                agent.gems, collected_gems
            ):
                for noble in potential_nobles:
                    actions.append(
                        {
                            "type": "collect_same",
                            "collected_gems": collected_gems,
                            "returned_gems": returned_gems,
                            "noble": noble,
                        }
                    )

        # Generate actions (reserve card). Agent can reserve only if it possesses < 3 cards currently reserved.
        # With a reservation, the agent will receive one seal (yellow), if there are any left. Reservations are
        # stored under the agent's yellow stack, as they won't generate their true colour until purchased.
        if len(agent.cards["yellow"]) < 3:
            collected_gems = {"yellow": 1} if board.gems["yellow"] > 0 else {}
            for returned_gems in self.generate_return_combos(
                agent.gems, collected_gems
            ):
                for row, deck in enumerate(board.dealt):
                    for col, card in enumerate(deck):
                        if not card:
                            continue
                        for noble in potential_nobles:
                            actions.append(
                                {
                                    "type": "reserve",
                                    "card": card,
                                    "card_position": (row, col),
                                    "collected_gems": collected_gems,
                                    "returned_gems": returned_gems,
                                    "noble": noble,
                                }
                            )

        # Generate actions (buy card). Agents can buy cards if they can cover the resource cost from their gem
        # and card stacks. Card stacks act as 'permanent gems', so gems are only returned if the stack of that
        # colour is insufficient to cover the cost.
        # Agents are disallowed from purchasing > 7 cards of any one colour, for the purposes of a clean
        # interface. Candidates are the cards dealt onto the board plus the cards reserved by this agent.
        card_counts = {colour: len(cards) for colour, cards in agent.cards.items()}
        for row, deck in enumerate(board.dealt + [agent.cards["yellow"]]):
            for col, card in enumerate(deck):
                if not card or len(agent.cards[card.colour]) == 7:
                    continue
                # A dict means the card is affordable; False means it is not.
                returned_gems = self.resources_sufficient(agent, card.cost)
                if not isinstance(returned_gems, dict):
                    continue
                # Check whether acquiring this card makes nobles candidates to visit. Counting the card in a
                # copy of the per-colour totals avoids deep-copying the whole agent for every noble.
                counts_after = dict(card_counts)
                counts_after[card.colour] += 1
                new_nobles = [
                    noble
                    for noble in board.nobles
                    if self._requirements_met(counts_after, noble)
                ] or [None]
                buy_type = (
                    "buy_reserve" if card in agent.cards["yellow"] else "buy_available"
                )
                for noble in new_nobles:
                    actions.append(
                        {
                            "type": buy_type,
                            "card": card,
                            "card_position": (row, col),
                            "returned_gems": returned_gems,
                            "noble": noble,
                        }
                    )

        # If there are no actions (almost impossible), all this player can do is pass.
        # A noble is still permitted to visit if conditions are met.
        if not actions:
            for noble in potential_nobles:
                actions.append({"type": "pass", "noble": noble})
        return actions
# END FILE -----------------------------------------------------------------------------------------------------------#