Source code for splendor.agents.our_agents.minmax

"""
Implementation of an agent that selects the first legal action.
"""

import operator
import random
from typing import override

import numpy as np

from splendor.splendor.constants import WINNING_SCORE_TRESHOLD
from splendor.splendor.splendor_model import SplendorGameRule, SplendorState
from splendor.splendor.types import ActionType
from splendor.template import Agent

# This agent supports only a game of 2 players
EXPECTED_AMOUNT_OF_PLAYERS = 2
DEPTH = 2
GEMS_AMOUNT_THRESHOLD = 8



[docs]
class MiniMaxAgent(Agent):
    """
    A Minimax agent, utilizing the zero-sum property of the game,
    there is only a single winner in each game, for determining which
    action to play.
    """

    # pylint: disable=too-few-public-methods


[docs]
    @override
    def SelectAction(
        self,
        actions: list[ActionType],
        game_state: SplendorState,
        game_rule: SplendorGameRule,
    ) -> ActionType:
        assert len(game_state.agents) == EXPECTED_AMOUNT_OF_PLAYERS
        selected_action = self._select_action_recursion(game_state, game_rule, DEPTH)[0]
        assert selected_action is not None
        return selected_action


    def _select_action_recursion(  # noqa: PLR0913,PLR0917
        self,
        game_state: SplendorState,
        game_rule: SplendorGameRule,
        depth: int,
        is_maximizing: bool = True,
        alpha: float = -np.inf,
        beta: float = np.inf,
    ) -> tuple[ActionType | None, float]:
        # pylint: disable=too-many-arguments,too-many-positional-arguments
        if depth == 0:
            return None, self._evaluation_function(game_state)
        agent_id = self.id if is_maximizing else 1 - self.id
        actions = game_rule.getLegalActions(game_state, agent_id)
        random.shuffle(actions)
        actions.sort(key=operator.itemgetter("type"))
        assert len(actions) != 0

        best_action = None
        best_value = -np.inf if is_maximizing else np.inf
        for action in actions:
            next_state = game_rule.generateSuccessor(game_state, action, agent_id)
            _, action_value = self._select_action_recursion(
                next_state, game_rule, depth - 1, not is_maximizing, alpha, beta
            )
            # generateSuccessor alternates the game_state inplace,
            # that's why we need to call generatePredecessor to revert
            # it (even though we do not need its output)
            _ = game_rule.generatePredecessor(game_state, action, agent_id)

            if is_maximizing:
                if action_value > best_value:
                    best_value = action_value
                    best_action = action
                alpha = max(alpha, best_value)
                if beta <= alpha:
                    break
            else:
                if action_value < best_value:
                    best_value = action_value
                    best_action = action
                beta = min(beta, best_value)
                if beta <= alpha:
                    break

        return best_action, best_value

    def _evaluation_function(self, state: SplendorState) -> float:
        # pylint: disable=too-many-locals
        agent_state = state.agents[self.id]
        score_factor = 2
        cards_factor = 0.7
        gems_factor = 0.1
        gems_var_factor = -0.2
        color_cost_factor = 0.1
        reward = 0

        max_score = max(agent.score for agent in state.agents)
        if max_score >= WINNING_SCORE_TRESHOLD:
            reward = 99999 + max_score
            if max_score > agent_state.score:
                reward *= -1
            return reward
        if sum(agent_state.gems.values()) >= GEMS_AMOUNT_THRESHOLD:
            gems_factor = -0.7
        gems_var = np.var(list(agent_state.gems.values()))

        for card in state.board.dealt_list() + agent_state.cards["yellow"]:
            relevant_to_nobles = 0
            for _, noble_cost in state.board.nobles:
                if (
                    card.colour in noble_cost
                    and len(agent_state.cards[card.colour]) < noble_cost[card.colour]
                ):
                    # Card is relevant for nobles, increase its weight
                    relevant_to_nobles += 1

            for color, color_cost in card.cost.items():
                reward -= abs(
                    (
                        color_cost
                        - (agent_state.gems[color] + len(agent_state.cards[color]))
                    )
                    * color_cost_factor
                    * (card.points + 1 + relevant_to_nobles * 0.5)
                )

        return (
            reward
            + agent_state.score * score_factor
            + len(agent_state.cards) * cards_factor
            + sum(agent_state.gems.values()) * gems_factor
            + gems_var * gems_var_factor
        )



myAgent = MiniMaxAgent  # pylint: disable=invalid-name