Source code for splendor.agents.our_agents.ppo.arguments_parsing

from argparse import ArgumentParser
from dataclasses import dataclass
from pathlib import Path

from splendor.version import get_version

from splendor.agents.generic.random import myAgent as RandomAgent
from splendor.agents.our_agents.minmax import myAgent as MinMaxAgent

from .ppo_rnn.gru.network import PPO_GRU
from .ppo_rnn.gru.ppo_agent import DEFAULT_SAVED_PPO_GRU_PATH
from .network import PPO
from .ppo_agent import DEFAULT_SAVED_PPO_PATH
from .ppo_base import PPOBaseFactory
from .constants import (
    SEED,
    LEARNING_RATE,
    WEIGHT_DECAY,
)


@dataclass
class NeuralNetArch:
    """
    Description of one selectable neural-network architecture.

    Instances are plain records; the dataclass decorator generates the
    positional/keyword constructor, ``__repr__`` and ``__eq__``.
    """

    # Identifier of the architecture (e.g. "ppo_mlp").
    name: str
    # Object used to build the PPO network for this architecture.
    ppo_factory: PPOBaseFactory
    # Whether the network is recurrent (e.g. GRU based).
    is_recurrent: bool
    # Location of the weights shipped by default for this architecture.
    default_saved_weights: Path
# Opponent line-ups keyed by the name accepted on the command line.
# NOTE(review): the ``0`` passed to each agent is presumably its player id
# -- confirm against the agents' constructors.
OPPONENTS_AGENTS = {
    "random": [RandomAgent(0)],
    "minimax": [MinMaxAgent(0)],
}
DEFAULT_OPPONENT = "random"
DEFAULT_TEST_OPPONENT = "minimax"
# Live view over the opponent names; used as the argparse ``choices``.
OPPONENTS_CHOICES = OPPONENTS_AGENTS.keys()

# Supported network architectures keyed by their command-line name.
NN_ARCHITECTURES = {
    "mlp": NeuralNetArch(
        name="ppo_mlp",
        ppo_factory=PPO,
        is_recurrent=False,
        default_saved_weights=DEFAULT_SAVED_PPO_PATH,
    ),
    "gru": NeuralNetArch(
        name="ppo_gru",
        ppo_factory=PPO_GRU,
        is_recurrent=True,
        default_saved_weights=DEFAULT_SAVED_PPO_GRU_PATH,
    ),
}
# Live view over the architecture names; used as the argparse ``choices``.
NN_ARCHITECTURES_CHOICES = NN_ARCHITECTURES.keys()
DEFAULT_ARCHITECTURE = "mlp"

# Absolute path of the directory the process was started from
# (resolved once, at import time).
WORKING_DIR = Path().absolute()
def parse_args(argv=None):
    """
    Parse the command-line arguments of the PPO training program.

    :param argv: optional sequence of argument strings to parse. When
                 ``None`` (the default) the arguments are taken from
                 ``sys.argv[1:]``, which is the behavior of calling this
                 function without arguments.
    :return: an ``argparse.Namespace`` with the attributes
             ``learning_rate``, ``weight_decay``, ``working_dir``, ``seed``,
             ``transfer_learning``, ``saved_weights``, ``opponent``,
             ``test_opponent``, ``device`` and ``architecture``.
    :raises SystemExit: raised by ``argparse`` on invalid arguments, and
                        after printing the help or version message.
    """
    parser = ArgumentParser(
        prog="ppo",
        description="Train a PPO agent.",
    )
    parser.add_argument("--version", action="version", version=get_version())

    # Optimizer hyper-parameters.
    parser.add_argument(
        "-l",
        "--learning-rate",
        default=LEARNING_RATE,
        type=float,
        help="The learning rate to use during training with gradient descent",
    )
    parser.add_argument(
        "-d",
        "--weight-decay",
        default=WEIGHT_DECAY,
        type=float,
        help="The weight decay (L2 regularization) to use during training with gradient descent",
    )

    # Run bookkeeping: output location and reproducibility.
    parser.add_argument(
        "-w",
        "--working-dir",
        default=WORKING_DIR,
        type=Path,
        help="Path to directory to work in (will create a directory with "
        "current timestamp for each run)",
    )
    parser.add_argument(
        "-s",
        "--seed",
        default=SEED,
        type=int,
        help="Seed to set for numpy's, torch's and random's random number generators.",
    )

    # Transfer learning: --saved-weights is only meaningful together with -t.
    parser.add_argument(
        "-t",
        "--transfer-learning",
        action="store_true",
        help="Learn from previously learned model, i.e. transfer learning from previous training sessions",
    )
    parser.add_argument(
        "--saved-weights",
        default=None,
        type=Path,
        help="Path to the weights to start from a new learning session (ignored if not in transfer-learning mode)",
    )

    # Opponents used during training and during evaluation.
    parser.add_argument(
        "-o",
        "--opponent",
        type=str,
        default=DEFAULT_OPPONENT,
        choices=OPPONENTS_CHOICES,
        help="Against whom the PPO should train",
    )
    parser.add_argument(
        "--test-opponent",
        type=str,
        default=DEFAULT_TEST_OPPONENT,
        choices=OPPONENTS_CHOICES,
        help="Against whom the PPO should be evaluated",
    )

    # Compute device and network architecture.
    parser.add_argument(
        "--device",
        default="cuda",
        type=str,
        choices=("cuda", "cpu", "mps"),
        help="On which device to do heavy mathematical computation",
    )
    parser.add_argument(
        "-a",
        "--architecture",
        type=str,
        default=DEFAULT_ARCHITECTURE,
        choices=NN_ARCHITECTURES_CHOICES,
        help="What type of architecture of the neural network should be used",
    )

    # argparse falls back to sys.argv[1:] when argv is None.
    return parser.parse_args(argv)