# Run-metadata snapshot for a single training attempt: identity of the run,
# where its artifacts live, and which config file it was launched from.
metadata_version: 2
# Quoted, so it loads as a plain string rather than a YAML timestamp.
saved_at: '2026-05-22T19:23:59.249701+00:00'
entrypoint: src.train_game
game: simplified__first_attack
algo: efficientzero
seed: 0
# Same value as source_config.defaults.max_env_step.
max_env_step: 150000
# Equals cli_args.ckpt_dir plus a "_260522_170112" suffix.
# NOTE(review): the suffix looks like a launch timestamp — confirm.
exp_name: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112
# exp_name with "/ckpt" appended.
checkpoint_dir: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112/ckpt
# Same path as cli_args.config.
config_path: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/configs/simplified__first_attack.yaml
# Command-line arguments as recorded for this run.
# NOTE(review): null entries presumably mean "not overridden on the CLI, take
# the value from the config file" — confirm against the entrypoint's parser.
cli_args:
  game: null
  # Same path as the top-level config_path.
  config: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/configs/simplified__first_attack.yaml
  list_games: false
  algo: null
  num_simulations: null
  batch_size: null
  update_per_collect: null
  lr: null
  seed: null
  max_env_step: null
  num_res_blocks: null
  num_channels: null
  replay_buffer_size: null
  num_unroll_steps: null
  reanalyze_ratio: null
  battle_mode: null
  no_wandb: false
  cpu: false
  torch_num_threads: 4
  torch_num_interop_threads: 1
  no_checkpoints: false
  # Lacks the "_260522_170112" suffix present in the top-level exp_name and
  # checkpoint_dir.
  ckpt_dir: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01
  # null here, although source_config.checkpoints.load_from and
  # resolved_main_config.policy.model_path both name a checkpoint.
  model_path: null
  ckpt_every_env_steps: null
  max_checkpoints_to_keep: null
  best_ckpt_strategy: null
  best_ckpt_ema_alpha: null
  best_ckpt_min_episodes: null
  eval_every_train_iters: null
  eval_every_env_steps: null
  eval_every_env_step_ratio: null
  eval_opponent_type: null
  fixed_bot_evaluator_type: null
  no_fixed_bot_seat_swap: false
  eval_opponent_checkpoint_path: null
  eval_opponent_checkpoint_selector: null
  eval_opponent_update_policy: null
  eval_opponent_num_simulations: null
  eval_opponent_episodes: null
  eval_opponent_env_num: null
  eval_opponent_promotion_threshold: null
  eval_opponent_fallback_to_env_bot: false
  # Differs from source_config.wandb_project ("crpt").
  # NOTE(review): presumably the CLI value is the one actually used — confirm.
  wandb_project: crpt-simplified5-corrected-quality
  wandb_run_name: main5_hf_bot_mode_recovery_50k_20260522__simplified__first_attack__a01
  wandb_group: main5_hf_bot_mode_recovery_50k_20260522
  wandb_run_id: crpt-cb73f8ff7f8fa54600a2
  wandb_dir: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/wandb
# Training configuration as recorded before resolution.
# NOTE(review): presumably the parsed contents of config_path — confirm.
source_config:
  game: simplified__first_attack
  algo: efficientzero
  # cli_args.wandb_project holds a different value
  # ("crpt-simplified5-corrected-quality").
  wandb_project: crpt
  wandb_enabled: true
  checkpoints:
    save: true
    every_env_steps: 10000
    max_checkpoints_to_keep: 3
    # Same path as resolved_main_config.policy.model_path; it points into a
    # different run directory (main5_hf_bot_mode_200k_collector_tuned_20260522).
    load_from: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar
  evaluation:
    every_train_iters: null
    runtime_battle_mode: eval_mode
    opponent_regime: meaningful_fixed_bot
    opponent_impl: placement_constraint
    primary_metric: win_rate_vs_fixed_bot
    bot_strength_tier: moderate
    bot_deterministic: true
    requires_paired_audit: false
    meaningful_fixed_bot: true
    opponent_type: env_bot
    fixed_bot_evaluator:
      type: arena
      seat_swap: true
    # resolved_main_config.policy.eval_freq is 150001, not this value.
    # NOTE(review): this cadence is presumably applied by the project's own
    # evaluation hook rather than through eval_freq — confirm.
    every_env_steps: 10000
  env:
    battle_mode: play_with_bot_mode
    battle_mode_in_simulation_env: self_play_mode
    bot_action_type: rule
    prob_random_action_in_bot: 0.0
    collector_env_num: 32
    evaluator_env_num: 20
    n_evaluator_episode: 20
    # These keys appear directly under resolved_main_config.env (not nested
    # under an extra_config key) with the same values.
    extra_config:
      collector_bot_mode_seat_swap: true
      # Keys are quoted, so they load as strings '1' and '2'; the weights sum
      # to 1.0.
      collector_bot_mode_live_seat_weights:
        '1': 0.25
        '2': 0.75
    # No stop_value key appears anywhere in resolved_main_config.
    stop_value: 2
  # Every value here that also appears in resolved_main_config.policy matches
  # it.
  defaults:
    seed: 0
    num_simulations: 50
    batch_size: 256
    update_per_collect: 25
    learning_rate: 0.003
    replay_buffer_size: 50000
    # Written as an integer (1), not a float.
    discount_factor: 1
    game_segment_length: 5
    td_steps: 5
    reanalyze_ratio: 0.0
    num_unroll_steps: 5
    piecewise_decay_lr_scheduler: false
    max_env_step: 150000
  model:
    num_res_blocks: 1
    num_channels: 32
# Summary of the instantiated model: policy/model class names, trainable
# parameter count, and the model hyperparameters.
model_info:
  policy_type: efficientzero
  model_class: EfficientZeroModel
  trainable_params: 6742822
  # Same keys and values as resolved_main_config.policy.model.
  model_config:
    observation_shape:
    - 3
    - 6
    - 6
    action_space_size: 36
    image_channel: 3
    num_res_blocks: 1
    num_channels: 32
  # The fields below repeat model_config at the top level of model_info with
  # identical values.
  num_res_blocks: 1
  num_channels: 32
  observation_shape:
  - 3
  - 6
  - 6
  action_space_size: 36
  image_channel: 3
# Main config after resolution (env + policy sections).
# NOTE(review): presumably what is handed to the training framework — confirm.
resolved_main_config:
  # Same value as the top-level exp_name.
  exp_name: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112
  env:
    battle_mode: play_with_bot_mode
    bot_action_type: rule
    channel_last: false
    collector_env_num: 32
    evaluator_env_num: 20
    n_evaluator_episode: 20
    manager:
      shared_memory: false
    battle_mode_in_simulation_env: self_play_mode
    prob_random_action_in_bot: 0.0
    # The next two keys are nested under source_config.env.extra_config in the
    # source config; source_config.env.stop_value has no counterpart here.
    collector_bot_mode_seat_swap: true
    collector_bot_mode_live_seat_weights:
      '1': 0.25
      '2': 0.75
  policy:
    model:
      observation_shape:
      - 3
      - 6
      - 6
      action_space_size: 36
      image_channel: 3
      num_res_blocks: 1
      num_channels: 32
    # Same path as source_config.checkpoints.load_from.
    model_path: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar
    cuda: true
    env_type: board_games
    action_type: varied_action_space
    game_segment_length: 5
    update_per_collect: 25
    batch_size: 256
    optim_type: Adam
    learning_rate: 0.003
    grad_clip_value: 0.5
    num_simulations: 50
    reanalyze_ratio: 0.0
    td_steps: 5
    discount_factor: 1
    # Equal to collector_env_num (32).
    n_episode: 32
    # One more than max_env_step (150000).
    # NOTE(review): presumably set so the framework's built-in periodic
    # evaluation never triggers within the run — confirm the unit of eval_freq.
    eval_freq: 150001
    replay_buffer_size: 50000
    collector_env_num: 32
    evaluator_env_num: 20
    use_wandb: true
    # The corresponding cli_args best_ckpt_* entries are all null.
    best_ckpt_strategy: raw
    best_ckpt_ema_alpha: 0.3
    best_ckpt_min_episodes: 20
    num_unroll_steps: 5
    battle_mode: play_with_bot_mode
    battle_mode_in_simulation_env: self_play_mode
    piecewise_decay_lr_scheduler: false
    mcts_ctree: true
    eval_opponent_type: env_bot
    fixed_bot_evaluator:
      type: arena
      seat_swap: true
    # path and the per-opponent overrides are null in this run, and
    # eval_opponent_type above is env_bot.
    previous_best_checkpoint:
      path: null
      selector: best
      update_policy: on_new_best
      num_simulations: null
      n_evaluator_episode: null
      evaluator_env_num: null
      promotion_threshold: 0.0
      fallback_to_env_bot: false
    device: cuda
# Resolved "create" config: for the env and the policy, a type name plus the
# module path(s) listed under import_names; for the env manager, a type only.
# NOTE(review): import_names are presumably modules imported to register the
# named type — confirm against the framework.
resolved_create_config:
  env:
    # Same identifier as the top-level game key.
    type: simplified__first_attack
    import_names:
    - custom_games_simplified.simplified__first_attack.envs.first_attack_env
  env_manager:
    type: subprocess
  policy:
    # Same identifier as the top-level algo key.
    type: efficientzero
    import_names:
    - lzero.policy.efficientzero