| metadata_version: 2 |
| saved_at: '2026-05-22T19:23:59.249701+00:00' |
| entrypoint: src.train_game |
| game: simplified__first_attack |
| algo: efficientzero |
| seed: 0 |
| max_env_step: 150000 |
| exp_name: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112 |
| checkpoint_dir: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112/ckpt |
| config_path: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/configs/simplified__first_attack.yaml |
| cli_args: |
| game: null |
| config: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/configs/simplified__first_attack.yaml |
| list_games: false |
| algo: null |
| num_simulations: null |
| batch_size: null |
| update_per_collect: null |
| lr: null |
| seed: null |
| max_env_step: null |
| num_res_blocks: null |
| num_channels: null |
| replay_buffer_size: null |
| num_unroll_steps: null |
| reanalyze_ratio: null |
| battle_mode: null |
| no_wandb: false |
| cpu: false |
| torch_num_threads: 4 |
| torch_num_interop_threads: 1 |
| no_checkpoints: false |
| ckpt_dir: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01 |
| model_path: null |
| ckpt_every_env_steps: null |
| max_checkpoints_to_keep: null |
| best_ckpt_strategy: null |
| best_ckpt_ema_alpha: null |
| best_ckpt_min_episodes: null |
| eval_every_train_iters: null |
| eval_every_env_steps: null |
| eval_every_env_step_ratio: null |
| eval_opponent_type: null |
| fixed_bot_evaluator_type: null |
| no_fixed_bot_seat_swap: false |
| eval_opponent_checkpoint_path: null |
| eval_opponent_checkpoint_selector: null |
| eval_opponent_update_policy: null |
| eval_opponent_num_simulations: null |
| eval_opponent_episodes: null |
| eval_opponent_env_num: null |
| eval_opponent_promotion_threshold: null |
| eval_opponent_fallback_to_env_bot: false |
| wandb_project: crpt-simplified5-corrected-quality |
| wandb_run_name: main5_hf_bot_mode_recovery_50k_20260522__simplified__first_attack__a01 |
| wandb_group: main5_hf_bot_mode_recovery_50k_20260522 |
| wandb_run_id: crpt-cb73f8ff7f8fa54600a2 |
| wandb_dir: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/wandb |
| source_config: |
| game: simplified__first_attack |
| algo: efficientzero |
| wandb_project: crpt |
| wandb_enabled: true |
| checkpoints: |
| save: true |
| every_env_steps: 10000 |
| max_checkpoints_to_keep: 3 |
| load_from: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar |
| evaluation: |
| every_train_iters: null |
| runtime_battle_mode: eval_mode |
| opponent_regime: meaningful_fixed_bot |
| opponent_impl: placement_constraint |
| primary_metric: win_rate_vs_fixed_bot |
| bot_strength_tier: moderate |
| bot_deterministic: true |
| requires_paired_audit: false |
| meaningful_fixed_bot: true |
| opponent_type: env_bot |
| fixed_bot_evaluator: |
| type: arena |
| seat_swap: true |
| every_env_steps: 10000 |
| env: |
| battle_mode: play_with_bot_mode |
| battle_mode_in_simulation_env: self_play_mode |
| bot_action_type: rule |
| prob_random_action_in_bot: 0.0 |
| collector_env_num: 32 |
| evaluator_env_num: 20 |
| n_evaluator_episode: 20 |
| extra_config: |
| collector_bot_mode_seat_swap: true |
| collector_bot_mode_live_seat_weights: |
| '1': 0.25 |
| '2': 0.75 |
| stop_value: 2 |
| defaults: |
| seed: 0 |
| num_simulations: 50 |
| batch_size: 256 |
| update_per_collect: 25 |
| learning_rate: 0.003 |
| replay_buffer_size: 50000 |
| discount_factor: 1 |
| game_segment_length: 5 |
| td_steps: 5 |
| reanalyze_ratio: 0.0 |
| num_unroll_steps: 5 |
| piecewise_decay_lr_scheduler: false |
| max_env_step: 150000 |
| model: |
| num_res_blocks: 1 |
| num_channels: 32 |
| model_info: |
| policy_type: efficientzero |
| model_class: EfficientZeroModel |
| trainable_params: 6742822 |
| model_config: |
| observation_shape: |
| - 3 |
| - 6 |
| - 6 |
| action_space_size: 36 |
| image_channel: 3 |
| num_res_blocks: 1 |
| num_channels: 32 |
| num_res_blocks: 1 |
| num_channels: 32 |
| observation_shape: |
| - 3 |
| - 6 |
| - 6 |
| action_space_size: 36 |
| image_channel: 3 |
| resolved_main_config: |
| exp_name: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112 |
| env: |
| battle_mode: play_with_bot_mode |
| bot_action_type: rule |
| channel_last: false |
| collector_env_num: 32 |
| evaluator_env_num: 20 |
| n_evaluator_episode: 20 |
| manager: |
| shared_memory: false |
| battle_mode_in_simulation_env: self_play_mode |
| prob_random_action_in_bot: 0.0 |
| collector_bot_mode_seat_swap: true |
| collector_bot_mode_live_seat_weights: |
| '1': 0.25 |
| '2': 0.75 |
| policy: |
| model: |
| observation_shape: |
| - 3 |
| - 6 |
| - 6 |
| action_space_size: 36 |
| image_channel: 3 |
| num_res_blocks: 1 |
| num_channels: 32 |
| model_path: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar |
| cuda: true |
| env_type: board_games |
| action_type: varied_action_space |
| game_segment_length: 5 |
| update_per_collect: 25 |
| batch_size: 256 |
| optim_type: Adam |
| learning_rate: 0.003 |
| grad_clip_value: 0.5 |
| num_simulations: 50 |
| reanalyze_ratio: 0.0 |
| td_steps: 5 |
| discount_factor: 1 |
| n_episode: 32 |
| eval_freq: 150001 |
| replay_buffer_size: 50000 |
| collector_env_num: 32 |
| evaluator_env_num: 20 |
| use_wandb: true |
| best_ckpt_strategy: raw |
| best_ckpt_ema_alpha: 0.3 |
| best_ckpt_min_episodes: 20 |
| num_unroll_steps: 5 |
| battle_mode: play_with_bot_mode |
| battle_mode_in_simulation_env: self_play_mode |
| piecewise_decay_lr_scheduler: false |
| mcts_ctree: true |
| eval_opponent_type: env_bot |
| fixed_bot_evaluator: |
| type: arena |
| seat_swap: true |
| previous_best_checkpoint: |
| path: null |
| selector: best |
| update_policy: on_new_best |
| num_simulations: null |
| n_evaluator_episode: null |
| evaluator_env_num: null |
| promotion_threshold: 0.0 |
| fallback_to_env_bot: false |
| device: cuda |
| resolved_create_config: |
| env: |
| type: simplified__first_attack |
| import_names: |
| - custom_games_simplified.simplified__first_attack.envs.first_attack_env |
| env_manager: |
| type: subprocess |
| policy: |
| type: efficientzero |
| import_names: |
| - lzero.policy.efficientzero |
|
|