metadata_version: 2 saved_at: '2026-05-22T19:23:59.249701+00:00' entrypoint: src.train_game game: simplified__first_attack algo: efficientzero seed: 0 max_env_step: 150000 exp_name: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112 checkpoint_dir: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112/ckpt config_path: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/configs/simplified__first_attack.yaml cli_args: game: null config: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/configs/simplified__first_attack.yaml list_games: false algo: null num_simulations: null batch_size: null update_per_collect: null lr: null seed: null max_env_step: null num_res_blocks: null num_channels: null replay_buffer_size: null num_unroll_steps: null reanalyze_ratio: null battle_mode: null no_wandb: false cpu: false torch_num_threads: 4 torch_num_interop_threads: 1 no_checkpoints: false ckpt_dir: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01 model_path: null ckpt_every_env_steps: null max_checkpoints_to_keep: null best_ckpt_strategy: null best_ckpt_ema_alpha: null best_ckpt_min_episodes: null eval_every_train_iters: null eval_every_env_steps: null eval_every_env_step_ratio: null eval_opponent_type: null fixed_bot_evaluator_type: null no_fixed_bot_seat_swap: false eval_opponent_checkpoint_path: null eval_opponent_checkpoint_selector: null eval_opponent_update_policy: null eval_opponent_num_simulations: null eval_opponent_episodes: null eval_opponent_env_num: null eval_opponent_promotion_threshold: null eval_opponent_fallback_to_env_bot: false wandb_project: crpt-simplified5-corrected-quality wandb_run_name: main5_hf_bot_mode_recovery_50k_20260522__simplified__first_attack__a01 wandb_group: main5_hf_bot_mode_recovery_50k_20260522 wandb_run_id: crpt-cb73f8ff7f8fa54600a2 wandb_dir: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/wandb source_config: game: simplified__first_attack algo: efficientzero wandb_project: crpt wandb_enabled: true checkpoints: save: true every_env_steps: 10000 max_checkpoints_to_keep: 3 load_from: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar evaluation: every_train_iters: null runtime_battle_mode: eval_mode opponent_regime: meaningful_fixed_bot opponent_impl: placement_constraint primary_metric: win_rate_vs_fixed_bot bot_strength_tier: moderate bot_deterministic: true requires_paired_audit: false meaningful_fixed_bot: true opponent_type: env_bot fixed_bot_evaluator: type: arena seat_swap: true every_env_steps: 10000 env: battle_mode: play_with_bot_mode battle_mode_in_simulation_env: self_play_mode bot_action_type: rule prob_random_action_in_bot: 0.0 collector_env_num: 32 evaluator_env_num: 20 n_evaluator_episode: 20 extra_config: collector_bot_mode_seat_swap: true collector_bot_mode_live_seat_weights: '1': 0.25 '2': 0.75 stop_value: 2 defaults: seed: 0 num_simulations: 50 batch_size: 256 update_per_collect: 25 learning_rate: 0.003 replay_buffer_size: 50000 discount_factor: 1 game_segment_length: 5 td_steps: 5 reanalyze_ratio: 0.0 num_unroll_steps: 5 piecewise_decay_lr_scheduler: false max_env_step: 150000 model: num_res_blocks: 1 num_channels: 32 model_info: policy_type: efficientzero model_class: EfficientZeroModel trainable_params: 6742822 model_config: observation_shape: - 3 - 6 - 6 action_space_size: 36 image_channel: 3 num_res_blocks: 1 num_channels: 32 num_res_blocks: 1 num_channels: 32 observation_shape: - 3 - 6 - 6 action_space_size: 36 image_channel: 3 resolved_main_config: exp_name: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112 env: battle_mode: play_with_bot_mode bot_action_type: rule channel_last: false collector_env_num: 32 evaluator_env_num: 20 n_evaluator_episode: 20 manager: shared_memory: false battle_mode_in_simulation_env: self_play_mode prob_random_action_in_bot: 0.0 collector_bot_mode_seat_swap: true collector_bot_mode_live_seat_weights: '1': 0.25 '2': 0.75 policy: model: observation_shape: - 3 - 6 - 6 action_space_size: 36 image_channel: 3 num_res_blocks: 1 num_channels: 32 model_path: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar cuda: true env_type: board_games action_type: varied_action_space game_segment_length: 5 update_per_collect: 25 batch_size: 256 optim_type: Adam learning_rate: 0.003 grad_clip_value: 0.5 num_simulations: 50 reanalyze_ratio: 0.0 td_steps: 5 discount_factor: 1 n_episode: 32 eval_freq: 150001 replay_buffer_size: 50000 collector_env_num: 32 evaluator_env_num: 20 use_wandb: true best_ckpt_strategy: raw best_ckpt_ema_alpha: 0.3 best_ckpt_min_episodes: 20 num_unroll_steps: 5 battle_mode: play_with_bot_mode battle_mode_in_simulation_env: self_play_mode piecewise_decay_lr_scheduler: false mcts_ctree: true eval_opponent_type: env_bot fixed_bot_evaluator: type: arena seat_swap: true previous_best_checkpoint: path: null selector: best update_policy: on_new_best num_simulations: null n_evaluator_episode: null evaluator_env_num: null promotion_threshold: 0.0 fallback_to_env_bot: false device: cuda resolved_create_config: env: type: simplified__first_attack import_names: - custom_games_simplified.simplified__first_attack.envs.first_attack_env env_manager: type: subprocess policy: type: efficientzero import_names: - lzero.policy.efficientzero