| game: simplified__first_attack | |
| algo: efficientzero | |
| wandb_project: crpt | |
| wandb_enabled: true | |
| checkpoints: | |
| save: true | |
| every_env_steps: 10000 | |
| max_checkpoints_to_keep: 3 | |
| load_from: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar | |
| evaluation: | |
| every_train_iters: null | |
| runtime_battle_mode: eval_mode | |
| opponent_regime: meaningful_fixed_bot | |
| opponent_impl: placement_constraint | |
| primary_metric: win_rate_vs_fixed_bot | |
| bot_strength_tier: moderate | |
| bot_deterministic: true | |
| requires_paired_audit: false | |
| meaningful_fixed_bot: true | |
| opponent_type: env_bot | |
| fixed_bot_evaluator: | |
| type: arena | |
| seat_swap: true | |
| every_env_steps: 10000 | |
| env: | |
| battle_mode: play_with_bot_mode | |
| battle_mode_in_simulation_env: self_play_mode | |
| bot_action_type: rule | |
| prob_random_action_in_bot: 0.0 | |
| collector_env_num: 32 | |
| evaluator_env_num: 20 | |
| n_evaluator_episode: 20 | |
| extra_config: | |
| collector_bot_mode_seat_swap: true | |
| collector_bot_mode_live_seat_weights: | |
| '1': 0.25 | |
| '2': 0.75 | |
| stop_value: 2 | |
| defaults: | |
| seed: 0 | |
| num_simulations: 50 | |
| batch_size: 256 | |
| update_per_collect: 25 | |
| learning_rate: 0.003 | |
| replay_buffer_size: 50000 | |
| discount_factor: 1 | |
| game_segment_length: 5 | |
| td_steps: 5 | |
| reanalyze_ratio: 0.0 | |
| num_unroll_steps: 5 | |
| piecewise_decay_lr_scheduler: false | |
| max_env_step: 150000 | |
| model: | |
| num_res_blocks: 1 | |
| num_channels: 32 | |