---
# Experiment configuration: EfficientZero on the `simplified__first_attack`
# game, collecting against a rule bot.
#
# NOTE(review): this file was previously flattened onto a single line, which
# is not parseable as a YAML mapping. The nesting below was reconstructed from
# key names and ordering; every key and value is unchanged. Places where the
# parent mapping could not be determined with certainty are flagged inline —
# confirm them against the config loader's schema.

game: simplified__first_attack
algo: efficientzero

# Weights & Biases logging.
wandb_project: crpt
wandb_enabled: true

# Checkpoint saving and warm-start.
checkpoints:
  save: true
  every_env_steps: 10000
  max_checkpoints_to_keep: 3
  # NOTE(review): assumed to belong to `checkpoints`; it may instead be a
  # top-level key — confirm. The path is absolute and machine-specific.
  load_from: '/home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar'

# Evaluation settings and opponent description.
evaluation:
  every_train_iters: null
  runtime_battle_mode: eval_mode
  opponent_regime: meaningful_fixed_bot
  opponent_impl: placement_constraint
  primary_metric: win_rate_vs_fixed_bot
  bot_strength_tier: moderate
  bot_deterministic: true
  requires_paired_audit: false
  meaningful_fixed_bot: true
  opponent_type: env_bot
  fixed_bot_evaluator:
    type: arena
    seat_swap: true
    # NOTE(review): assumed to be the fixed-bot evaluator's own cadence; it
    # could instead sit directly under `evaluation` — confirm.
    every_env_steps: 10000

# Environment settings.
env:
  battle_mode: play_with_bot_mode
  battle_mode_in_simulation_env: self_play_mode
  bot_action_type: rule
  prob_random_action_in_bot: 0.0
  collector_env_num: 32
  evaluator_env_num: 20
  n_evaluator_episode: 20
  extra_config:
    collector_bot_mode_seat_swap: true
    # Keys are quoted so they stay strings rather than being read as integers.
    collector_bot_mode_live_seat_weights:
      '1': 0.25
      '2': 0.75
  # NOTE(review): assumed to be an `env` key; it could instead belong to
  # `extra_config` or the top level — confirm.
  stop_value: 2

# Default training hyperparameters.
defaults:
  seed: 0
  num_simulations: 50
  batch_size: 256
  update_per_collect: 25
  learning_rate: 0.003
  replay_buffer_size: 50000
  discount_factor: 1
  game_segment_length: 5
  td_steps: 5
  reanalyze_ratio: 0.0
  num_unroll_steps: 5
  piecewise_decay_lr_scheduler: false
  max_env_step: 150000
  # NOTE(review): assumed to be nested under `defaults`; it could instead be
  # a top-level `model` section — confirm.
  model:
    num_res_blocks: 1
    num_channels: 32