---
# Run configuration: top-level identifiers and experiment-tracking switches.
# NOTE(review): the file-viewer residue that preceded `game` (a "File size"
# header, a commit hash, and a line-number gutter) has been removed; it is not
# part of the YAML document and turned the first key into a junk scalar.
game: simplified__first_attack
algo: efficientzero
wandb_project: crpt
wandb_enabled: true
# Checkpoint settings. Nesting restored: with the children flattened to
# column 0, `checkpoints` parsed as null and its `every_env_steps` collided
# with the identically named key that appears later in the file.
# NOTE(review): indentation was lost upstream, so this layout is inferred from
# the key names -- confirm against the consumer's schema.
checkpoints:
  save: true
  every_env_steps: 10000
  max_checkpoints_to_keep: 3
  # Absolute, user-specific path -- verify it exists on the machine running
  # this config. The file name refers to env step 100000, while
  # `max_env_step` in this file is 150000; presumably this run resumes from
  # that checkpoint (confirm).
  load_from: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar
# Evaluation settings. Nesting restored: in the flattened form `evaluation`
# parsed as null and every key below sat at the document root.
# NOTE(review): indentation was lost upstream, so this layout is inferred from
# the key names -- confirm against the consumer's schema.
evaluation:
  # Explicit null (not an omitted key).
  every_train_iters: null
  runtime_battle_mode: eval_mode
  opponent_regime: meaningful_fixed_bot
  opponent_impl: placement_constraint
  primary_metric: win_rate_vs_fixed_bot
  bot_strength_tier: moderate
  bot_deterministic: true
  requires_paired_audit: false
  # NOTE(review): looks redundant with `opponent_regime: meaningful_fixed_bot`
  # above -- confirm both are read and keep them in sync.
  meaningful_fixed_bot: true
  opponent_type: env_bot
  # Assumed to be a sub-section of `evaluation` (TODO confirm). Nesting it
  # keeps its `every_env_steps` distinct from the checkpoint key of the same
  # name instead of forming a duplicate root-level key.
  fixed_bot_evaluator:
    type: arena
    seat_swap: true
    every_env_steps: 10000
# Environment settings. Nesting restored: in the flattened form `env` parsed
# as null and its keys sat at the document root.
# NOTE(review): indentation was lost upstream, so this layout is inferred from
# the key names -- confirm against the consumer's schema.
env:
  # Two different battle modes are configured; per the key names, the second
  # one applies to the environment used in simulation (confirm).
  battle_mode: play_with_bot_mode
  battle_mode_in_simulation_env: self_play_mode
  bot_action_type: rule
  # 0.0 -- presumably the bot never substitutes a random action; confirm.
  prob_random_action_in_bot: 0.0
  collector_env_num: 32
  # Both evaluator values are 20.
  evaluator_env_num: 20
  n_evaluator_episode: 20
# Additional settings. Nesting restored: in the flattened form `extra_config`
# and `collector_bot_mode_live_seat_weights` both parsed as null, and '1' / '2'
# became root-level keys.
# NOTE(review): indentation was lost upstream, so this layout is inferred from
# the key names -- confirm against the consumer's schema.
extra_config:
  collector_bot_mode_seat_swap: true
  # Keys are quoted so they load as the strings '1' and '2', not integers.
  # The two weights sum to 1.0.
  collector_bot_mode_live_seat_weights:
    '1': 0.25
    '2': 0.75
  # NOTE(review): placed directly under `extra_config` because it does not
  # look like a seat weight; verify it was not meant to live elsewhere.
  stop_value: 2
# Default hyperparameters. Nesting restored: in the flattened form `defaults`
# and `model` parsed as null and their keys sat at the document root. A stray
# `|` left by the file viewer after the last key has been dropped; on its own
# line it is an empty block-scalar header, not configuration.
# NOTE(review): indentation was lost upstream, so this layout is inferred from
# the key names -- confirm against the consumer's schema.
defaults:
  seed: 0
  num_simulations: 50
  batch_size: 256
  update_per_collect: 25
  learning_rate: 0.003
  replay_buffer_size: 50000
  # Written as the integer 1; use 1.0 if the consumer requires a float.
  discount_factor: 1
  # `game_segment_length`, `td_steps` and `num_unroll_steps` are all 5.
  game_segment_length: 5
  td_steps: 5
  reanalyze_ratio: 0.0
  num_unroll_steps: 5
  piecewise_decay_lr_scheduler: false
  max_env_step: 150000
  # Assumed to be a sub-section of `defaults` (TODO confirm).
  model:
    num_res_blocks: 1
    num_channels: 32