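# Scraped file-viewer header (not part of the config); kept as comments so the
# document parses as YAML:
#   FirstAttack-CK / metadata / source_config.yaml
#   LorMolf's picture
#   Update simplified__first_attack checkpoint metadata
#   6ae1fd0 verified
#   Raw
#   History Blame Contribute Delete
#   1.5 kB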
# Run identity: which game and which algorithm this configuration targets.
game: simplified__first_attack
algo: efficientzero
# Experiment-tracking switches (presumably Weights & Biases, going by the key
# names — confirm against the launcher that reads this file).
wandb_project: crpt
wandb_enabled: true
# Checkpoint saving and warm-start options.
checkpoints:
  save: true
  # Presumably the save interval, measured in environment steps — confirm
  # against the trainer.
  every_env_steps: 10000
  max_checkpoints_to_keep: 3
  # NOTE(review): absolute path on a single user's machine; the run is not
  # reproducible elsewhere unless this file exists at the same location.
  load_from: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar
# Evaluation protocol: which opponent the agent is scored against and which
# metric is treated as primary.
evaluation:
  # Explicit null: no iteration-based evaluation interval is set here.
  every_train_iters: null
  runtime_battle_mode: eval_mode
  opponent_regime: meaningful_fixed_bot
  opponent_impl: placement_constraint
  primary_metric: win_rate_vs_fixed_bot
  bot_strength_tier: moderate
  bot_deterministic: true
  requires_paired_audit: false
  meaningful_fixed_bot: true
  opponent_type: env_bot
# Settings for the evaluator that plays against the fixed bot.
# NOTE(review): the original nesting depth of this section is unknown; it may
# belong under `evaluation` rather than at the top level — confirm against the
# config loader.
fixed_bot_evaluator:
  type: arena
  seat_swap: true
  # Presumably the evaluation interval, measured in environment steps — confirm.
  every_env_steps: 10000
# Environment settings: battle modes, bot behaviour and the number of
# collector/evaluator environments.
env:
  battle_mode: play_with_bot_mode
  battle_mode_in_simulation_env: self_play_mode
  bot_action_type: rule
  prob_random_action_in_bot: 0.0
  collector_env_num: 32
  evaluator_env_num: 20
  n_evaluator_episode: 20
  # NOTE(review): nesting under `env` is inferred from key order, not shown by
  # the source — confirm against the config loader.
  extra_config:
    collector_bot_mode_seat_swap: true
    # Keys are quoted so they load as the strings '1' and '2', not integers.
    # The two weights sum to 1.0.
    collector_bot_mode_live_seat_weights:
      '1': 0.25
      '2': 0.75
  # NOTE(review): placed at the `env` level because DI-engine/LightZero-style
  # trainers read `stop_value` from the env config; it could instead be a
  # member of `extra_config` — confirm.
  stop_value: 2
# Default training hyperparameters for this run.
defaults:
  seed: 0
  num_simulations: 50
  batch_size: 256
  update_per_collect: 25
  learning_rate: 0.003
  replay_buffer_size: 50000
  # Written as the integer 1 (not 1.0); kept as-is so the loaded type is unchanged.
  discount_factor: 1
  game_segment_length: 5
  td_steps: 5
  reanalyze_ratio: 0.0
  num_unroll_steps: 5
  piecewise_decay_lr_scheduler: false
  max_env_step: 150000
# Network size settings.
# NOTE(review): the original nesting depth of this section is unknown; it may
# belong under `defaults` rather than at the top level — confirm against the
# config loader.
model:
  num_res_blocks: 1
  num_channels: 32