FirstAttack-CK / metadata /metadata.yaml
LorMolf's picture
Update simplified__first_attack checkpoint metadata
c413f8e verified
Raw
History Blame Contribute Delete
6.78 kB
metadata_version: 2
saved_at: '2026-05-22T19:23:59.249701+00:00'
entrypoint: src.train_game
game: simplified__first_attack
algo: efficientzero
seed: 0
max_env_step: 150000
exp_name: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112
checkpoint_dir: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112/ckpt
config_path: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/configs/simplified__first_attack.yaml
cli_args:
game: null
config: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/configs/simplified__first_attack.yaml
list_games: false
algo: null
num_simulations: null
batch_size: null
update_per_collect: null
lr: null
seed: null
max_env_step: null
num_res_blocks: null
num_channels: null
replay_buffer_size: null
num_unroll_steps: null
reanalyze_ratio: null
battle_mode: null
no_wandb: false
cpu: false
torch_num_threads: 4
torch_num_interop_threads: 1
no_checkpoints: false
ckpt_dir: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01
model_path: null
ckpt_every_env_steps: null
max_checkpoints_to_keep: null
best_ckpt_strategy: null
best_ckpt_ema_alpha: null
best_ckpt_min_episodes: null
eval_every_train_iters: null
eval_every_env_steps: null
eval_every_env_step_ratio: null
eval_opponent_type: null
fixed_bot_evaluator_type: null
no_fixed_bot_seat_swap: false
eval_opponent_checkpoint_path: null
eval_opponent_checkpoint_selector: null
eval_opponent_update_policy: null
eval_opponent_num_simulations: null
eval_opponent_episodes: null
eval_opponent_env_num: null
eval_opponent_promotion_threshold: null
eval_opponent_fallback_to_env_bot: false
wandb_project: crpt-simplified5-corrected-quality
wandb_run_name: main5_hf_bot_mode_recovery_50k_20260522__simplified__first_attack__a01
wandb_group: main5_hf_bot_mode_recovery_50k_20260522
wandb_run_id: crpt-cb73f8ff7f8fa54600a2
wandb_dir: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/wandb
source_config:
game: simplified__first_attack
algo: efficientzero
wandb_project: crpt
wandb_enabled: true
checkpoints:
save: true
every_env_steps: 10000
max_checkpoints_to_keep: 3
load_from: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar
evaluation:
every_train_iters: null
runtime_battle_mode: eval_mode
opponent_regime: meaningful_fixed_bot
opponent_impl: placement_constraint
primary_metric: win_rate_vs_fixed_bot
bot_strength_tier: moderate
bot_deterministic: true
requires_paired_audit: false
meaningful_fixed_bot: true
opponent_type: env_bot
fixed_bot_evaluator:
type: arena
seat_swap: true
every_env_steps: 10000
env:
battle_mode: play_with_bot_mode
battle_mode_in_simulation_env: self_play_mode
bot_action_type: rule
prob_random_action_in_bot: 0.0
collector_env_num: 32
evaluator_env_num: 20
n_evaluator_episode: 20
extra_config:
collector_bot_mode_seat_swap: true
collector_bot_mode_live_seat_weights:
'1': 0.25
'2': 0.75
stop_value: 2
defaults:
seed: 0
num_simulations: 50
batch_size: 256
update_per_collect: 25
learning_rate: 0.003
replay_buffer_size: 50000
discount_factor: 1
game_segment_length: 5
td_steps: 5
reanalyze_ratio: 0.0
num_unroll_steps: 5
piecewise_decay_lr_scheduler: false
max_env_step: 150000
model:
num_res_blocks: 1
num_channels: 32
model_info:
policy_type: efficientzero
model_class: EfficientZeroModel
trainable_params: 6742822
model_config:
observation_shape:
- 3
- 6
- 6
action_space_size: 36
image_channel: 3
num_res_blocks: 1
num_channels: 32
num_res_blocks: 1
num_channels: 32
observation_shape:
- 3
- 6
- 6
action_space_size: 36
image_channel: 3
resolved_main_config:
exp_name: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112
env:
battle_mode: play_with_bot_mode
bot_action_type: rule
channel_last: false
collector_env_num: 32
evaluator_env_num: 20
n_evaluator_episode: 20
manager:
shared_memory: false
battle_mode_in_simulation_env: self_play_mode
prob_random_action_in_bot: 0.0
collector_bot_mode_seat_swap: true
collector_bot_mode_live_seat_weights:
'1': 0.25
'2': 0.75
policy:
model:
observation_shape:
- 3
- 6
- 6
action_space_size: 36
image_channel: 3
num_res_blocks: 1
num_channels: 32
model_path: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar
cuda: true
env_type: board_games
action_type: varied_action_space
game_segment_length: 5
update_per_collect: 25
batch_size: 256
optim_type: Adam
learning_rate: 0.003
grad_clip_value: 0.5
num_simulations: 50
reanalyze_ratio: 0.0
td_steps: 5
discount_factor: 1
n_episode: 32
eval_freq: 150001
replay_buffer_size: 50000
collector_env_num: 32
evaluator_env_num: 20
use_wandb: true
best_ckpt_strategy: raw
best_ckpt_ema_alpha: 0.3
best_ckpt_min_episodes: 20
num_unroll_steps: 5
battle_mode: play_with_bot_mode
battle_mode_in_simulation_env: self_play_mode
piecewise_decay_lr_scheduler: false
mcts_ctree: true
eval_opponent_type: env_bot
fixed_bot_evaluator:
type: arena
seat_swap: true
previous_best_checkpoint:
path: null
selector: best
update_policy: on_new_best
num_simulations: null
n_evaluator_episode: null
evaluator_env_num: null
promotion_threshold: 0.0
fallback_to_env_bot: false
device: cuda
resolved_create_config:
env:
type: simplified__first_attack
import_names:
- custom_games_simplified.simplified__first_attack.envs.first_attack_env
env_manager:
type: subprocess
policy:
type: efficientzero
import_names:
- lzero.policy.efficientzero