FirstAttack-CK / metadata / metadata.yaml

Update simplified__first_attack checkpoint metadata

c413f8e verified about 1 month ago

6.78 kB

	# Top-level scalar fields of the checkpoint metadata for the
	# simplified__first_attack run.
	metadata_version: 2
	# Timestamp carries an explicit +00:00 offset and is quoted, so it loads as a string.
	saved_at: '2026-05-22T19:23:59.249701+00:00'
	entrypoint: src.train_game
	game: simplified__first_attack
	algo: efficientzero
	seed: 0
	max_env_step: 150000
	# checkpoint_dir below is this exp_name path with a trailing /ckpt component.
	exp_name: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112
	checkpoint_dir: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112/ckpt
	# Same path as the `config` entry listed after cli_args.
	config_path: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/configs/simplified__first_attack.yaml
	# Command-line arguments recorded for this run.
	# NOTE(review): nesting is reconstructed — the extracted text had every key at
	# the same level, which made game/algo/seed/max_env_step collide with the
	# top-level keys of the same name. Presumably null means "flag not supplied,
	# fall back to the config file"; confirm against src.train_game.
	cli_args:
	  game: null
	  config: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/configs/simplified__first_attack.yaml
	  list_games: false
	  algo: null
	  num_simulations: null
	  batch_size: null
	  update_per_collect: null
	  lr: null
	  seed: null
	  max_env_step: null
	  num_res_blocks: null
	  num_channels: null
	  replay_buffer_size: null
	  num_unroll_steps: null
	  reanalyze_ratio: null
	  battle_mode: null
	  no_wandb: false
	  cpu: false
	  torch_num_threads: 4
	  torch_num_interop_threads: 1
	  no_checkpoints: false
	  ckpt_dir: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01
	  model_path: null
	  ckpt_every_env_steps: null
	  max_checkpoints_to_keep: null
	  best_ckpt_strategy: null
	  best_ckpt_ema_alpha: null
	  best_ckpt_min_episodes: null
	  eval_every_train_iters: null
	  eval_every_env_steps: null
	  eval_every_env_step_ratio: null
	  eval_opponent_type: null
	  fixed_bot_evaluator_type: null
	  no_fixed_bot_seat_swap: false
	  eval_opponent_checkpoint_path: null
	  eval_opponent_checkpoint_selector: null
	  eval_opponent_update_policy: null
	  eval_opponent_num_simulations: null
	  eval_opponent_episodes: null
	  eval_opponent_env_num: null
	  eval_opponent_promotion_threshold: null
	  eval_opponent_fallback_to_env_bot: false
	  # NOTE(review): the wandb_* keys are kept under cli_args because they directly
	  # follow the other arguments; verify they are not top-level fields.
	  wandb_project: crpt-simplified5-corrected-quality
	  wandb_run_name: main5_hf_bot_mode_recovery_50k_20260522__simplified__first_attack__a01
	  wandb_group: main5_hf_bot_mode_recovery_50k_20260522
	  wandb_run_id: crpt-cb73f8ff7f8fa54600a2
	  wandb_dir: /home/molfetta/combinatorial_reasoning_post_training/data/supported_training/runs/main5_hf_bot_mode_recovery_50k_20260522/wandb
	# Contents of the run's source configuration file.
	# NOTE(review): nesting is reconstructed — the extracted text listed every key
	# at one level, producing duplicate keys (e.g. every_env_steps twice).
	source_config:
	  game: simplified__first_attack
	  algo: efficientzero
	  wandb_project: crpt
	  wandb_enabled: true
	  checkpoints:
	    save: true
	    every_env_steps: 10000
	    max_checkpoints_to_keep: 3
	    load_from: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar
	  evaluation:
	    every_train_iters: null
	    runtime_battle_mode: eval_mode
	    opponent_regime: meaningful_fixed_bot
	    opponent_impl: placement_constraint
	    primary_metric: win_rate_vs_fixed_bot
	    bot_strength_tier: moderate
	    bot_deterministic: true
	    requires_paired_audit: false
	    meaningful_fixed_bot: true
	    opponent_type: env_bot
	    fixed_bot_evaluator:
	      type: arena
	      seat_swap: true
	    # NOTE(review): placed under evaluation, not fixed_bot_evaluator, because the
	    # resolved fixed_bot_evaluator only has type/seat_swap — confirm.
	    every_env_steps: 10000
	  env:
	    battle_mode: play_with_bot_mode
	    battle_mode_in_simulation_env: self_play_mode
	    bot_action_type: rule
	    prob_random_action_in_bot: 0.0
	    collector_env_num: 32
	    evaluator_env_num: 20
	    n_evaluator_episode: 20
	    extra_config:
	      collector_bot_mode_seat_swap: true
	      collector_bot_mode_live_seat_weights:
	        '1': 0.25
	        '2': 0.75
	    # NOTE(review): stop_value's parent is ambiguous in the flattened text; it is
	    # placed under env here — verify against the original file.
	    stop_value: 2
	  defaults:
	    seed: 0
	    num_simulations: 50
	    batch_size: 256
	    update_per_collect: 25
	    learning_rate: 0.003
	    replay_buffer_size: 50000
	    discount_factor: 1
	    game_segment_length: 5
	    td_steps: 5
	    reanalyze_ratio: 0.0
	    num_unroll_steps: 5
	    piecewise_decay_lr_scheduler: false
	    max_env_step: 150000
	  model:
	    num_res_blocks: 1
	    num_channels: 32
	# Summary of the instantiated model.
	# NOTE(review): nesting is reconstructed. The five model keys appear twice in
	# the flattened text; the first group is placed under model_config and the
	# second directly under model_info, since one mapping cannot hold both.
	model_info:
	  policy_type: efficientzero
	  model_class: EfficientZeroModel
	  trainable_params: 6742822
	  model_config:
	    observation_shape:
	    - 3
	    - 6
	    - 6
	    action_space_size: 36
	    image_channel: 3
	    num_res_blocks: 1
	    num_channels: 32
	  num_res_blocks: 1
	  num_channels: 32
	  observation_shape:
	  - 3
	  - 6
	  - 6
	  action_space_size: 36
	  image_channel: 3
	# Main configuration after resolution (config file combined with CLI values).
	# NOTE(review): nesting is reconstructed — the flattened text repeated
	# num_simulations, evaluator_env_num, n_evaluator_episode, battle_mode and
	# collector_env_num at one level, so they must live in different sub-mappings.
	resolved_main_config:
	  exp_name: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112
	  env:
	    battle_mode: play_with_bot_mode
	    bot_action_type: rule
	    channel_last: false
	    collector_env_num: 32
	    evaluator_env_num: 20
	    n_evaluator_episode: 20
	    manager:
	      shared_memory: false
	    battle_mode_in_simulation_env: self_play_mode
	    prob_random_action_in_bot: 0.0
	    collector_bot_mode_seat_swap: true
	    collector_bot_mode_live_seat_weights:
	      '1': 0.25
	      '2': 0.75
	  policy:
	    model:
	      observation_shape:
	      - 3
	      - 6
	      - 6
	      action_space_size: 36
	      image_channel: 3
	      num_res_blocks: 1
	      num_channels: 32
	    # Same checkpoint path as source_config's checkpoints load_from value.
	    model_path: /home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar
	    cuda: true
	    env_type: board_games
	    action_type: varied_action_space
	    game_segment_length: 5
	    update_per_collect: 25
	    batch_size: 256
	    optim_type: Adam
	    learning_rate: 0.003
	    grad_clip_value: 0.5
	    num_simulations: 50
	    reanalyze_ratio: 0.0
	    td_steps: 5
	    discount_factor: 1
	    n_episode: 32
	    # One more than max_env_step (150000).
	    eval_freq: 150001
	    replay_buffer_size: 50000
	    collector_env_num: 32
	    evaluator_env_num: 20
	    use_wandb: true
	    best_ckpt_strategy: raw
	    best_ckpt_ema_alpha: 0.3
	    best_ckpt_min_episodes: 20
	    num_unroll_steps: 5
	    battle_mode: play_with_bot_mode
	    battle_mode_in_simulation_env: self_play_mode
	    piecewise_decay_lr_scheduler: false
	    mcts_ctree: true
	    eval_opponent_type: env_bot
	    fixed_bot_evaluator:
	      type: arena
	      seat_swap: true
	    previous_best_checkpoint:
	      path: null
	      selector: best
	      update_policy: on_new_best
	      num_simulations: null
	      n_evaluator_episode: null
	      evaluator_env_num: null
	      promotion_threshold: 0.0
	      fallback_to_env_bot: false
	    # NOTE(review): device is placed at policy level; confirm it is not part of
	    # previous_best_checkpoint.
	    device: cuda
	# Create-config after resolution: which env, env manager and policy types to
	# instantiate, and which modules provide the env and policy.
	# NOTE(review): nesting is reconstructed — `type` appeared three times and
	# `import_names` twice at one level in the flattened text.
	resolved_create_config:
	  env:
	    type: simplified__first_attack
	    import_names:
	    - custom_games_simplified.simplified__first_attack.envs.first_attack_env
	  env_manager:
	    type: subprocess
	  policy:
	    type: efficientzero
	    import_names:
	    - lzero.policy.efficientzero