| exp_config = { |
| 'env': { |
| 'manager': { |
| 'episode_num': float("inf"), |
| 'max_retry': 1, |
| 'step_timeout': None, |
| 'auto_reset': True, |
| 'reset_timeout': None, |
| 'retry_type': 'reset', |
| 'retry_waiting_time': 0.1, |
| 'shared_memory': False, |
| 'copy_on_get': True, |
| 'context': 'fork', |
| 'wait_num': float("inf"), |
| 'step_wait_timeout': None, |
| 'connect_timeout': 60, |
| 'reset_inplace': False, |
| 'cfg_type': 'SyncSubprocessEnvManagerDict', |
| 'type': 'subprocess' |
| }, |
| 'stop_value': |
| 10000000000, |
| 'n_evaluator_episode': |
| 20, |
| 'env_id': |
| 'simplified__first_attack', |
| 'battle_mode': |
| 'play_with_bot_mode', |
| 'battle_mode_in_simulation_env': |
| 'self_play_mode', |
| 'bot_action_type': |
| 'rule', |
| 'agent_vs_human': |
| False, |
| 'prob_random_agent': |
| 0, |
| 'prob_expert_agent': |
| 0, |
| 'prob_random_action_in_bot': |
| 0.0, |
| 'collector_bot_mode_seat_swap': |
| True, |
| 'bot_mode_live_seat': |
| 1, |
| 'channel_last': |
| False, |
| 'scale': |
| True, |
| 'render_mode': |
| None, |
| 'replay_path': |
| None, |
| 'alphazero_mcts_ctree': |
| False, |
| 'cfg_type': |
| 'SimplifiedFirstAttackEnvDict', |
| 'type': |
| 'simplified__first_attack', |
| 'import_names': [ |
| 'custom_games_simplified.simplified__first_attack.envs.first_attack_env' |
| ], |
| 'collector_env_num': |
| 32, |
| 'evaluator_env_num': |
| 20, |
| 'collector_bot_mode_live_seat_weights': { |
| '1': 0.25, |
| '2': 0.75 |
| } |
| }, |
| 'policy': { |
| 'model': { |
| 'model_type': 'conv', |
| 'continuous_action_space': False, |
| 'observation_shape': (3, 6, 6), |
| 'self_supervised_learning_loss': True, |
| 'categorical_distribution': True, |
| 'image_channel': 3, |
| 'frame_stack_num': 1, |
| 'num_res_blocks': 1, |
| 'num_channels': 32, |
| 'reward_support_range': (-300.0, 301.0, 1.0), |
| 'value_support_range': (-300.0, 301.0, 1.0), |
| 'bias': True, |
| 'discrete_action_encoding_type': 'one_hot', |
| 'res_connection_in_dynamics': True, |
| 'norm_type': 'BN', |
| 'analysis_sim_norm': False, |
| 'analysis_dormant_ratio': False, |
| 'harmony_balance': False, |
| 'lstm_hidden_size': 512, |
| 'action_space_size': 36 |
| }, |
| 'learn': { |
| 'learner': { |
| 'train_iterations': 1000000000, |
| 'dataloader': { |
| 'num_workers': 0 |
| }, |
| 'log_policy': True, |
| 'hook': { |
| 'load_ckpt_before_run': '', |
| 'log_show_after_iter': 100, |
| 'save_ckpt_after_iter': 10000, |
| 'save_ckpt_after_run': True |
| }, |
| 'cfg_type': 'BaseLearnerDict' |
| }, |
| 'resume_training': False |
| }, |
| 'collect': { |
| 'collector': { |
| 'deepcopy_obs': False, |
| 'transform_obs': False, |
| 'collect_print_freq': 100, |
| 'cfg_type': 'SampleSerialCollectorDict', |
| 'type': 'sample' |
| } |
| }, |
| 'eval': { |
| 'evaluator': { |
| 'eval_freq': 1000, |
| 'render': { |
| 'render_freq': -1, |
| 'mode': 'train_iter' |
| }, |
| 'figure_path': None, |
| 'cfg_type': 'InteractionSerialEvaluatorDict', |
| 'stop_value': 10000000000, |
| 'n_episode': 20 |
| } |
| }, |
| 'other': { |
| 'replay_buffer': { |
| 'type': 'advanced', |
| 'replay_buffer_size': 4096, |
| 'max_use': float("inf"), |
| 'max_staleness': float("inf"), |
| 'alpha': 0.6, |
| 'beta': 0.4, |
| 'anneal_step': 100000, |
| 'enable_track_used_data': False, |
| 'deepcopy': False, |
| 'thruput_controller': { |
| 'push_sample_rate_limit': { |
| 'max': float("inf"), |
| 'min': 0 |
| }, |
| 'window_seconds': 30, |
| 'sample_min_limit_ratio': 1 |
| }, |
| 'monitor': { |
| 'sampled_data_attr': { |
| 'average_range': 5, |
| 'print_freq': 200 |
| }, |
| 'periodic_thruput': { |
| 'seconds': 60 |
| } |
| }, |
| 'cfg_type': 'AdvancedReplayBufferDict' |
| }, |
| 'commander': { |
| 'cfg_type': 'BaseSerialCommanderDict' |
| } |
| }, |
| 'on_policy': False, |
| 'cuda': True, |
| 'multi_gpu': False, |
| 'bp_update_sync': True, |
| 'traj_len_inf': False, |
| 'use_wandb': True, |
| 'use_rnd_model': False, |
| 'sampled_algo': False, |
| 'gumbel_algo': False, |
| 'mcts_ctree': True, |
| 'collector_env_num': 32, |
| 'evaluator_env_num': 20, |
| 'env_type': 'board_games', |
| 'action_type': 'varied_action_space', |
| 'battle_mode': 'play_with_bot_mode', |
| 'monitor_extra_statistics': True, |
| 'game_segment_length': 5, |
| 'eval_offline': False, |
| 'calculate_dormant_ratio': False, |
| 'analysis_sim_norm': False, |
| 'analysis_dormant_ratio': False, |
| 'transform2string': False, |
| 'gray_scale': False, |
| 'use_augmentation': False, |
| 'augmentation': ['shift', 'intensity'], |
| 'ignore_done': False, |
| 'update_per_collect': 25, |
| 'replay_ratio': 0.25, |
| 'batch_size': 256, |
| 'optim_type': 'Adam', |
| 'learning_rate': 0.003, |
| 'target_update_freq': 100, |
| 'target_update_freq_for_intrinsic_reward': 1000, |
| 'weight_decay': 0.0001, |
| 'momentum': 0.9, |
| 'grad_clip_value': 0.5, |
| 'n_episode': 32, |
| 'num_segments': 8, |
| 'num_simulations': 50, |
| 'discount_factor': 1, |
| 'td_steps': 5, |
| 'num_unroll_steps': 5, |
| 'reward_loss_weight': 1, |
| 'value_loss_weight': 0.25, |
| 'policy_loss_weight': 1, |
| 'policy_entropy_weight': 0, |
| 'ssl_loss_weight': 2, |
| 'piecewise_decay_lr_scheduler': False, |
| 'threshold_training_steps_for_final_lr': 50000, |
| 'manual_temperature_decay': False, |
| 'threshold_training_steps_for_final_temperature': 100000, |
| 'fixed_temperature_value': 0.25, |
| 'use_ture_chance_label_in_chance_encoder': False, |
| 'reanalyze_noise': True, |
| 'reuse_search': False, |
| 'collect_with_pure_policy': False, |
| 'use_priority': False, |
| 'priority_prob_alpha': 0.6, |
| 'priority_prob_beta': 0.4, |
| 'root_dirichlet_alpha': 0.3, |
| 'root_noise_weight': 0.25, |
| 'random_collect_episode_num': 0, |
| 'eps': { |
| 'eps_greedy_exploration_in_collect': False, |
| 'type': 'linear', |
| 'start': 1.0, |
| 'end': 0.05, |
| 'decay': 100000 |
| }, |
| 'cfg_type': 'EfficientZeroPolicyDict', |
| 'lstm_horizon_len': 5, |
| 'type': 'efficientzero', |
| 'import_names': ['lzero.policy.efficientzero'], |
| 'model_path': |
| '/home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_200k_collector_tuned_20260522/simplified__first_attack/attempt-01_260522_142417/ckpt/envstep_100000.pth.tar', |
| 'reanalyze_ratio': 0.0, |
| 'eval_freq': 150001, |
| 'replay_buffer_size': 50000, |
| 'best_ckpt_strategy': 'raw', |
| 'best_ckpt_ema_alpha': 0.3, |
| 'best_ckpt_min_episodes': 20, |
| 'battle_mode_in_simulation_env': 'self_play_mode', |
| 'eval_opponent_type': 'env_bot', |
| 'fixed_bot_evaluator': { |
| 'type': 'arena', |
| 'seat_swap': True |
| }, |
| 'previous_best_checkpoint': { |
| 'path': None, |
| 'selector': 'best', |
| 'update_policy': 'on_new_best', |
| 'num_simulations': None, |
| 'n_evaluator_episode': None, |
| 'evaluator_env_num': None, |
| 'promotion_threshold': 0.0, |
| 'fallback_to_env_bot': False |
| }, |
| 'device': 'cuda' |
| }, |
| 'exp_name': |
| '/home/molfetta/combinatorial_reasoning_post_training/models/main5_hf_bot_mode_recovery_50k_20260522/simplified__first_attack/attempt-01_260522_170112', |
| 'seed': 0 |
| } |
|
|