# OpenDILabCommunity/CartPole-v0-MuZero

# Experiment configuration for the 'CartPole-v0-MuZero' run.
#
# Top-level sections: 'env' (environment and env-manager settings), 'policy'
# (model, learner, collector, evaluator, replay buffer and MuZero
# hyper-parameters), 'exp_name', 'seed' and 'wandb_logger'.
#
# The leading '+' diff markers that prefixed every line were removed so the
# assignment parses as Python; all keys, values and key order are unchanged.
exp_config = {
    'env': {
        'manager': {
            'episode_num': float("inf"),
            'max_retry': 5,
            'step_timeout': None,
            'auto_reset': True,
            'reset_timeout': None,
            'retry_type': 'reset',
            'retry_waiting_time': 0.1,
            'shared_memory': False,
            'copy_on_get': True,
            'context': 'fork',
            'wait_num': float("inf"),
            'step_wait_timeout': None,
            'connect_timeout': 60,
            'reset_inplace': False,
            'cfg_type': 'SyncSubprocessEnvManagerDict',
            'type': 'subprocess',
        },
        # Same value as policy.eval.evaluator.stop_value below.
        'stop_value': 10000000000,
        'n_evaluator_episode': 3,
        'type': 'cartpole_lightzero',
        'import_names': ['zoo.classic_control.cartpole.envs.cartpole_lightzero_env'],
        'env_id': 'CartPole-v0',
        'continuous': False,
        'manually_discretization': False,
        # NOTE(review): this is a temporary-directory path captured from one
        # particular run; it will likely not exist elsewhere -- confirm before reuse.
        'replay_path': '/tmp/tmp4kdr3rf1/videos',
    },
    'policy': {
        'model': {
            'model_type': 'mlp',
            'continuous_action_space': False,
            'observation_shape': 4,
            'self_supervised_learning_loss': True,
            'categorical_distribution': True,
            'image_channel': 1,
            'frame_stack_num': 1,
            'num_res_blocks': 1,
            'num_channels': 64,
            'support_scale': 300,
            'bias': True,
            'discrete_action_encoding_type': 'one_hot',
            'res_connection_in_dynamics': True,
            'norm_type': 'BN',
            'action_space_size': 2,
            'lstm_hidden_size': 128,
            'latent_state_dim': 128,
        },
        'learn': {
            'learner': {
                'train_iterations': 1000000000,
                'dataloader': {
                    'num_workers': 0,
                },
                'log_policy': True,
                'hook': {
                    'load_ckpt_before_run': '',
                    'log_show_after_iter': 100,
                    'save_ckpt_after_iter': 10000,
                    'save_ckpt_after_run': True,
                },
                'cfg_type': 'BaseLearnerDict',
            },
        },
        'collect': {
            'collector': {
                'deepcopy_obs': False,
                'transform_obs': False,
                'collect_print_freq': 100,
                'cfg_type': 'SampleSerialCollectorDict',
                'type': 'sample',
            },
        },
        'eval': {
            'evaluator': {
                # NOTE(review): differs from the policy-level 'eval_freq' (200)
                # further down -- verify which one the consumer reads.
                'eval_freq': 1000,
                'render': {
                    'render_freq': -1,
                    'mode': 'train_iter',
                },
                'figure_path': None,
                'cfg_type': 'InteractionSerialEvaluatorDict',
                'stop_value': 10000000000,
                'n_episode': 3,
            },
        },
        'other': {
            'replay_buffer': {
                'type': 'advanced',
                # NOTE(review): differs from the policy-level
                # 'replay_buffer_size' (1000000) -- verify which one is used.
                'replay_buffer_size': 4096,
                'max_use': float("inf"),
                'max_staleness': float("inf"),
                'alpha': 0.6,
                'beta': 0.4,
                'anneal_step': 100000,
                'enable_track_used_data': False,
                'deepcopy': False,
                'thruput_controller': {
                    'push_sample_rate_limit': {
                        'max': float("inf"),
                        'min': 0,
                    },
                    'window_seconds': 30,
                    'sample_min_limit_ratio': 1,
                },
                'monitor': {
                    'sampled_data_attr': {
                        'average_range': 5,
                        'print_freq': 200,
                    },
                    'periodic_thruput': {
                        'seconds': 60,
                    },
                },
                'cfg_type': 'AdvancedReplayBufferDict',
            },
            'commander': {
                'cfg_type': 'BaseSerialCommanderDict',
            },
        },
        'on_policy': False,
        'cuda': True,
        'multi_gpu': False,
        'bp_update_sync': True,
        'traj_len_inf': False,
        'use_rnd_model': False,
        'sampled_algo': False,
        'gumbel_algo': False,
        'mcts_ctree': True,
        'collector_env_num': 8,
        'evaluator_env_num': 3,
        'env_type': 'not_board_games',
        'battle_mode': 'play_with_bot_mode',
        'monitor_extra_statistics': True,
        'game_segment_length': 50,
        'transform2string': False,
        'gray_scale': False,
        'use_augmentation': False,
        'augmentation': ['shift', 'intensity'],
        'ignore_done': False,
        'update_per_collect': 100,
        'model_update_ratio': 0.1,
        'batch_size': 256,
        'optim_type': 'Adam',
        'learning_rate': 0.003,
        'target_update_freq': 100,
        'target_update_freq_for_intrinsic_reward': 1000,
        'weight_decay': 0.0001,
        'momentum': 0.9,
        'grad_clip_value': 10,
        'n_episode': 8,
        'num_simulations': 25,
        'discount_factor': 0.997,
        'td_steps': 5,
        'num_unroll_steps': 5,
        'reward_loss_weight': 1,
        'value_loss_weight': 0.25,
        'policy_loss_weight': 1,
        'policy_entropy_loss_weight': 0,
        'ssl_loss_weight': 2,
        'lr_piecewise_constant_decay': False,
        'threshold_training_steps_for_final_lr': 50000,
        'manual_temperature_decay': False,
        'threshold_training_steps_for_final_temperature': 100000,
        'fixed_temperature_value': 0.25,
        # The key spelling ('ture') is kept exactly as found: it is a lookup
        # key, so renaming it here would change what consumers see.
        'use_ture_chance_label_in_chance_encoder': False,
        'use_priority': True,
        'priority_prob_alpha': 0.6,
        'priority_prob_beta': 0.4,
        'root_dirichlet_alpha': 0.3,
        'root_noise_weight': 0.25,
        'random_collect_episode_num': 0,
        'eps': {
            'eps_greedy_exploration_in_collect': False,
            'type': 'linear',
            'start': 1.0,
            'end': 0.05,
            'decay': 100000,
        },
        'cfg_type': 'MuZeroPolicyDict',
        'type': 'muzero',
        'import_names': ['lzero.policy.muzero'],
        'reanalyze_ratio': 0,
        'eval_freq': 200,
        'replay_buffer_size': 1000000,
        'device': 'cuda',
    },
    'exp_name': 'CartPole-v0-MuZero',
    'seed': 0,
    'wandb_logger': {
        'gradient_logger': False,
        'video_logger': False,
        'plot_logger': False,
        'action_logger': False,
        'return_logger': False,
    },
}