{ "help": false, "algo": "APPO", "env": "challenge", "experiment": "default_experiment", "train_dir": "/net/tscratch/people/plgbartekcupial/mrunner_scratch/sf2_nethack/20_03-18_43-laughing_jang/2024-03-20-monk-appo-ks-t-baseline_983r_1/train_dir", "restart_behavior": "resume", "device": "gpu", "seed": 1, "num_policies": 1, "async_rl": true, "serial_mode": false, "batched_sampling": false, "num_batches_to_accumulate": 2, "worker_num_splits": 2, "policy_workers_per_policy": 1, "max_policy_lag": 1000, "num_workers": 16, "num_envs_per_worker": 32, "batch_size": 4096, "num_batches_per_epoch": 4, "num_epochs": 1, "rollout": 128, "recurrence": 128, "shuffle_minibatches": false, "warmup": 0, "gamma": 1.0, "reward_scale": 1.0, "reward_clip": 10.0, "value_bootstrap": false, "normalize_returns": true, "exploration_loss_coeff": 0.0, "value_loss_coeff": 1.0, "kl_loss_coeff": 0.0, "exploration_loss": "entropy", "gae_lambda": 1.0, "ppo_clip_ratio": 0.1, "ppo_clip_value": 1.0, "with_vtrace": false, "vtrace_rho": 1.0, "vtrace_c": 1.0, "optimizer": "adam", "adam_eps": 1e-07, "adam_beta1": 0.9, "adam_beta2": 0.999, "momentum": 0.9, "max_grad_norm": 4, "learning_rate": 0.0001, "lr_schedule": "linear_decay", "lr_schedule_kl_threshold": 0.008, "lr_adaptive_min": 1e-06, "lr_adaptive_max": 0.01, "obs_subtract_mean": 0.0, "obs_scale": 1.0, "normalize_input": false, "normalize_input_keys": null, "decorrelate_experience_max_seconds": 0, "decorrelate_envs_on_one_worker": false, "actor_worker_gpus": [], "set_workers_cpu_affinity": true, "force_envs_single_thread": false, "default_niceness": 0, "log_to_file": true, "experiment_summaries_interval": 50, "flush_summaries_interval": 30, "stats_avg": 100, "summaries_use_frameskip": true, "heartbeat_interval": 20, "heartbeat_reporting_interval": 180, "train_for_env_steps": 500000000, "train_for_seconds": 10000000000, "save_every_sec": 120, "keep_checkpoints": 2, "load_checkpoint_kind": "latest", "save_milestones_sec": -1, "save_milestones_ith": 25000000, "save_best_every_sec": 5, "save_best_metric": "reward", "save_best_after": 100000, "benchmark": false, "encoder_mlp_layers": [ 512, 512 ], "encoder_conv_architecture": "convnet_simple", "encoder_conv_mlp_layers": [ 512 ], "use_rnn": true, "rnn_size": 1738, "rnn_type": "lstm", "rnn_num_layers": 1, "decoder_mlp_layers": [], "nonlinearity": "relu", "policy_initialization": "orthogonal", "policy_init_gain": 1.0, "actor_critic_share_weights": true, "adaptive_stddev": false, "continuous_tanh_scale": 0.0, "initial_stddev": 1.0, "use_env_info_cache": false, "env_gpu_actions": false, "env_gpu_observations": true, "env_frameskip": 1, "env_framestack": 1, "pixel_format": "CHW", "use_record_episode_statistics": false, "episode_counter": false, "with_wandb": true, "wandb_user": "bartekcupial", "wandb_project": "sf2_nethack", "wandb_group": "gmum", "wandb_job_type": "SF", "wandb_tags": [], "with_pbt": false, "pbt_mix_policies_in_one_env": true, "pbt_period_env_steps": 5000000, "pbt_start_mutation": 20000000, "pbt_replace_fraction": 0.3, "pbt_mutation_rate": 0.15, "pbt_replace_reward_gap": 0.1, "pbt_replace_reward_gap_absolute": 1e-06, "pbt_optimize_gamma": false, "pbt_target_objective": "true_objective", "pbt_perturb_min": 1.1, "pbt_perturb_max": 1.5, "character": "mon-hum-neu-mal", "max_episode_steps": 100000, "penalty_step": 0.0, "penalty_time": 0.0, "fn_penalty_step": "constant", "savedir": null, "save_ttyrec_every": 0, "gameloaddir": null, "state_counter": null, "add_image_observation": true, "crop_dim": 18, "pixel_size": 6, "reward_shaping": false, "use_tty_only": true, "use_prev_action": true, "h_dim": 1738, "msg_hdim": 64, "color_edim": 16, "char_edim": 16, "use_crop": true, "use_crop_norm": true, "screen_kernel_size": 3, "no_max_pool": false, "screen_conv_blocks": 2, "blstats_hdim": 512, "fc_after_cnn_hdim": 512, "use_resnet": true, "use_dataset": false, "behavioral_clone": false, "data_path": "/nle/nld-aa/nle_data", "db_path": "/ttyrecs/ttyrecs.db", "dataset_name": "autoascend", "dataset_num_splits": 2, "dataset_warmup": 0, "dataset_rollout": 32, "dataset_batch_size": 1024, "dataset_num_workers": 8, "dataset_demigod": false, "dataset_highscore": false, "dataset_midscore": false, "dataset_deep": false, "dataset_shuffle": true, "reset_on_rollout_boundary": false, "exp_tags": "['2024_03_20_monk-APPO-KS-T-baseline']", "exp_point": "monk-APPO-KS-T", "group": "monk-APPO-KS-T", "use_pretrained_checkpoint": true, "model": "ScaledNet", "model_path": "/net/pr2/projects/plgrid/plgggmum_crl/bcupial/sf_checkpoints/amzn-AA-BC_pretrained", "supervised_loss_coeff": 0.0, "kickstarting_loss_coeff": 0.5, "distillation_loss_coeff": 0.0, "supervised_loss_decay": 1.0, "kickstarting_loss_decay": 0.99998, "distillation_loss_decay": 1.0, "min_supervised_loss_coeff": 0.0, "min_kickstarting_loss_coeff": 0.0, "min_distillation_loss_coeff": 0.0, "substitute_regularization_with_exploration": false, "exploration_coeff_on_supervised_loss_coeff": 0.0, "exploration_coeff_on_kickstarting_loss_coeff": 0.0, "exploration_coeff_on_distillation_loss_coeff": 0.0, "teacher_path": "/net/pr2/projects/plgrid/plgggmum_crl/bcupial/sf_checkpoints/amzn-AA-BC_pretrained", "run_teacher_hs": false, "add_stats_to_info": true, "capture_video": false, "capture_video_ith": 100, "freeze": { "encoder": 0 }, "unfreeze": {}, "freeze_batch_norm": false, "skip_train": 25000000, "target_batch_size": 128, "optim_step_every_ith": 1, "command_line": "--env=challenge --exp_tags=['2024_03_20_monk-APPO-KS-T-baseline'] --exp_point=monk-APPO-KS-T --train_for_env_steps=500000000 --group=monk-APPO-KS-T --character=mon-hum-neu-mal --num_workers=16 --num_envs_per_worker=32 --worker_num_splits=2 --rollout=128 --batch_size=4096 --async_rl=True --serial_mode=False --wandb_user=bartekcupial --wandb_project=sf2_nethack --wandb_group=gmum --with_wandb=True --use_pretrained_checkpoint=True --model_path=/net/pr2/projects/plgrid/plgggmum_crl/bcupial/sf_checkpoints/amzn-AA-BC_pretrained --kickstarting_loss_coeff=0.5 --teacher_path=/net/pr2/projects/plgrid/plgggmum_crl/bcupial/sf_checkpoints/amzn-AA-BC_pretrained --run_teacher_hs=False --use_prev_action=True --model=ScaledNet --use_resnet=True --learning_rate=0.0001 --rnn_size=1738 --h_dim=1738 --exploration_loss_coeff=0.0 --gamma=1.0 --skip_train=25000000 --lr_schedule=linear_decay --save_milestones_ith=25000000 --kickstarting_loss_decay=0.99998 --seed=1 --freeze={'encoder': 0} --num_batches_per_epoch=4 --optim_step_every_ith=1 --target_batch_size=128", "cli_args": { "env": "challenge", "seed": 1, "async_rl": true, "serial_mode": false, "worker_num_splits": 2, "num_workers": 16, "num_envs_per_worker": 32, "batch_size": 4096, "num_batches_per_epoch": 4, "rollout": 128, "gamma": 1.0, "exploration_loss_coeff": 0.0, "learning_rate": 0.0001, "lr_schedule": "linear_decay", "train_for_env_steps": 500000000, "save_milestones_ith": 25000000, "rnn_size": 1738, "with_wandb": true, "wandb_user": "bartekcupial", "wandb_project": "sf2_nethack", "wandb_group": "gmum", "character": "mon-hum-neu-mal", "use_prev_action": true, "h_dim": 1738, "use_resnet": true, "exp_tags": "['2024_03_20_monk-APPO-KS-T-baseline']", "exp_point": "monk-APPO-KS-T", "group": "monk-APPO-KS-T", "use_pretrained_checkpoint": true, "model": "ScaledNet", "model_path": "/net/pr2/projects/plgrid/plgggmum_crl/bcupial/sf_checkpoints/amzn-AA-BC_pretrained", "kickstarting_loss_coeff": 0.5, "kickstarting_loss_decay": 0.99998, "teacher_path": "/net/pr2/projects/plgrid/plgggmum_crl/bcupial/sf_checkpoints/amzn-AA-BC_pretrained", "run_teacher_hs": false, "freeze": { "encoder": 0 }, "skip_train": 25000000, "target_batch_size": 128, "optim_step_every_ith": 1 }, "git_hash": "unknown", "git_repo_name": "not a git repository", "wandb_unique_id": "default_experiment_20240321_101612_080200" }