| { |
| "architectures": [ |
| "RDPNet" |
| ], |
| "model_cfg": { |
| "checkpoint_folder": "data/checkpoints/20250619_rdp_train_new/ckpts", |
| "eval": { |
| "action": "descrete", |
| "ckpt_to_load": null, |
| "len_traj_act": 2, |
| "max_steps": 195, |
| "num_sample": 1, |
| "pm_threshold": 0.9, |
| "rotation_threshold": 0.01, |
| "sample": true, |
| "save_results": true, |
| "split": [ |
| "val_unseen" |
| ], |
| "start_eval_epoch": -1, |
| "step_interval": 80, |
| "stop_mode": "stop_progress", |
| "success_distance": 3.0, |
| "use_ckpt_config": false |
| }, |
| "il": { |
| "batch_size": 4, |
| "camera_name": "pano_camera_0", |
| "ckpt_to_load": "", |
| "dataset_3dgs_root_dir": "data/datasets/3dgs", |
| "dataset_grutopia10_root_dir": "data/datasets/grutopia10", |
| "dataset_r2r_root_dir": "data/datasets/R2R_VLNCE_v1-3_corrected", |
| "epochs": 50, |
| "filter_failure": { |
| "min_rgb_nums": 15, |
| "use": true |
| }, |
| "inflection_weight_coef": null, |
| "lmdb_features_dir": "data/sample_episodes/20250211_sample_origin/sample_data.lmdb", |
| "load_from_ckpt": false, |
| "load_from_pretrain": true, |
| "loss": { |
| "alpha": 0.0001, |
| "dist_scale": 1 |
| }, |
| "lr": 0.0001, |
| "num_workers": 8, |
| "report_to": "wandb", |
| "save_filter_frozen_weights": true, |
| "save_interval_epochs": 5, |
| "save_interval_steps": null, |
| "use_descrete_dataset": true, |
| "use_iw": null, |
| "warmup_ratio": 0.1, |
| "weight_decay": 0.0001 |
| }, |
| "local_rank": 0, |
| "log_dir": "data/checkpoints/20250619_rdp_train_new/logs", |
| "model": { |
| "ablate_depth": null, |
| "ablate_instruction": null, |
| "ablate_rgb": null, |
| "cross_modal_encoder": { |
| "hidden_size": 512, |
| "input_type": 3, |
| "load_model": false, |
| "num_attention_heads": 8, |
| "num_x_layers": 2, |
| "txt_to_img": true, |
| "txt_to_img_layer": 2 |
| }, |
| "depth_encoder": null, |
| "diffusion_policy": { |
| "action_stats": { |
| "max": [ |
| 0.25, |
| 0.25, |
| 0.27 |
| ], |
| "min": [ |
| -0.25, |
| -0.25, |
| -0.27 |
| ] |
| }, |
| "clip_sample": true, |
| "cls_free_guidance_scale": 1.5, |
| "cls_mask_method": "mask_token", |
| "cls_mask_ratio": 0.25, |
| "len_traj_pred": 8, |
| "metric_waypoint_spacing": 1, |
| "num_diffusion_iters": 20, |
| "pred_type": "epsilon", |
| "random_mask_instr": true, |
| "random_mask_rgb": true, |
| "scheduler": "DDPM", |
| "stop_weight": 1, |
| "transformer_encoding_size": 512, |
| "transformer_n_cond_layers": 1, |
| "transformer_n_layers": 3, |
| "transformer_p_drop_emb": 0.2, |
| "txt_len": 80, |
| "type": "transformer", |
| "use": true, |
| "use_cls_free_guidance": true, |
| "waypoint_spacing": 1 |
| }, |
| "distance_predictor": { |
| "normalize": false, |
| "use": false |
| }, |
| "eval": { |
| "action": "descrete", |
| "auto_remove": false, |
| "ckpt_to_load": null, |
| "distance_threshold": 1.5, |
| "episode_count": -1, |
| "len_traj_act": 4, |
| "load_eval_subset": true, |
| "max_len_traj_act": 8, |
| "max_steps": 195, |
| "min_displacement": 0.15, |
| "min_len_traj_act": 3, |
| "num_sample": 1, |
| "pm_threshold": 0.9, |
| "re_eval": false, |
| "rotation_threshold": 0.01, |
| "sample": null, |
| "save_results": true, |
| "sim_cfg_file": "vln/configs/sim_cfg_policy_h1_eval.yaml", |
| "split": [ |
| "val_unseen" |
| ], |
| "start_eval_epoch": -1, |
| "step_interval": 80, |
| "stop_mode": "stop_progress", |
| "stop_progress_threshold": 0.85, |
| "stop_x_threshold": 0.015, |
| "stop_y_threshold": 0.015, |
| "stop_yaw_threshold": 0.05, |
| "success_distance": 3.0, |
| "train_eval_interval": 100, |
| "use_ckpt_config": false, |
| "use_dynamic_len_traj_act": false, |
| "vln_cfg_file": "vln/configs/vln_cfg_policy_eval.yaml" |
| }, |
| "image_encoder": { |
| "depth": { |
| "backbone": "resnet50", |
| "bottleneck": "resnet", |
| "cnn_type": "VlnResnetDepthEncoder", |
| "ddppo_checkpoint": "data/ddppo-models/gibson-4plus-mp3d-train-val-test-resnet50.pth", |
| "feature_dim": 768, |
| "load_model": true, |
| "output_size": 128, |
| "projection_dim": 512, |
| "update_depth_encoder": false |
| }, |
| "dropout": 0.1, |
| "env_drop": 0.3, |
| "img_stack_nums": 4, |
| "rgb": { |
| "feature_dim": 768, |
| "img_mod": "multi_patches_avg_pooling", |
| "load_model": true, |
| "model_name": "clip-long", |
| "model_path": "data/pretrained/clip-long/longclip-B.pt", |
| "multi_patches_num": 5, |
| "projection_dim": 512, |
| "rgb_proj": false, |
| "update_rgb_encoder": false |
| }, |
| "use_env_drop": true, |
| "use_stack": false |
| }, |
| "imu_encoder": { |
| "encoding_size": 64, |
| "input_size": 3, |
| "to_local_coords": true, |
| "use": true |
| }, |
| "instruction_encoder": null, |
| "learn_angle": true, |
| "len_traj_act": 4, |
| "max_step": 200, |
| "normalize_rgb": null, |
| "policy_name": "RDP_Policy", |
| "prev_action_encoder": { |
| "encoding_size": 64, |
| "input_size": null, |
| "to_local_coords": null, |
| "type": "continuous", |
| "use": null |
| }, |
| "progress_monitor": { |
| "alpha": null, |
| "concat_state_txt": true, |
| "use": true |
| }, |
| "rgb_encoder": null, |
| "seq2seq": null, |
| "state_encoder": { |
| "dropout_rate": 0.2, |
| "hidden_size": 512, |
| "num_recurrent_layers": 1, |
| "rgb_depth_embed_method": "flat", |
| "rnn_type": "GRU", |
| "use_dropout": false |
| }, |
| "stop_progress_predictor": { |
| "concat_state_txt": true, |
| "loss_alpha": 10, |
| "type": "continuous", |
| "use": true |
| }, |
| "text_encoder": { |
| "ablate": null, |
| "embedding_size": 512, |
| "eot_token": 49407, |
| "final_state_only": null, |
| "hidden_size": 512, |
| "load_model": true, |
| "max_length": 248, |
| "model_name": "clip-long", |
| "model_path": "data/pretrained/clip-long/longclip-B.pt", |
| "num_l_layers": 6, |
| "pad_token": 0, |
| "sot_token": 49406, |
| "type": "clip-long", |
| "update_text_encoder": false, |
| "vocab_size": 50265 |
| }, |
| "use_iw": false |
| }, |
| "model_name": "rdp", |
| "name": "20250619_rdp_train_new", |
| "num_gpus": 4, |
| "output_dir": "data/checkpoints/20250619_rdp_train_new/ckpts", |
| "seed": 0, |
| "tensorboard_dir": "data/checkpoints/20250619_rdp_train_new/tensorboard", |
| "torch_gpu_id": 0, |
| "torch_gpu_ids": [ |
| 0, |
| 1, |
| 2, |
| 3 |
| ], |
| "world_size": 4 |
| }, |
| "model_type": "rdp", |
| "torch_dtype": "float32", |
| "transformers_version": "4.26.1" |
| } |
|
|