{ "architectures": [ "Seq2SeqNet" ], "model_cfg": { "checkpoint_folder": "data/checkpoints/20250619_seq2seq_train/ckpts", "eval": { "action": null, "ckpt_to_load": null, "len_traj_act": null, "max_steps": 195, "num_sample": null, "pm_threshold": null, "rotation_threshold": 0.01, "sample": false, "save_results": true, "split": [ "val_seen" ], "start_eval_epoch": -1, "step_interval": 50, "stop_mode": null, "success_distance": 3.0, "use_ckpt_config": false }, "il": { "batch_size": 2, "camera_name": "pano_camera_0", "ckpt_to_load": "", "dataset_3dgs_root_dir": "data/datasets/3dgs", "dataset_grutopia10_root_dir": "data/datasets/grutopia10", "dataset_r2r_root_dir": "data/datasets/R2R_VLNCE_v1-3_preprocessed", "epochs": 100, "filter_failure": { "min_rgb_nums": 15, "use": true }, "inflection_weight_coef": 3.2, "lmdb_features_dir": "data/sample_episodes/20250211_sample_origin/sample_data.lmdb", "lmdb_map_size": 1000000000000.0, "load_from_ckpt": false, "load_from_pretrain": true, "loss": { "alpha": 0.0001, "dist_scale": 1 }, "lr": 0.0001, "num_workers": 8, "report_to": "wandb", "save_filter_frozen_weights": false, "save_interval_epochs": 5, "save_interval_steps": null, "use_descrete_dataset": null, "use_iw": true, "warmup_ratio": 0.1, "weight_decay": 0.0001 }, "local_rank": 0, "log_dir": "data/checkpoints/20250619_seq2seq_train/logs", "model": { "ablate_depth": null, "ablate_instruction": null, "ablate_rgb": null, "cross_modal_encoder": null, "depth_encoder": { "backbone": "resnet50", "cnn_type": "VlnResnetDepthEncoder", "ddppo_checkpoint": "data/ddppo-models/gibson-4plus-mp3d-train-val-test-resnet50.pth", "output_size": 128, "trainable": false }, "diffusion_policy": null, "distance_predictor": null, "image_encoder": null, "imu_encoder": null, "instruction_encoder": { "bidirectional": false, "dataset_vocab": "data/datasets/R2R_VLNCE_v1-3_preprocessed/train/train.json.gz", "embedding_file": "data/datasets/R2R_VLNCE_v1-3_preprocessed/embeddings.json.gz", "embedding_size": 50, "final_state_only": true, "fine_tune_embeddings": false, "hidden_size": 128, "load_model": null, "max_length": null, "rnn_type": "LSTM", "sensor_uuid": "instruction", "use_pretrained_embeddings": true, "vocab_size": 2504 }, "learn_angle": null, "len_traj_act": 4, "max_step": 200, "normalize_rgb": null, "policy_name": "Seq2Seq_Policy", "prev_action_encoder": null, "progress_monitor": { "alpha": 1.0, "concat_state_txt": null, "use": true }, "rgb_encoder": { "cnn_type": "TorchVisionResNet50", "output_size": 256, "trainable": false }, "seq2seq": { "use_prev_action": false }, "state_encoder": { "dropout_rate": null, "hidden_size": 512, "num_recurrent_layers": 1, "rgb_depth_embed_method": null, "rnn_type": "GRU", "use_dropout": null }, "stop_progress_predictor": null, "text_encoder": null }, "model_name": "seq2seq", "name": "20250619_seq2seq_train", "num_gpus": 1, "output_dir": "data/checkpoints/20250619_seq2seq_train/ckpts", "seed": 0, "tensorboard_dir": "data/checkpoints/20250619_seq2seq_train/tensorboard", "torch_gpu_id": 0, "torch_gpu_ids": [ 0 ], "world_size": 1 }, "model_type": "seq2seq", "torch_dtype": "float32", "transformers_version": "4.26.1" }