Add files using upload-large-folder tool

Browse files

Files changed (12) hide show

README.md +37 -6
checkpoints/step-001000-epoch-00-loss=0.1240.pt +2 -2
checkpoints/step-002000-epoch-01-loss=0.0893.pt +2 -2
checkpoints/step-003000-epoch-01-loss=0.0755.pt +2 -2
checkpoints/step-004000-epoch-02-loss=0.0635.pt +2 -2
checkpoints/step-005000-epoch-03-loss=0.0768.pt +2 -2
checkpoints/step-006000-epoch-03-loss=0.0703.pt +2 -2
config.json +9 -9
config.json.bak +91 -0
config.yaml +9 -9
memoryvla_realpushmultit_lora_bs64_v1.jsonl +9 -0
run-metrics.jsonl +1 -1

README.md CHANGED Viewed

@@ -54,13 +54,13 @@ restructure the loop around an outer epoch loop.
 ## Files
 ```
-config.json                              — full run config (resolved CLI + defaults)
 config.yaml                              — same, yaml flavor
 dataset_statistics.json                  — action mean/std over training split (REQUIRED for inference unnorm)
 run-metrics.jsonl                        — early run metadata
 memoryvla_realpushmultit_lora_bs64_v1.jsonl  — per-step train metrics
 checkpoints/
-  step-001000-epoch-00-loss=0.1240.pt    — 32 GB, FSDP full-shard consolidated, "only_trainable" save (LoRA + modules_to_save)
   step-002000-epoch-01-loss=0.0893.pt
   step-003000-epoch-01-loss=0.0755.pt
   step-004000-epoch-02-loss=0.0635.pt
@@ -68,10 +68,41 @@ checkpoints/
   step-006000-epoch-03-loss=0.0703.pt
 ```
-Each ckpt is ~32 GB; load via the MemoryVLA `load_vla(...)` path with the
-same prismatic base config and apply the same LoRA wrap before loading state
-dict. See `train_memoryvla_realpushmultit.py` for the resume code path
-(`--is_resume True --resume_step <step> --resume_epoch <epoch>`).
 ## Reproduce

 ## Files
 ```
+config.json                              — full run config (resolved CLI + defaults; base_vlm fixed to prism-dinosiglip-224px+7b)
 config.yaml                              — same, yaml flavor
 dataset_statistics.json                  — action mean/std over training split (REQUIRED for inference unnorm)
 run-metrics.jsonl                        — early run metadata
 memoryvla_realpushmultit_lora_bs64_v1.jsonl  — per-step train metrics
 checkpoints/
+  step-001000-epoch-00-loss=0.1240.pt    — 32 GB, merged: LoRA deltas folded into base weights, flat state_dict keys
   step-002000-epoch-01-loss=0.0893.pt
   step-003000-epoch-01-loss=0.0755.pt
   step-004000-epoch-02-loss=0.0635.pt
   step-006000-epoch-03-loss=0.0703.pt
 ```
+## Loading
+Each ckpt has been **merged** — LoRA adapter weights (PEFT LLaMA + SigLIP,
+our LoRALinear on DiT-L qkv / CogMem cross / GateFusion, custom MHA-LoRA on
+DiT per_attn) are folded into the corresponding base weights with the
+scaling factor `α/r` applied, then the wrap keys (`base_layer.weight`,
+`lora_A`, `lora_B`, `base_model.model.` prefix) are dropped. The resulting
+state-dict matches a fresh, non-LoRA-wrapped MemoryVLA model 1-for-1, so
+`load_vla(...)` loads cleanly with `strict=True` and rollout / inference
+needs no extra code:
+```python
+import sys, pathlib
+import torch
+sys.path.insert(0, str(pathlib.Path("third_party/MemoryVLA").resolve()))
+from vla import load_vla
+vla = load_vla(
+    "checkpoints/step-006000-epoch-03-loss=0.0703.pt",
+    load_for_training=False,
+    action_model_type="DiT-L",
+    future_action_window_size=15,
+    past_action_window_size=0, action_dim=7,
+    mem_length=16, retrieval_layers=2, per_token_size=256,
+    fusion_type="gate", consolidate_type="tome",
+).to("cuda").to(torch.bfloat16).eval()
+```
+To **resume training** from one of these, set `--is_resume True
+--resume_step <step> --resume_epoch <epoch>`. `apply_memoryvla_lora` then
+wraps the model again with fresh (zero-initialised) adapters; the merged
+base carries the prior training's knowledge, and the new LoRA adapters learn
+on top of it.
+The original unmerged ckpts are not preserved. The merge itself is exact, but
+it is losslessly invertible only with the matching adapter shapes — see
+`scripts/merge_lora_ckpt.py` for the merge logic.
 ## Reproduce

checkpoints/step-001000-epoch-00-loss=0.1240.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0b99d7d0d95961373f5a9b6ee59cbf183583f4aa57c9ecdd9e6f7ed8e5bb978
-size 33553162569

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfed6137eb16709deb3f8196b7d4cbb69eb1c26cb1636afbc845c73f71fe8ecd
+size 33507489313

checkpoints/step-002000-epoch-01-loss=0.0893.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1b77029d5625dd6c30e6373c37e79cf022d7bc10a9412c421f694ec2ecd6e465
-size 33553162569

 version https://git-lfs.github.com/spec/v1
+oid sha256:23abc7e40600f004663d3c7da5f1f9be007fde2715bb8407362cfd3aa9808420
+size 33507489313

checkpoints/step-003000-epoch-01-loss=0.0755.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6d617e5730c5c5328edb3b2039ea6186e377a9d65aa913c7629fa76f4678e1e2
-size 33553162569

 version https://git-lfs.github.com/spec/v1
+oid sha256:3cbfd2777ccf3a1054c8d56d6386a64d650fd99326534ccfcecd8cc49883a2ac
+size 33507489313

checkpoints/step-004000-epoch-02-loss=0.0635.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:be42f8d4c70f14a4418b165d742d40616eac714d2e54b617a0874ebd83ac5f43
-size 33553162569

 version https://git-lfs.github.com/spec/v1
+oid sha256:14c7a257c63e5345798d8a73092308723937a604070bf7f70ba25f655baec773
+size 33507489313

checkpoints/step-005000-epoch-03-loss=0.0768.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13a20ec24b09da7be97254f5f1c52c09e60955b86d2de0620b555da73006c68c
-size 33553162569

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa7c773f90cade39d6b781fbed3d07fd21787fccb787c5302046a08227bb2f36
+size 33507489313

checkpoints/step-006000-epoch-03-loss=0.0703.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ce5e856e6303806a1f2fb69eba90200daa6df3313e4badb6a4df4357f09c93a0
-size 33553162569

 version https://git-lfs.github.com/spec/v1
+oid sha256:c4c8429d76d04c64e27aa7be7e42134763b2b446ec0ca1afc93e0700b3782705
+size 33507489313

config.json CHANGED Viewed

@@ -11,7 +11,7 @@
   "image_aug": false,
   "image_key": "img_third",
   "instruction": "push the T-shaped block to visit each T-shaped target region on the table without revisiting any",
-  "is_resume": false,
   "lora": {
     "alpha": 48.0,
     "cog_cross_targets": [
@@ -43,10 +43,10 @@
   },
   "mem_length": 16,
   "per_token_size": 256,
-  "pretrained_checkpoint": "/workspace/diffusion_policy/pretrained/openvla-7b-prismatic/checkpoints/step-295000-epoch-40-loss=0.2200.pt",
   "repeated_diffusion_steps": 4,
-  "resume_epoch": 0,
-  "resume_step": 0,
   "retrieval_layers": 2,
   "run_id": "memoryvla_realpushmultit_lora_bs64_v1",
   "run_id_note": null,
@@ -62,8 +62,8 @@
   "use_timestep_pe": true,
   "val_ratio": 0.05,
   "vla": {
-    "base_vlm": "siglip-224px+7b",
-    "data_mix": "bridge",
     "enable_gradient_checkpointing": true,
     "enable_mixed_precision_training": true,
     "epochs": 100,
@@ -77,11 +77,11 @@
     "max_steps": 10000,
     "per_device_batch_size": 64,
     "reduce_in_full_precision": true,
-    "shuffle_buffer_size": 256000,
     "train_strategy": "fsdp-full-shard",
-    "type": "siglip-224px+mx-bridge",
     "unfreeze_last_llm_layer": false,
-    "vla_id": "siglip-224px+mx-bridge",
     "warmup_ratio": 0.05,
     "weight_decay": 0.0
   },

   "image_aug": false,
   "image_key": "img_third",
   "instruction": "push the T-shaped block to visit each T-shaped target region on the table without revisiting any",
+  "is_resume": true,
   "lora": {
     "alpha": 48.0,
     "cog_cross_targets": [
   },
   "mem_length": 16,
   "per_token_size": 256,
+  "pretrained_checkpoint": "/workspace/diffusion_policy/runs/memoryvla_realpushmultit/memoryvla_realpushmultit_lora_bs64_v1/checkpoints/step-006000-epoch-03-loss=0.0703.pt",
   "repeated_diffusion_steps": 4,
+  "resume_epoch": 3,
+  "resume_step": 6000,
   "retrieval_layers": 2,
   "run_id": "memoryvla_realpushmultit_lora_bs64_v1",
   "run_id_note": null,
   "use_timestep_pe": true,
   "val_ratio": 0.05,
   "vla": {
+    "base_vlm": "prism-dinosiglip-224px+7b",
+    "data_mix": "oxe_magic_soup_plus_minus",
     "enable_gradient_checkpointing": true,
     "enable_mixed_precision_training": true,
     "epochs": 100,
     "max_steps": 10000,
     "per_device_batch_size": 64,
     "reduce_in_full_precision": true,
+    "shuffle_buffer_size": 250000,
     "train_strategy": "fsdp-full-shard",
+    "type": "prism-dinosiglip-224px+oxe+diffusion",
     "unfreeze_last_llm_layer": false,
+    "vla_id": "prism-dinosiglip-224px+oxe+diffusion",
     "warmup_ratio": 0.05,
     "weight_decay": 0.0
   },

config.json.bak ADDED Viewed

	@@ -0,0 +1,91 @@

+{
+  "action_dim": 7,
+  "action_model_type": "DiT-L",
+  "consolidate_type": "tome",
+  "dataset_name": "realpushmultit",
+  "episode_instructions_file": null,
+  "fusion_type": "gate",
+  "future_action_window_size": 15,
+  "group_size": 16,
+  "hf_token": ".hf_token",
+  "image_aug": false,
+  "image_key": "img_third",
+  "instruction": "push the T-shaped block to visit each T-shaped target region on the table without revisiting any",
+  "is_resume": true,
+  "lora": {
+    "alpha": 48.0,
+    "cog_cross_targets": [
+      "q_proj",
+      "k_proj",
+      "v_proj"
+    ],
+    "dit_attn_targets": [
+      "q",
+      "v"
+    ],
+    "dropout": 0.05,
+    "enabled": true,
+    "llama_alpha": 16.0,
+    "llama_r": 8,
+    "llama_targets": [
+      "q_proj",
+      "v_proj"
+    ],
+    "lora_cog_gate": true,
+    "lora_llama": true,
+    "lora_vision": true,
+    "r": 24,
+    "vision_alpha": 16.0,
+    "vision_r": 8,
+    "vision_targets": [
+      "qkv"
+    ]
+  },
+  "mem_length": 16,
+  "per_token_size": 256,
+  "pretrained_checkpoint": "/workspace/diffusion_policy/runs/memoryvla_realpushmultit/memoryvla_realpushmultit_lora_bs64_v1/checkpoints/step-006000-epoch-03-loss=0.0703.pt",
+  "repeated_diffusion_steps": 4,
+  "resume_epoch": 3,
+  "resume_step": 6000,
+  "retrieval_layers": 2,
+  "run_id": "memoryvla_realpushmultit_lora_bs64_v1",
+  "run_id_note": null,
+  "run_root_dir": "runs/memoryvla_realpushmultit",
+  "save_interval": 1000,
+  "seed": 42,
+  "trackers": [
+    "jsonl",
+    "wandb"
+  ],
+  "update_fused": false,
+  "use_ema": false,
+  "use_timestep_pe": true,
+  "val_ratio": 0.05,
+  "vla": {
+    "base_vlm": "siglip-224px+7b",
+    "data_mix": "bridge",
+    "enable_gradient_checkpointing": true,
+    "enable_mixed_precision_training": true,
+    "epochs": 100,
+    "expected_world_size": 4,
+    "freeze_llm_backbone": false,
+    "freeze_vision_backbone": false,
+    "global_batch_size": 256,
+    "learning_rate": 0.0002,
+    "lr_scheduler_type": "linear-warmup+cosine-decay",
+    "max_grad_norm": 1.0,
+    "max_steps": 10000,
+    "per_device_batch_size": 64,
+    "reduce_in_full_precision": true,
+    "shuffle_buffer_size": 256000,
+    "train_strategy": "fsdp-full-shard",
+    "type": "siglip-224px+mx-bridge",
+    "unfreeze_last_llm_layer": false,
+    "vla_id": "siglip-224px+mx-bridge",
+    "warmup_ratio": 0.05,
+    "weight_decay": 0.0
+  },
+  "wandb_entity": "williamcao-uc-san-diego",
+  "wandb_project": "memoryvla_realpushmultit_lora",
+  "zarr_path": "data/real_push_multit/RealPushMultiT_320.zarr"
+}

config.yaml CHANGED Viewed

@@ -11,7 +11,7 @@ image_aug: false
 image_key: img_third
 instruction: push the T-shaped block to visit each T-shaped target region on the table
   without revisiting any
-is_resume: false
 lora:
   alpha: 48.0
   cog_cross_targets:
@@ -38,10 +38,10 @@ lora:
   - qkv
 mem_length: 16
 per_token_size: 256
-pretrained_checkpoint: /workspace/diffusion_policy/pretrained/openvla-7b-prismatic/checkpoints/step-295000-epoch-40-loss=0.2200.pt
 repeated_diffusion_steps: 4
-resume_epoch: 0
-resume_step: 0
 retrieval_layers: 2
 run_id: memoryvla_realpushmultit_lora_bs64_v1
 run_id_note: null
@@ -56,8 +56,8 @@ use_ema: false
 use_timestep_pe: true
 val_ratio: 0.05
 vla:
-  base_vlm: siglip-224px+7b
-  data_mix: bridge
   enable_gradient_checkpointing: true
   enable_mixed_precision_training: true
   epochs: 100
@@ -71,11 +71,11 @@ vla:
   max_steps: 10000
   per_device_batch_size: 64
   reduce_in_full_precision: true
-  shuffle_buffer_size: 256000
   train_strategy: fsdp-full-shard
-  type: siglip-224px+mx-bridge
   unfreeze_last_llm_layer: false
-  vla_id: siglip-224px+mx-bridge
   warmup_ratio: 0.05
   weight_decay: 0.0
 wandb_entity: williamcao-uc-san-diego

 image_key: img_third
 instruction: push the T-shaped block to visit each T-shaped target region on the table
   without revisiting any
+is_resume: true
 lora:
   alpha: 48.0
   cog_cross_targets:
   - qkv
 mem_length: 16
 per_token_size: 256
+pretrained_checkpoint: /workspace/diffusion_policy/runs/memoryvla_realpushmultit/memoryvla_realpushmultit_lora_bs64_v1/checkpoints/step-006000-epoch-03-loss=0.0703.pt
 repeated_diffusion_steps: 4
+resume_epoch: 3
+resume_step: 6000
 retrieval_layers: 2
 run_id: memoryvla_realpushmultit_lora_bs64_v1
 run_id_note: null
 use_timestep_pe: true
 val_ratio: 0.05
 vla:
+  base_vlm: prism-dinosiglip-224px+7b
+  data_mix: oxe_magic_soup_plus_minus
   enable_gradient_checkpointing: true
   enable_mixed_precision_training: true
   epochs: 100
   max_steps: 10000
   per_device_batch_size: 64
   reduce_in_full_precision: true
+  shuffle_buffer_size: 250000
   train_strategy: fsdp-full-shard
+  type: prism-dinosiglip-224px+oxe+diffusion
   unfreeze_last_llm_layer: false
+  vla_id: prism-dinosiglip-224px+oxe+diffusion
   warmup_ratio: 0.05
   weight_decay: 0.0
 wandb_entity: williamcao-uc-san-diego

memoryvla_realpushmultit_lora_bs64_v1.jsonl CHANGED Viewed

@@ -6481,3 +6481,12 @@
 {"VLA Train/Epoch": 4, "VLA Train/Learning Rate": 6.040664401219998e-05, "VLA Train/Loss": 0.05334499105811119, "VLA Train/Loss (Raw)": 0.05334499105811119, "VLA Train/Step": 6481, "VLA Train/Step Time": 5.510432720184326}
 {"VLA Train/Epoch": 4, "VLA Train/Learning Rate": 6.0376279240174616e-05, "VLA Train/Loss": 0.04682043939828873, "VLA Train/Loss (Raw)": 0.04682043939828873, "VLA Train/Step": 6482, "VLA Train/Step Time": 6.023637056350708}
 {"VLA Train/Epoch": 4, "VLA Train/Learning Rate": 6.034591880133977e-05, "VLA Train/Loss": 0.08309707045555115, "VLA Train/Loss (Raw)": 0.08309707045555115, "VLA Train/Step": 6483, "VLA Train/Step Time": 2.9199516773223877}

 {"VLA Train/Epoch": 4, "VLA Train/Learning Rate": 6.040664401219998e-05, "VLA Train/Loss": 0.05334499105811119, "VLA Train/Loss (Raw)": 0.05334499105811119, "VLA Train/Step": 6481, "VLA Train/Step Time": 5.510432720184326}
 {"VLA Train/Epoch": 4, "VLA Train/Learning Rate": 6.0376279240174616e-05, "VLA Train/Loss": 0.04682043939828873, "VLA Train/Loss (Raw)": 0.04682043939828873, "VLA Train/Step": 6482, "VLA Train/Step Time": 6.023637056350708}
 {"VLA Train/Epoch": 4, "VLA Train/Learning Rate": 6.034591880133977e-05, "VLA Train/Loss": 0.08309707045555115, "VLA Train/Loss (Raw)": 0.08309707045555115, "VLA Train/Step": 6483, "VLA Train/Step Time": 2.9199516773223877}
+{"VLA Train/Epoch": 3, "VLA Train/Learning Rate": 4.0000000000000003e-07, "VLA Train/Loss": 0.06522951275110245, "VLA Train/Loss (Raw)": 0.06522951275110245, "VLA Train/Step": 6001, "VLA Train/Step Time": 6.436929702758789}
+{"VLA Train/Epoch": 3, "VLA Train/Learning Rate": 8.000000000000001e-07, "VLA Train/Loss": 0.06474851816892624, "VLA Train/Loss (Raw)": 0.06474851816892624, "VLA Train/Step": 6002, "VLA Train/Step Time": 5.487528562545776}
+{"VLA Train/Epoch": 3, "VLA Train/Learning Rate": 1.2000000000000002e-06, "VLA Train/Loss": 0.06895651668310165, "VLA Train/Loss (Raw)": 0.06895651668310165, "VLA Train/Step": 6003, "VLA Train/Step Time": 5.5521111488342285}
+{"VLA Train/Epoch": 3, "VLA Train/Learning Rate": 1.6000000000000001e-06, "VLA Train/Loss": 0.05894169583916664, "VLA Train/Loss (Raw)": 0.05894169583916664, "VLA Train/Step": 6004, "VLA Train/Step Time": 5.520312786102295}
+{"VLA Train/Epoch": 3, "VLA Train/Learning Rate": 2.0000000000000003e-06, "VLA Train/Loss": 0.09873355180025101, "VLA Train/Loss (Raw)": 0.09873355180025101, "VLA Train/Step": 6005, "VLA Train/Step Time": 5.502042055130005}
+{"VLA Train/Epoch": 3, "VLA Train/Learning Rate": 2.4000000000000003e-06, "VLA Train/Loss": 0.0857497826218605, "VLA Train/Loss (Raw)": 0.0857497826218605, "VLA Train/Step": 6006, "VLA Train/Step Time": 5.524176359176636}
+{"VLA Train/Epoch": 3, "VLA Train/Learning Rate": 2.8000000000000003e-06, "VLA Train/Loss": 0.07324730604887009, "VLA Train/Loss (Raw)": 0.07324730604887009, "VLA Train/Step": 6007, "VLA Train/Step Time": 5.498190879821777}
+{"VLA Train/Epoch": 3, "VLA Train/Learning Rate": 3.2000000000000003e-06, "VLA Train/Loss": 0.06469529122114182, "VLA Train/Loss (Raw)": 0.06469529122114182, "VLA Train/Step": 6008, "VLA Train/Step Time": 5.506431341171265}
+{"VLA Train/Epoch": 3, "VLA Train/Learning Rate": 3.6e-06, "VLA Train/Loss": 0.07000145316123962, "VLA Train/Loss (Raw)": 0.07000145316123962, "VLA Train/Step": 6009, "VLA Train/Step Time": 5.5216052532196045}

run-metrics.jsonl CHANGED Viewed

@@ -1 +1 @@

- {"hparams": {"action_dim": 7, "action_model_type": "DiT-L", "consolidate_type": "tome", "dataset_name": "realpushmultit", "episode_instructions_file": null, "fusion_type": "gate", "future_action_window_size": 15, "group_size": 16, "hf_token": ".hf_token", "image_aug": false, "image_key": "img_third", "instruction": "push the T-shaped block to visit each T-shaped target region on the table without revisiting any", "is_resume": false, "lora": {"alpha": 48.0, "cog_cross_targets": ["q_proj", "k_proj", "v_proj"], "dit_attn_targets": ["q", "v"], "dropout": 0.05, "enabled": true, "llama_alpha": 16.0, "llama_r": 8, "llama_targets": ["q_proj", "v_proj"], "lora_cog_gate": true, "lora_llama": true, "lora_vision": true, "r": 24, "vision_alpha": 16.0, "vision_r": 8, "vision_targets": ["qkv"]}, "mem_length": 16, "per_token_size": 256, "pretrained_checkpoint": "/workspace/diffusion_policy/pretrained/openvla-7b-prismatic/checkpoints/step-295000-epoch-40-loss=0.2200.pt", "repeated_diffusion_steps": 4, "resume_epoch": 0, "resume_step": 0, "retrieval_layers": 2, "run_id": "memoryvla_realpushmultit_lora_bs64_v1", "run_id_note": null, "run_root_dir": "runs/memoryvla_realpushmultit", "save_interval": 1000, "seed": 42, "trackers": ["jsonl", "wandb"], "update_fused": false, "use_ema": false, "use_timestep_pe": true, "val_ratio": 0.05, "vla": {"base_vlm": "siglip-224px+7b", "data_mix": "bridge", "enable_gradient_checkpointing": true, "enable_mixed_precision_training": true, "epochs": 100, "expected_world_size": 4, "freeze_llm_backbone": false, "freeze_vision_backbone": false, "global_batch_size": 256, "learning_rate": 0.0002, "lr_scheduler_type": "linear-warmup+cosine-decay", "max_grad_norm": 1.0, "max_steps": 10000, "per_device_batch_size": 64, "reduce_in_full_precision": true, "shuffle_buffer_size": 256000, "train_strategy": "fsdp-full-shard", "type": "siglip-224px+mx-bridge", "unfreeze_last_llm_layer": false, "vla_id": "siglip-224px+mx-bridge", "warmup_ratio": 0.05, "weight_decay": 0.0}, "wandb_entity": "williamcao-uc-san-diego", "wandb_project": "memoryvla_realpushmultit_lora", "zarr_path": "data/real_push_multit/RealPushMultiT_320.zarr"}, "run_id": "memoryvla_realpushmultit_lora_bs64_v1"}

+ {"hparams": {"action_dim": 7, "action_model_type": "DiT-L", "consolidate_type": "tome", "dataset_name": "realpushmultit", "episode_instructions_file": null, "fusion_type": "gate", "future_action_window_size": 15, "group_size": 16, "hf_token": ".hf_token", "image_aug": false, "image_key": "img_third", "instruction": "push the T-shaped block to visit each T-shaped target region on the table without revisiting any", "is_resume": true, "lora": {"alpha": 48.0, "cog_cross_targets": ["q_proj", "k_proj", "v_proj"], "dit_attn_targets": ["q", "v"], "dropout": 0.05, "enabled": true, "llama_alpha": 16.0, "llama_r": 8, "llama_targets": ["q_proj", "v_proj"], "lora_cog_gate": true, "lora_llama": true, "lora_vision": true, "r": 24, "vision_alpha": 16.0, "vision_r": 8, "vision_targets": ["qkv"]}, "mem_length": 16, "per_token_size": 256, "pretrained_checkpoint": "/workspace/diffusion_policy/runs/memoryvla_realpushmultit/memoryvla_realpushmultit_lora_bs64_v1/checkpoints/step-006000-epoch-03-loss=0.0703.pt", "repeated_diffusion_steps": 4, "resume_epoch": 3, "resume_step": 6000, "retrieval_layers": 2, "run_id": "memoryvla_realpushmultit_lora_bs64_v1", "run_id_note": null, "run_root_dir": "runs/memoryvla_realpushmultit", "save_interval": 1000, "seed": 42, "trackers": ["jsonl", "wandb"], "update_fused": false, "use_ema": false, "use_timestep_pe": true, "val_ratio": 0.05, "vla": {"base_vlm": "prism-dinosiglip-224px+7b", "data_mix": "oxe_magic_soup_plus_minus", "enable_gradient_checkpointing": true, "enable_mixed_precision_training": true, "epochs": 100, "expected_world_size": 4, "freeze_llm_backbone": false, "freeze_vision_backbone": false, "global_batch_size": 256, "learning_rate": 0.0002, "lr_scheduler_type": "linear-warmup+cosine-decay", "max_grad_norm": 1.0, "max_steps": 10000, "per_device_batch_size": 64, "reduce_in_full_precision": true, "shuffle_buffer_size": 250000, "train_strategy": "fsdp-full-shard", "type": "prism-dinosiglip-224px+oxe+diffusion", "unfreeze_last_llm_layer": false, "vla_id": "prism-dinosiglip-224px+oxe+diffusion", "warmup_ratio": 0.05, "weight_decay": 0.0}, "wandb_entity": "williamcao-uc-san-diego", "wandb_project": "memoryvla_realpushmultit_lora", "zarr_path": "data/real_push_multit/RealPushMultiT_320.zarr"}, "run_id": "memoryvla_realpushmultit_lora_bs64_v1"}