File size: 2,550 Bytes
f2dc91e
 
 
 
 
 
 
 
 
 
 
 
 
ba3fb02
f2dc91e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba3fb02
f2dc91e
ba3fb02
 
f2dc91e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ba3fb02
 
f2dc91e
 
 
 
 
 
 
 
 
 
 
 
 
ba3fb02
f2dc91e
ba3fb02
f2dc91e
ba3fb02
f2dc91e
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
{
  "action_dim": 7,
  "action_model_type": "DiT-L",
  "consolidate_type": "tome",
  "dataset_name": "realpushmultit",
  "episode_instructions_file": null,
  "fusion_type": "gate",
  "future_action_window_size": 15,
  "group_size": 16,
  "hf_token": ".hf_token",
  "image_aug": false,
  "image_key": "img_third",
  "instruction": "push the T-shaped block to visit each T-shaped target region on the table without revisiting any",
  "is_resume": true,
  "lora": {
    "alpha": 48.0,
    "cog_cross_targets": [
      "q_proj",
      "k_proj",
      "v_proj"
    ],
    "dit_attn_targets": [
      "q",
      "v"
    ],
    "dropout": 0.05,
    "enabled": true,
    "llama_alpha": 16.0,
    "llama_r": 8,
    "llama_targets": [
      "q_proj",
      "v_proj"
    ],
    "lora_cog_gate": true,
    "lora_llama": true,
    "lora_vision": true,
    "r": 24,
    "vision_alpha": 16.0,
    "vision_r": 8,
    "vision_targets": [
      "qkv"
    ]
  },
  "mem_length": 16,
  "per_token_size": 256,
  "pretrained_checkpoint": "/workspace/diffusion_policy/runs/memoryvla_realpushmultit/memoryvla_realpushmultit_lora_bs64_v1/checkpoints/step-006000-epoch-03-loss=0.0703.pt",
  "repeated_diffusion_steps": 4,
  "resume_epoch": 3,
  "resume_step": 6000,
  "retrieval_layers": 2,
  "run_id": "memoryvla_realpushmultit_lora_bs64_v1",
  "run_id_note": null,
  "run_root_dir": "runs/memoryvla_realpushmultit",
  "save_interval": 1000,
  "seed": 42,
  "trackers": [
    "jsonl",
    "wandb"
  ],
  "update_fused": false,
  "use_ema": false,
  "use_timestep_pe": true,
  "val_ratio": 0.05,
  "vla": {
    "base_vlm": "prism-dinosiglip-224px+7b",
    "data_mix": "oxe_magic_soup_plus_minus",
    "enable_gradient_checkpointing": true,
    "enable_mixed_precision_training": true,
    "epochs": 100,
    "expected_world_size": 4,
    "freeze_llm_backbone": false,
    "freeze_vision_backbone": false,
    "global_batch_size": 256,
    "learning_rate": 0.0002,
    "lr_scheduler_type": "linear-warmup+cosine-decay",
    "max_grad_norm": 1.0,
    "max_steps": 10000,
    "per_device_batch_size": 64,
    "reduce_in_full_precision": true,
    "shuffle_buffer_size": 250000,
    "train_strategy": "fsdp-full-shard",
    "type": "prism-dinosiglip-224px+oxe+diffusion",
    "unfreeze_last_llm_layer": false,
    "vla_id": "prism-dinosiglip-224px+oxe+diffusion",
    "warmup_ratio": 0.05,
    "weight_decay": 0.0
  },
  "wandb_entity": "williamcao-uc-san-diego",
  "wandb_project": "memoryvla_realpushmultit_lora",
  "zarr_path": "data/real_push_multit/RealPushMultiT_320.zarr"
}