trs07170 committed on
Commit
a9c85c5
·
verified ·
1 Parent(s): 7fed591

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ wandb/run-20250822_175544-mg58khw0/run-mg58khw0.wandb filter=lfs diff=lfs merge=lfs -text
bl_multiview_depth_set_table.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/step-004000-epoch-05-loss=0.2617.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:473480ba488abbfeebbca7147215571138c18985d2a92c13bb3b5e59604e59fd
3
+ size 4093057552
checkpoints/step-007160-epoch-10-loss=0.1436.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f801b6b80c5e6f235af6daf003b8b1a20811b6037367ccdec70f133fb56253b5
3
+ size 4093057552
config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_root_dir": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data",
3
+ "depth": false,
4
+ "global_pose": false,
5
+ "hf_token": ".hf_token",
6
+ "image_aug": false,
7
+ "is_grasped": false,
8
+ "is_resume": false,
9
+ "model_type": "my_vla_qwen",
10
+ "pretrained_checkpoint": "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
11
+ "qpos": false,
12
+ "resume_epoch": null,
13
+ "resume_step": null,
14
+ "run_id": "bl_multiview_depth_set_table",
15
+ "run_id_note": null,
16
+ "run_root_dir": "myvla_exp",
17
+ "save_interval": 1000,
18
+ "seed": 7,
19
+ "segmentation": false,
20
+ "trackers": [
21
+ "jsonl",
22
+ "wandb"
23
+ ],
24
+ "vla": {
25
+ "action_chunk_size": 8,
26
+ "action_tokenizer": "extra_action_tokenizer",
27
+ "base_vlm": "prism-qwen25-extra-dinosiglip-224px+0_5b",
28
+ "compress_history": false,
29
+ "data_mix": "bridge",
30
+ "enable_gradient_checkpointing": true,
31
+ "enable_mixed_precision_training": true,
32
+ "epochs": 10,
33
+ "expected_world_size": 8,
34
+ "freeze_llm_backbone": false,
35
+ "freeze_vision_backbone": true,
36
+ "global_batch_size": 512,
37
+ "image_sequence_len": 4,
38
+ "image_window_size": 1,
39
+ "learning_rate": 2e-05,
40
+ "lr_scheduler_type": "constant",
41
+ "max_grad_norm": 1.0,
42
+ "max_steps": null,
43
+ "per_device_batch_size": 8,
44
+ "reduce_in_full_precision": true,
45
+ "save_every_n_steps": 25000,
46
+ "shuffle_buffer_size": 256000,
47
+ "train_strategy": "fsdp-full-shard",
48
+ "type": "myvla-qwen-224px+mx-mshab",
49
+ "unfreeze_last_llm_layer": false,
50
+ "use_flow_matching": false,
51
+ "use_wrist_image": true,
52
+ "vla_id": "myvla-qwen-224px+mx-mshab",
53
+ "warmup_ratio": 0.0,
54
+ "weight_decay": 0.0
55
+ },
56
+ "wandb_entity": "traysen879-uc-san-diego",
57
+ "wandb_project": "mshab_vla"
58
+ }
config.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_root_dir: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
2
+ depth: false
3
+ global_pose: false
4
+ hf_token: .hf_token
5
+ image_aug: false
6
+ is_grasped: false
7
+ is_resume: false
8
+ model_type: my_vla_qwen
9
+ pretrained_checkpoint: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
10
+ qpos: false
11
+ resume_epoch: null
12
+ resume_step: null
13
+ run_id: bl_multiview_depth_set_table
14
+ run_id_note: null
15
+ run_root_dir: myvla_exp
16
+ save_interval: 1000
17
+ seed: 7
18
+ segmentation: false
19
+ trackers:
20
+ - jsonl
21
+ - wandb
22
+ vla:
23
+ action_chunk_size: 8
24
+ action_tokenizer: extra_action_tokenizer
25
+ base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
26
+ compress_history: false
27
+ data_mix: bridge
28
+ enable_gradient_checkpointing: true
29
+ enable_mixed_precision_training: true
30
+ epochs: 10
31
+ expected_world_size: 8
32
+ freeze_llm_backbone: false
33
+ freeze_vision_backbone: true
34
+ global_batch_size: 512
35
+ image_sequence_len: 4
36
+ image_window_size: 1
37
+ learning_rate: 2.0e-05
38
+ lr_scheduler_type: constant
39
+ max_grad_norm: 1.0
40
+ max_steps: null
41
+ per_device_batch_size: 8
42
+ reduce_in_full_precision: true
43
+ save_every_n_steps: 25000
44
+ shuffle_buffer_size: 256000
45
+ train_strategy: fsdp-full-shard
46
+ type: myvla-qwen-224px+mx-mshab
47
+ unfreeze_last_llm_layer: false
48
+ use_flow_matching: false
49
+ use_wrist_image: true
50
+ vla_id: myvla-qwen-224px+mx-mshab
51
+ warmup_ratio: 0.0
52
+ weight_decay: 0.0
53
+ wandb_entity: traysen879-uc-san-diego
54
+ wandb_project: mshab_vla
run-metrics.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"hparams": {"data_root_dir": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data", "depth": false, "global_pose": false, "hf_token": ".hf_token", "image_aug": false, "is_grasped": false, "is_resume": false, "model_type": "my_vla_qwen", "pretrained_checkpoint": "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b", "qpos": false, "resume_epoch": null, "resume_step": null, "run_id": "bl_multiview_depth_set_table", "run_id_note": null, "run_root_dir": "myvla_exp", "save_interval": 1000, "seed": 7, "segmentation": false, "trackers": ["jsonl", "wandb"], "vla": {"action_chunk_size": 8, "action_tokenizer": "extra_action_tokenizer", "base_vlm": "prism-qwen25-extra-dinosiglip-224px+0_5b", "compress_history": false, "data_mix": "bridge", "enable_gradient_checkpointing": true, "enable_mixed_precision_training": true, "epochs": 10, "expected_world_size": 8, "freeze_llm_backbone": false, "freeze_vision_backbone": true, "global_batch_size": 512, "image_sequence_len": 4, "image_window_size": 1, "learning_rate": 2e-05, "lr_scheduler_type": "constant", "max_grad_norm": 1.0, "max_steps": null, "per_device_batch_size": 8, "reduce_in_full_precision": true, "save_every_n_steps": 25000, "shuffle_buffer_size": 256000, "train_strategy": "fsdp-full-shard", "type": "myvla-qwen-224px+mx-mshab", "unfreeze_last_llm_layer": false, "use_flow_matching": false, "use_wrist_image": true, "vla_id": "myvla-qwen-224px+mx-mshab", "warmup_ratio": 0.0, "weight_decay": 0.0}, "wandb_entity": "traysen879-uc-san-diego", "wandb_project": "mshab_vla"}, "run_id": "bl_multiview_depth_set_table"}
wandb/debug-internal.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-22T17:55:45.122997378Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-08-22T17:55:45.417778442Z","level":"INFO","msg":"stream: created new stream","id":"mg58khw0"}
3
+ {"time":"2025-08-22T17:55:45.417944745Z","level":"INFO","msg":"stream: started","id":"mg58khw0"}
4
+ {"time":"2025-08-22T17:55:45.417987887Z","level":"INFO","msg":"writer: Do: started","stream_id":"mg58khw0"}
5
+ {"time":"2025-08-22T17:55:45.418028658Z","level":"INFO","msg":"sender: started","stream_id":"mg58khw0"}
6
+ {"time":"2025-08-22T17:55:45.418052908Z","level":"INFO","msg":"handler: started","stream_id":"mg58khw0"}
7
+ {"time":"2025-08-22T19:05:01.607571042Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
8
+ {"time":"2025-08-23T06:27:10.330396586Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-08-23T06:27:10.442620306Z","level":"INFO","msg":"handler: operation stats","stats":{}}
10
+ {"time":"2025-08-23T06:27:10.450768217Z","level":"INFO","msg":"stream: closing","id":"mg58khw0"}
11
+ {"time":"2025-08-23T06:27:10.450887019Z","level":"INFO","msg":"handler: closed","stream_id":"mg58khw0"}
12
+ {"time":"2025-08-23T06:27:10.450965602Z","level":"INFO","msg":"sender: closed","stream_id":"mg58khw0"}
13
+ {"time":"2025-08-23T06:27:10.45093435Z","level":"INFO","msg":"writer: Close: closed","stream_id":"mg58khw0"}
14
+ {"time":"2025-08-23T06:27:10.45361464Z","level":"INFO","msg":"stream: closed","id":"mg58khw0"}
wandb/debug.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-08-22 17:55:44,851 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-08-22 17:55:44,851 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Configure stats pid to 4004464
3
+ 2025-08-22 17:55:44,851 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
4
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
5
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_depth_set_table/wandb/run-20250822_175544-mg58khw0/logs/debug.log
7
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_depth_set_table/wandb/run-20250822_175544-mg58khw0/logs/debug-internal.log
8
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:init():830] calling init triggers
9
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 8, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 4, 'use_wrist_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 1}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'depth': False, 'segmentation': False, '_wandb': {}}
11
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:init():871] starting backend
12
+ 2025-08-22 17:55:45,099 INFO MainThread:4004464 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-08-22 17:55:45,108 INFO MainThread:4004464 [wandb_init.py:init():882] backend started and connected
14
+ 2025-08-22 17:55:45,114 INFO MainThread:4004464 [wandb_init.py:init():953] updated telemetry
15
+ 2025-08-22 17:55:45,150 INFO MainThread:4004464 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-08-22 17:55:45,586 INFO MainThread:4004464 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-08-22 17:55:46,161 INFO MainThread:4004464 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-08-22 17:55:46,161 INFO MainThread:4004464 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-08-22 17:55:46,163 INFO MainThread:4004464 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-08-22 17:55:46,163 INFO MainThread:4004464 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-08-22 17:55:46,173 INFO MainThread:4004464 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-08-23 06:27:09,979 INFO MainThread:4004464 [wandb_run.py:_finish():2224] finishing run traysen879-uc-san-diego/mshab_vla/mg58khw0
23
+ 2025-08-23 06:27:09,980 INFO MainThread:4004464 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
24
+ 2025-08-23 06:27:09,980 INFO MainThread:4004464 [wandb_run.py:_restore():2405] restore
25
+ 2025-08-23 06:27:09,981 INFO MainThread:4004464 [wandb_run.py:_restore():2411] restore done
26
+ 2025-08-23 06:27:10,444 INFO MainThread:4004464 [wandb_run.py:_footer_history_summary_info():3903] rendering history
27
+ 2025-08-23 06:27:10,446 INFO MainThread:4004464 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
28
+ 2025-08-23 06:27:10,446 INFO MainThread:4004464 [wandb_run.py:_footer_sync_info():3864] logging synced files
wandb/run-20250822_172424-grd0n90q/files/config.yaml ADDED
@@ -0,0 +1,210 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ dnq1lm07509oy3nh24mhk899wrek9b94:
6
+ args:
7
+ - --pretrained_checkpoint
8
+ - Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
9
+ - --model_type
10
+ - my_vla_qwen
11
+ - --vla.type
12
+ - myvla-qwen-224px+mx-mshab
13
+ - --vla.expected_world_size
14
+ - "8"
15
+ - --vla.global_batch_size
16
+ - "512"
17
+ - --vla.per_device_batch_size
18
+ - "8"
19
+ - --vla.learning_rate
20
+ - "2e-5"
21
+ - --vla.freeze_vision_backbone
22
+ - "True"
23
+ - --vla.freeze_llm_backbone
24
+ - "False"
25
+ - --vla.use_flow_matching
26
+ - "False"
27
+ - --vla.compress_history
28
+ - "False"
29
+ - --vla.image_sequence_len
30
+ - "4"
31
+ - --vla.image_window_size
32
+ - "1"
33
+ - --vla.epochs
34
+ - "10"
35
+ - --save_interval
36
+ - "1000"
37
+ - --run_id
38
+ - bl_multiview_depth_set_table
39
+ codePath: vla-scripts/train.py
40
+ codePathLocal: vla-scripts/train.py
41
+ cpu_count: 240
42
+ cpu_count_logical: 240
43
+ cudaVersion: "12.4"
44
+ disk:
45
+ /:
46
+ total: "20812690710528"
47
+ used: "36965724160"
48
+ email: traysen879@gmail.com
49
+ executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
50
+ git:
51
+ commit: 409e4c9a165115624c271028e9b3ee335991b747
52
+ remote: https://github.com/TRS07170/myvla.git
53
+ gpu: NVIDIA A100-SXM4-80GB
54
+ gpu_count: 8
55
+ gpu_nvidia:
56
+ - architecture: Ampere
57
+ cudaCores: 6912
58
+ memoryTotal: "85899345920"
59
+ name: NVIDIA A100-SXM4-80GB
60
+ uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
61
+ - architecture: Ampere
62
+ cudaCores: 6912
63
+ memoryTotal: "85899345920"
64
+ name: NVIDIA A100-SXM4-80GB
65
+ uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
66
+ - architecture: Ampere
67
+ cudaCores: 6912
68
+ memoryTotal: "85899345920"
69
+ name: NVIDIA A100-SXM4-80GB
70
+ uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
71
+ - architecture: Ampere
72
+ cudaCores: 6912
73
+ memoryTotal: "85899345920"
74
+ name: NVIDIA A100-SXM4-80GB
75
+ uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
76
+ - architecture: Ampere
77
+ cudaCores: 6912
78
+ memoryTotal: "85899345920"
79
+ name: NVIDIA A100-SXM4-80GB
80
+ uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
81
+ - architecture: Ampere
82
+ cudaCores: 6912
83
+ memoryTotal: "85899345920"
84
+ name: NVIDIA A100-SXM4-80GB
85
+ uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
86
+ - architecture: Ampere
87
+ cudaCores: 6912
88
+ memoryTotal: "85899345920"
89
+ name: NVIDIA A100-SXM4-80GB
90
+ uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
91
+ - architecture: Ampere
92
+ cudaCores: 6912
93
+ memoryTotal: "85899345920"
94
+ name: NVIDIA A100-SXM4-80GB
95
+ uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
96
+ host: 164-152-109-69
97
+ memory:
98
+ total: "1902324936704"
99
+ os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
100
+ program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
101
+ python: CPython 3.10.18
102
+ root: myvla_exp/bl_multiview_depth_set_table
103
+ startedAt: "2025-08-22T17:24:24.381806Z"
104
+ writerId: dnq1lm07509oy3nh24mhk899wrek9b94
105
+ m: []
106
+ python_version: 3.10.18
107
+ t:
108
+ "1":
109
+ - 1
110
+ - 2
111
+ - 3
112
+ - 11
113
+ - 41
114
+ - 49
115
+ - 63
116
+ - 71
117
+ "2":
118
+ - 1
119
+ - 2
120
+ - 3
121
+ - 11
122
+ - 41
123
+ - 49
124
+ - 63
125
+ - 71
126
+ "3":
127
+ - 13
128
+ - 16
129
+ - 61
130
+ "4": 3.10.18
131
+ "5": 0.21.0
132
+ "6": 4.40.1
133
+ "12": 0.21.0
134
+ "13": linux-x86_64
135
+ data_root_dir:
136
+ value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
137
+ depth:
138
+ value: false
139
+ global_pose:
140
+ value: false
141
+ hf_token:
142
+ value: .hf_token
143
+ image_aug:
144
+ value: false
145
+ is_grasped:
146
+ value: false
147
+ is_resume:
148
+ value: false
149
+ model_type:
150
+ value: my_vla_qwen
151
+ pretrained_checkpoint:
152
+ value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
153
+ qpos:
154
+ value: false
155
+ resume_epoch:
156
+ value: null
157
+ resume_step:
158
+ value: null
159
+ run_id:
160
+ value: bl_multiview_depth_set_table
161
+ run_id_note:
162
+ value: null
163
+ run_root_dir:
164
+ value: myvla_exp
165
+ save_interval:
166
+ value: 1000
167
+ seed:
168
+ value: 7
169
+ segmentation:
170
+ value: false
171
+ trackers:
172
+ value:
173
+ - jsonl
174
+ - wandb
175
+ vla:
176
+ value:
177
+ action_chunk_size: 8
178
+ action_tokenizer: extra_action_tokenizer
179
+ base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
180
+ compress_history: false
181
+ data_mix: bridge
182
+ enable_gradient_checkpointing: true
183
+ enable_mixed_precision_training: true
184
+ epochs: 10
185
+ expected_world_size: 8
186
+ freeze_llm_backbone: false
187
+ freeze_vision_backbone: true
188
+ global_batch_size: 512
189
+ image_sequence_len: 4
190
+ image_window_size: 1
191
+ learning_rate: 2e-05
192
+ lr_scheduler_type: constant
193
+ max_grad_norm: 1
194
+ max_steps: null
195
+ per_device_batch_size: 8
196
+ reduce_in_full_precision: true
197
+ save_every_n_steps: 25000
198
+ shuffle_buffer_size: 256000
199
+ train_strategy: fsdp-full-shard
200
+ type: myvla-qwen-224px+mx-mshab
201
+ unfreeze_last_llm_layer: false
202
+ use_flow_matching: false
203
+ use_wrist_image: true
204
+ vla_id: myvla-qwen-224px+mx-mshab
205
+ warmup_ratio: 0
206
+ weight_decay: 0
207
+ wandb_entity:
208
+ value: traysen879-uc-san-diego
209
+ wandb_project:
210
+ value: mshab_vla
wandb/run-20250822_172424-grd0n90q/files/output.log ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 08/22 [17:24:25] INFO  | >> [*] Starting VLA Training Loop ]8;id=817857;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\train.py]8;;\:]8;id=998125;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#322\322]8;;\
2
+ Traceback (most recent call last):
3
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 342, in <module>
4
+ train()
5
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
6
+ response = fn(cfg, *args, **kwargs)
7
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 323, in train
8
+ train_strategy.run_vla_training(
9
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/strategies/base_strategy.py", line 342, in run_vla_training
10
+ output, aux_loss = self.vlm(
11
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
12
+ return self._call_impl(*args, **kwargs)
13
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
14
+ return forward_call(*args, **kwargs)
15
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward
16
+ output = self._fsdp_wrapped_module(*args, **kwargs)
17
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
18
+ return self._call_impl(*args, **kwargs)
19
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
20
+ return forward_call(*args, **kwargs)
21
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/vlas/myvla.py", line 277, in forward
22
+ output = super().forward(
23
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/vlms/prismatic.py", line 373, in forward
24
+ patch_features = self.vision_backbone({k: pixel_values[k][multimodal_indices] for k in pixel_values})
25
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
26
+ return self._call_impl(*args, **kwargs)
27
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
28
+ return forward_call(*args, **kwargs)
29
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/backbones/vision/dinosiglip_vit.py", line 169, in forward
30
+ patches = compute_sequence_patches(pixel_values, featurizers, self.image_sequence_len)
31
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/backbones/vision/base_vision.py", line 47, in compute_sequence_patches
32
+ patches[k] = merge_two_dims(sequence_combine_call_split(trunc_pixels_k, featurizers[k]), start_dim=1)
33
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/util/torch_utils.py", line 106, in sequence_combine_call_split
34
+ flat_outputs = fn(flat_sequence)
35
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
36
+ return self._call_impl(*args, **kwargs)
37
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
38
+ return forward_call(*args, **kwargs)
39
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward
40
+ output = self._fsdp_wrapped_module(*args, **kwargs)
41
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
42
+ return self._call_impl(*args, **kwargs)
43
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
44
+ return forward_call(*args, **kwargs)
45
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/backbones/vision/base_vision.py", line 31, in wrapper
46
+ result = fn(*args, **kwargs)
47
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/timm/models/vision_transformer.py", line 644, in get_intermediate_layers
48
+ outputs = self._intermediate_layers(x, n)
49
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/timm/models/vision_transformer.py", line 626, in _intermediate_layers
50
+ x = blk(x)
51
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
52
+ return self._call_impl(*args, **kwargs)
53
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
54
+ return forward_call(*args, **kwargs)
55
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward
56
+ output = self._fsdp_wrapped_module(*args, **kwargs)
57
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
58
+ return self._call_impl(*args, **kwargs)
59
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
60
+ return forward_call(*args, **kwargs)
61
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/timm/models/vision_transformer.py", line 157, in forward
62
+ x = x + self.drop_path2(self.ls2(self.mlp(self.norm2(x))))
63
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
64
+ return self._call_impl(*args, **kwargs)
65
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
66
+ return forward_call(*args, **kwargs)
67
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/timm/layers/mlp.py", line 46, in forward
68
+ x = self.fc2(x)
69
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
70
+ return self._call_impl(*args, **kwargs)
71
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
72
+ return forward_call(*args, **kwargs)
73
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 116, in forward
74
+ return F.linear(input, self.weight, self.bias)
75
+ KeyboardInterrupt
wandb/run-20250822_172424-grd0n90q/files/requirements.txt ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setuptools==78.1.1
2
+ wheel==0.45.1
3
+ pip==25.1
4
+ sentencepiece==0.1.99
5
+ mpmath==1.3.0
6
+ libclang==18.1.1
7
+ flatbuffers==25.2.10
8
+ zipp==3.23.0
9
+ wrapt==1.14.1
10
+ urllib3==2.5.0
11
+ typing_extensions==4.14.1
12
+ typeguard==2.13.3
13
+ tqdm==4.67.1
14
+ toml==0.10.2
15
+ termcolor==3.1.0
16
+ tensorflow-io-gcs-filesystem==0.37.1
17
+ tensorflow-estimator==2.15.0
18
+ tensorboard-data-server==0.7.2
19
+ sympy==1.14.0
20
+ smmap==5.0.2
21
+ six==1.17.0
22
+ safetensors==0.5.3
23
+ regex==2025.7.34
24
+ PyYAML==6.0.2
25
+ pyparsing==3.2.3
26
+ Pygments==2.19.2
27
+ pyasn1==0.6.1
28
+ psutil==7.0.0
29
+ protobuf==4.21.12
30
+ platformdirs==4.3.8
31
+ pillow==11.3.0
32
+ packaging==25.0
33
+ opt_einsum==3.4.0
34
+ oauthlib==3.3.1
35
+ nvidia-nvtx-cu12==12.1.105
36
+ nvidia-nvjitlink-cu12==12.9.86
37
+ nvidia-nccl-cu12==2.19.3
38
+ nvidia-curand-cu12==10.3.2.106
39
+ nvidia-cufft-cu12==11.0.2.54
40
+ nvidia-cuda-runtime-cu12==12.1.105
41
+ nvidia-cuda-nvrtc-cu12==12.1.105
42
+ nvidia-cuda-cupti-cu12==12.1.105
43
+ nvidia-cublas-cu12==12.1.3.1
44
+ numpy==1.26.4
45
+ networkx==3.4.2
46
+ mypy_extensions==1.1.0
47
+ mergedeep==1.3.4
48
+ mdurl==0.1.2
49
+ MarkupSafe==3.0.2
50
+ Markdown==3.8.2
51
+ kiwisolver==1.4.8
52
+ keras==2.15.0
53
+ importlib_resources==6.5.2
54
+ idna==3.10
55
+ hf-xet==1.1.5
56
+ grpcio==1.74.0
57
+ gast==0.6.0
58
+ fsspec==2025.7.0
59
+ fonttools==4.59.0
60
+ filelock==3.18.0
61
+ etils==1.13.0
62
+ einops==0.8.1
63
+ cycler==0.12.1
64
+ click==8.2.1
65
+ charset-normalizer==3.4.2
66
+ certifi==2025.8.3
67
+ cachetools==5.5.2
68
+ attrs==25.3.0
69
+ annotated-types==0.7.0
70
+ absl-py==2.3.1
71
+ Werkzeug==3.1.3
72
+ typing-inspection==0.4.1
73
+ typing-inspect==0.9.0
74
+ triton==2.2.0
75
+ trimesh==4.7.1
76
+ tensorflow-metadata==1.17.2
77
+ tensorflow-addons==0.23.0
78
+ sentry-sdk==2.34.1
79
+ scipy==1.15.3
80
+ rsa==4.9.1
81
+ requests==2.32.4
82
+ pyyaml-include==1.4.1
83
+ python-dateutil==2.9.0.post0
84
+ pydantic_core==2.33.2
85
+ pyasn1_modules==0.4.2
86
+ promise==2.3
87
+ OpenEXR==3.3.5
88
+ nvidia-cusparse-cu12==12.1.0.106
89
+ nvidia-cudnn-cu12==8.9.2.26
90
+ ml-dtypes==0.2.0
91
+ markdown-it-py==3.0.0
92
+ jsonlines==4.0.0
93
+ json-numpy==2.1.1
94
+ Jinja2==3.1.6
95
+ h5py==3.14.0
96
+ google-pasta==0.2.0
97
+ gitdb==4.0.12
98
+ dm-tree==0.1.9
99
+ contourpy==1.3.2
100
+ astunparse==1.6.3
101
+ rich==14.1.0
102
+ requests-oauthlib==2.0.0
103
+ pydantic==2.11.7
104
+ nvidia-cusolver-cu12==11.4.5.107
105
+ matplotlib==3.10.5
106
+ huggingface-hub==0.34.3
107
+ google-auth==2.40.3
108
+ GitPython==3.1.45
109
+ draccus==0.8.0
110
+ wandb==0.21.0
111
+ torch==2.2.0
112
+ tokenizers==0.19.1
113
+ google-auth-oauthlib==1.2.2
114
+ array_record==0.7.2
115
+ transformers==4.40.1
116
+ torchvision==0.17.0
117
+ torchaudio==2.2.0
118
+ tensorboard==2.15.2
119
+ accelerate==1.9.0
120
+ timm==0.9.10
121
+ tensorflow-datasets==4.9.3
122
+ tensorflow==2.15.0
123
+ peft==0.11.1
124
+ tensorflow-graphics==2021.12.3
125
+ dlimp==0.0.1
126
+ openvla==0.0.3
127
+ ninja==1.11.1.4
128
+ flash-attn==2.5.5
129
+ autocommand==2.2.2
130
+ backports.tarfile==1.2.0
131
+ importlib_metadata==8.0.0
132
+ inflect==7.3.1
133
+ jaraco.collections==5.1.0
134
+ jaraco.context==5.3.0
135
+ jaraco.functools==4.0.1
136
+ jaraco.text==3.12.1
137
+ more-itertools==10.3.0
138
+ packaging==24.2
139
+ platformdirs==4.2.2
140
+ tomli==2.0.1
141
+ typeguard==4.3.0
142
+ typing_extensions==4.12.2
143
+ wheel==0.45.1
144
+ zipp==3.19.2
wandb/run-20250822_172424-grd0n90q/files/wandb-metadata.json ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-08-22T17:24:24.381806Z",
5
+ "args": [
6
+ "--pretrained_checkpoint",
7
+ "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
8
+ "--model_type",
9
+ "my_vla_qwen",
10
+ "--vla.type",
11
+ "myvla-qwen-224px+mx-mshab",
12
+ "--vla.expected_world_size",
13
+ "8",
14
+ "--vla.global_batch_size",
15
+ "512",
16
+ "--vla.per_device_batch_size",
17
+ "8",
18
+ "--vla.learning_rate",
19
+ "2e-5",
20
+ "--vla.freeze_vision_backbone",
21
+ "True",
22
+ "--vla.freeze_llm_backbone",
23
+ "False",
24
+ "--vla.use_flow_matching",
25
+ "False",
26
+ "--vla.compress_history",
27
+ "False",
28
+ "--vla.image_sequence_len",
29
+ "4",
30
+ "--vla.image_window_size",
31
+ "1",
32
+ "--vla.epochs",
33
+ "10",
34
+ "--save_interval",
35
+ "1000",
36
+ "--run_id",
37
+ "bl_multiview_depth_set_table"
38
+ ],
39
+ "program": "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
40
+ "codePath": "vla-scripts/train.py",
41
+ "codePathLocal": "vla-scripts/train.py",
42
+ "git": {
43
+ "remote": "https://github.com/TRS07170/myvla.git",
44
+ "commit": "409e4c9a165115624c271028e9b3ee335991b747"
45
+ },
46
+ "email": "traysen879@gmail.com",
47
+ "root": "myvla_exp/bl_multiview_depth_set_table",
48
+ "host": "164-152-109-69",
49
+ "executable": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
50
+ "cpu_count": 240,
51
+ "cpu_count_logical": 240,
52
+ "gpu": "NVIDIA A100-SXM4-80GB",
53
+ "gpu_count": 8,
54
+ "disk": {
55
+ "/": {
56
+ "total": "20812690710528",
57
+ "used": "36965724160"
58
+ }
59
+ },
60
+ "memory": {
61
+ "total": "1902324936704"
62
+ },
63
+ "gpu_nvidia": [
64
+ {
65
+ "name": "NVIDIA A100-SXM4-80GB",
66
+ "memoryTotal": "85899345920",
67
+ "cudaCores": 6912,
68
+ "architecture": "Ampere",
69
+ "uuid": "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
70
+ },
71
+ {
72
+ "name": "NVIDIA A100-SXM4-80GB",
73
+ "memoryTotal": "85899345920",
74
+ "cudaCores": 6912,
75
+ "architecture": "Ampere",
76
+ "uuid": "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
77
+ },
78
+ {
79
+ "name": "NVIDIA A100-SXM4-80GB",
80
+ "memoryTotal": "85899345920",
81
+ "cudaCores": 6912,
82
+ "architecture": "Ampere",
83
+ "uuid": "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
84
+ },
85
+ {
86
+ "name": "NVIDIA A100-SXM4-80GB",
87
+ "memoryTotal": "85899345920",
88
+ "cudaCores": 6912,
89
+ "architecture": "Ampere",
90
+ "uuid": "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
91
+ },
92
+ {
93
+ "name": "NVIDIA A100-SXM4-80GB",
94
+ "memoryTotal": "85899345920",
95
+ "cudaCores": 6912,
96
+ "architecture": "Ampere",
97
+ "uuid": "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
98
+ },
99
+ {
100
+ "name": "NVIDIA A100-SXM4-80GB",
101
+ "memoryTotal": "85899345920",
102
+ "cudaCores": 6912,
103
+ "architecture": "Ampere",
104
+ "uuid": "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
105
+ },
106
+ {
107
+ "name": "NVIDIA A100-SXM4-80GB",
108
+ "memoryTotal": "85899345920",
109
+ "cudaCores": 6912,
110
+ "architecture": "Ampere",
111
+ "uuid": "GPU-335150e5-634c-68e2-4930-656c95e62244"
112
+ },
113
+ {
114
+ "name": "NVIDIA A100-SXM4-80GB",
115
+ "memoryTotal": "85899345920",
116
+ "cudaCores": 6912,
117
+ "architecture": "Ampere",
118
+ "uuid": "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
119
+ }
120
+ ],
121
+ "cudaVersion": "12.4",
122
+ "writerId": "dnq1lm07509oy3nh24mhk899wrek9b94"
123
+ }
wandb/run-20250822_172424-grd0n90q/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"VLA Train/L1 Loss":0.6152674662097849,"VLA Train/Step Time":9.066001892089844,"_step":18,"_timestamp":1.7558836377797978e+09,"VLA Train/Step":18,"VLA Train/Loss (Raw)":0.5707007646560669,"_runtime":179.53910675,"VLA Train/Auxiliary Loss":0,"VLA Train/Action Token Accuracy":0.28155338764190674,"VLA Train/Epoch":0,"VLA Train/Loss":0.5707007646560669,"_wandb":{"runtime":179},"VLA Train/Learning Rate":2e-05}
wandb/run-20250822_172424-grd0n90q/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-22T17:24:24.667654596Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp_342id0h/port-4002856.txt","pid":4002856,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-08-22T17:24:24.66880959Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4002856-4003306-1128414806/socket","Net":"unix"}}
3
+ {"time":"2025-08-22T17:24:24.668943304Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":4002856}
4
+ {"time":"2025-08-22T17:24:24.688040644Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-08-22T17:24:24.702187716Z","level":"INFO","msg":"handleInformInit: received","streamId":"grd0n90q","id":"1(@)"}
6
+ {"time":"2025-08-22T17:24:24.994195243Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"grd0n90q","id":"1(@)"}
7
+ {"time":"2025-08-22T17:27:24.702921547Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-08-22T17:27:24.703114991Z","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2025-08-22T17:27:24.703256274Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-4002856-4003306-1128414806/socket","Net":"unix"}}
10
+ {"time":"2025-08-22T17:27:24.70309939Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
11
+ {"time":"2025-08-22T17:27:24.703325336Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
12
+ {"time":"2025-08-22T17:27:24.841847315Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-08-22T17:27:24.841900846Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-08-22T17:27:24.841914286Z","level":"INFO","msg":"server is closed"}
wandb/run-20250822_172424-grd0n90q/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-22T17:24:24.706169983Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-08-22T17:24:24.993938237Z","level":"INFO","msg":"stream: created new stream","id":"grd0n90q"}
3
+ {"time":"2025-08-22T17:24:24.99408303Z","level":"INFO","msg":"stream: started","id":"grd0n90q"}
4
+ {"time":"2025-08-22T17:24:24.994179082Z","level":"INFO","msg":"handler: started","stream_id":"grd0n90q"}
5
+ {"time":"2025-08-22T17:24:24.994227393Z","level":"INFO","msg":"sender: started","stream_id":"grd0n90q"}
6
+ {"time":"2025-08-22T17:24:24.994136561Z","level":"INFO","msg":"writer: Do: started","stream_id":"grd0n90q"}
7
+ {"time":"2025-08-22T17:27:24.703158922Z","level":"INFO","msg":"stream: closing","id":"grd0n90q"}
8
+ {"time":"2025-08-22T17:27:24.785199848Z","level":"ERROR","msg":"HTTP error","status":404,"method":"POST","url":"https://api.wandb.ai/graphql"}
9
+ {"time":"2025-08-22T17:27:24.785511865Z","level":"ERROR","msg":"runfiles: CreateRunFiles returned error: returned error 404: {\"data\":{\"createRunFiles\":null},\"errors\":[{\"message\":\"run mshab_vla/grd0n90q not found during createRunFiles\",\"path\":[\"createRunFiles\"]}]}"}
10
+ {"time":"2025-08-22T17:27:24.790789631Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
11
+ {"time":"2025-08-22T17:27:24.831280893Z","level":"ERROR","msg":"HTTP error","status":404,"method":"POST","url":"https://api.wandb.ai/files/traysen879-uc-san-diego/mshab_vla/grd0n90q/file_stream"}
12
+ {"time":"2025-08-22T17:27:24.831581099Z","level":"ERROR+4","msg":"filestream: fatal error: filestream: failed to upload: 404 Not Found path=files/traysen879-uc-san-diego/mshab_vla/grd0n90q/file_stream: {\"error\":\"run mshab_vla/grd0n90q not found while streaming file\"}"}
13
+ {"time":"2025-08-22T17:27:24.834877722Z","level":"INFO","msg":"handler: closed","stream_id":"grd0n90q"}
14
+ {"time":"2025-08-22T17:27:24.834904482Z","level":"INFO","msg":"writer: Close: closed","stream_id":"grd0n90q"}
15
+ {"time":"2025-08-22T17:27:24.834946803Z","level":"INFO","msg":"sender: closed","stream_id":"grd0n90q"}
16
+ {"time":"2025-08-22T17:27:24.839882542Z","level":"INFO","msg":"stream: closed","id":"grd0n90q"}
wandb/run-20250822_172424-grd0n90q/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-08-22 17:24:24,427 INFO MainThread:4002856 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-08-22 17:24:24,427 INFO MainThread:4002856 [wandb_setup.py:_flush():80] Configure stats pid to 4002856
3
+ 2025-08-22 17:24:24,427 INFO MainThread:4002856 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
4
+ 2025-08-22 17:24:24,430 INFO MainThread:4002856 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
5
+ 2025-08-22 17:24:24,432 INFO MainThread:4002856 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-08-22 17:24:24,432 INFO MainThread:4002856 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_depth_set_table/wandb/run-20250822_172424-grd0n90q/logs/debug.log
7
+ 2025-08-22 17:24:24,434 INFO MainThread:4002856 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_depth_set_table/wandb/run-20250822_172424-grd0n90q/logs/debug-internal.log
8
+ 2025-08-22 17:24:24,436 INFO MainThread:4002856 [wandb_init.py:init():830] calling init triggers
9
+ 2025-08-22 17:24:24,438 INFO MainThread:4002856 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 8, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 4, 'use_wrist_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 1}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'depth': False, 'segmentation': False, '_wandb': {}}
11
+ 2025-08-22 17:24:24,438 INFO MainThread:4002856 [wandb_init.py:init():871] starting backend
12
+ 2025-08-22 17:24:24,688 INFO MainThread:4002856 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-08-22 17:24:24,698 INFO MainThread:4002856 [wandb_init.py:init():882] backend started and connected
14
+ 2025-08-22 17:24:24,703 INFO MainThread:4002856 [wandb_init.py:init():953] updated telemetry
15
+ 2025-08-22 17:24:24,733 INFO MainThread:4002856 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-08-22 17:24:25,161 INFO MainThread:4002856 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-08-22 17:24:25,682 INFO MainThread:4002856 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-08-22 17:24:25,683 INFO MainThread:4002856 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-08-22 17:24:25,685 INFO MainThread:4002856 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-08-22 17:24:25,685 INFO MainThread:4002856 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-08-22 17:24:25,696 INFO MainThread:4002856 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-08-22 17:27:24,701 INFO MsgRouterThr:4002856 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
wandb/run-20250822_172424-grd0n90q/run-grd0n90q.wandb ADDED
Binary file (79.9 kB). View file
 
wandb/run-20250822_175544-mg58khw0/files/config.yaml ADDED
@@ -0,0 +1,211 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ kigmkgl7ewlmvaaruo3ggp33h5rhvks0:
6
+ args:
7
+ - --pretrained_checkpoint
8
+ - Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
9
+ - --model_type
10
+ - my_vla_qwen
11
+ - --vla.type
12
+ - myvla-qwen-224px+mx-mshab
13
+ - --vla.expected_world_size
14
+ - "8"
15
+ - --vla.global_batch_size
16
+ - "512"
17
+ - --vla.per_device_batch_size
18
+ - "8"
19
+ - --vla.learning_rate
20
+ - "2e-5"
21
+ - --vla.freeze_vision_backbone
22
+ - "True"
23
+ - --vla.freeze_llm_backbone
24
+ - "False"
25
+ - --vla.use_flow_matching
26
+ - "False"
27
+ - --vla.compress_history
28
+ - "False"
29
+ - --vla.image_sequence_len
30
+ - "4"
31
+ - --vla.image_window_size
32
+ - "1"
33
+ - --vla.epochs
34
+ - "10"
35
+ - --save_interval
36
+ - "1000"
37
+ - --run_id
38
+ - bl_multiview_depth_set_table
39
+ codePath: vla-scripts/train.py
40
+ codePathLocal: vla-scripts/train.py
41
+ cpu_count: 240
42
+ cpu_count_logical: 240
43
+ cudaVersion: "12.4"
44
+ disk:
45
+ /:
46
+ total: "20812690710528"
47
+ used: "36965580800"
48
+ email: traysen879@gmail.com
49
+ executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
50
+ git:
51
+ commit: 409e4c9a165115624c271028e9b3ee335991b747
52
+ remote: https://github.com/TRS07170/myvla.git
53
+ gpu: NVIDIA A100-SXM4-80GB
54
+ gpu_count: 8
55
+ gpu_nvidia:
56
+ - architecture: Ampere
57
+ cudaCores: 6912
58
+ memoryTotal: "85899345920"
59
+ name: NVIDIA A100-SXM4-80GB
60
+ uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
61
+ - architecture: Ampere
62
+ cudaCores: 6912
63
+ memoryTotal: "85899345920"
64
+ name: NVIDIA A100-SXM4-80GB
65
+ uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
66
+ - architecture: Ampere
67
+ cudaCores: 6912
68
+ memoryTotal: "85899345920"
69
+ name: NVIDIA A100-SXM4-80GB
70
+ uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
71
+ - architecture: Ampere
72
+ cudaCores: 6912
73
+ memoryTotal: "85899345920"
74
+ name: NVIDIA A100-SXM4-80GB
75
+ uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
76
+ - architecture: Ampere
77
+ cudaCores: 6912
78
+ memoryTotal: "85899345920"
79
+ name: NVIDIA A100-SXM4-80GB
80
+ uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
81
+ - architecture: Ampere
82
+ cudaCores: 6912
83
+ memoryTotal: "85899345920"
84
+ name: NVIDIA A100-SXM4-80GB
85
+ uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
86
+ - architecture: Ampere
87
+ cudaCores: 6912
88
+ memoryTotal: "85899345920"
89
+ name: NVIDIA A100-SXM4-80GB
90
+ uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
91
+ - architecture: Ampere
92
+ cudaCores: 6912
93
+ memoryTotal: "85899345920"
94
+ name: NVIDIA A100-SXM4-80GB
95
+ uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
96
+ host: 164-152-109-69
97
+ memory:
98
+ total: "1902324936704"
99
+ os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
100
+ program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
101
+ python: CPython 3.10.18
102
+ root: myvla_exp/bl_multiview_depth_set_table
103
+ startedAt: "2025-08-22T17:55:44.807018Z"
104
+ writerId: kigmkgl7ewlmvaaruo3ggp33h5rhvks0
105
+ m: []
106
+ python_version: 3.10.18
107
+ t:
108
+ "1":
109
+ - 1
110
+ - 2
111
+ - 3
112
+ - 11
113
+ - 41
114
+ - 49
115
+ - 63
116
+ - 71
117
+ "2":
118
+ - 1
119
+ - 2
120
+ - 3
121
+ - 11
122
+ - 41
123
+ - 49
124
+ - 63
125
+ - 71
126
+ "3":
127
+ - 2
128
+ - 13
129
+ - 16
130
+ - 61
131
+ "4": 3.10.18
132
+ "5": 0.21.0
133
+ "6": 4.40.1
134
+ "12": 0.21.0
135
+ "13": linux-x86_64
136
+ data_root_dir:
137
+ value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
138
+ depth:
139
+ value: false
140
+ global_pose:
141
+ value: false
142
+ hf_token:
143
+ value: .hf_token
144
+ image_aug:
145
+ value: false
146
+ is_grasped:
147
+ value: false
148
+ is_resume:
149
+ value: false
150
+ model_type:
151
+ value: my_vla_qwen
152
+ pretrained_checkpoint:
153
+ value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
154
+ qpos:
155
+ value: false
156
+ resume_epoch:
157
+ value: null
158
+ resume_step:
159
+ value: null
160
+ run_id:
161
+ value: bl_multiview_depth_set_table
162
+ run_id_note:
163
+ value: null
164
+ run_root_dir:
165
+ value: myvla_exp
166
+ save_interval:
167
+ value: 1000
168
+ seed:
169
+ value: 7
170
+ segmentation:
171
+ value: false
172
+ trackers:
173
+ value:
174
+ - jsonl
175
+ - wandb
176
+ vla:
177
+ value:
178
+ action_chunk_size: 8
179
+ action_tokenizer: extra_action_tokenizer
180
+ base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
181
+ compress_history: false
182
+ data_mix: bridge
183
+ enable_gradient_checkpointing: true
184
+ enable_mixed_precision_training: true
185
+ epochs: 10
186
+ expected_world_size: 8
187
+ freeze_llm_backbone: false
188
+ freeze_vision_backbone: true
189
+ global_batch_size: 512
190
+ image_sequence_len: 4
191
+ image_window_size: 1
192
+ learning_rate: 2e-05
193
+ lr_scheduler_type: constant
194
+ max_grad_norm: 1
195
+ max_steps: null
196
+ per_device_batch_size: 8
197
+ reduce_in_full_precision: true
198
+ save_every_n_steps: 25000
199
+ shuffle_buffer_size: 256000
200
+ train_strategy: fsdp-full-shard
201
+ type: myvla-qwen-224px+mx-mshab
202
+ unfreeze_last_llm_layer: false
203
+ use_flow_matching: false
204
+ use_wrist_image: true
205
+ vla_id: myvla-qwen-224px+mx-mshab
206
+ warmup_ratio: 0
207
+ weight_decay: 0
208
+ wandb_entity:
209
+ value: traysen879-uc-san-diego
210
+ wandb_project:
211
+ value: mshab_vla
wandb/run-20250822_175544-mg58khw0/files/output.log ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ 08/22 [17:55:46] INFO  | >> [*] Starting VLA Training Loop ]8;id=817857;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\train.py]8;;\:]8;id=998125;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#322\322]8;;\
2
+
3
+ 08/23 [06:27:09] INFO  | >> [*] Done with Training =>> Finalizing Metrics ]8;id=454536;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\train.py]8;;\:]8;id=267836;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#332\332]8;;\
wandb/run-20250822_175544-mg58khw0/files/requirements.txt ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setuptools==78.1.1
2
+ wheel==0.45.1
3
+ pip==25.1
4
+ sentencepiece==0.1.99
5
+ mpmath==1.3.0
6
+ libclang==18.1.1
7
+ flatbuffers==25.2.10
8
+ zipp==3.23.0
9
+ wrapt==1.14.1
10
+ urllib3==2.5.0
11
+ typing_extensions==4.14.1
12
+ typeguard==2.13.3
13
+ tqdm==4.67.1
14
+ toml==0.10.2
15
+ termcolor==3.1.0
16
+ tensorflow-io-gcs-filesystem==0.37.1
17
+ tensorflow-estimator==2.15.0
18
+ tensorboard-data-server==0.7.2
19
+ sympy==1.14.0
20
+ smmap==5.0.2
21
+ six==1.17.0
22
+ safetensors==0.5.3
23
+ regex==2025.7.34
24
+ PyYAML==6.0.2
25
+ pyparsing==3.2.3
26
+ Pygments==2.19.2
27
+ pyasn1==0.6.1
28
+ psutil==7.0.0
29
+ protobuf==4.21.12
30
+ platformdirs==4.3.8
31
+ pillow==11.3.0
32
+ packaging==25.0
33
+ opt_einsum==3.4.0
34
+ oauthlib==3.3.1
35
+ nvidia-nvtx-cu12==12.1.105
36
+ nvidia-nvjitlink-cu12==12.9.86
37
+ nvidia-nccl-cu12==2.19.3
38
+ nvidia-curand-cu12==10.3.2.106
39
+ nvidia-cufft-cu12==11.0.2.54
40
+ nvidia-cuda-runtime-cu12==12.1.105
41
+ nvidia-cuda-nvrtc-cu12==12.1.105
42
+ nvidia-cuda-cupti-cu12==12.1.105
43
+ nvidia-cublas-cu12==12.1.3.1
44
+ numpy==1.26.4
45
+ networkx==3.4.2
46
+ mypy_extensions==1.1.0
47
+ mergedeep==1.3.4
48
+ mdurl==0.1.2
49
+ MarkupSafe==3.0.2
50
+ Markdown==3.8.2
51
+ kiwisolver==1.4.8
52
+ keras==2.15.0
53
+ importlib_resources==6.5.2
54
+ idna==3.10
55
+ hf-xet==1.1.5
56
+ grpcio==1.74.0
57
+ gast==0.6.0
58
+ fsspec==2025.7.0
59
+ fonttools==4.59.0
60
+ filelock==3.18.0
61
+ etils==1.13.0
62
+ einops==0.8.1
63
+ cycler==0.12.1
64
+ click==8.2.1
65
+ charset-normalizer==3.4.2
66
+ certifi==2025.8.3
67
+ cachetools==5.5.2
68
+ attrs==25.3.0
69
+ annotated-types==0.7.0
70
+ absl-py==2.3.1
71
+ Werkzeug==3.1.3
72
+ typing-inspection==0.4.1
73
+ typing-inspect==0.9.0
74
+ triton==2.2.0
75
+ trimesh==4.7.1
76
+ tensorflow-metadata==1.17.2
77
+ tensorflow-addons==0.23.0
78
+ sentry-sdk==2.34.1
79
+ scipy==1.15.3
80
+ rsa==4.9.1
81
+ requests==2.32.4
82
+ pyyaml-include==1.4.1
83
+ python-dateutil==2.9.0.post0
84
+ pydantic_core==2.33.2
85
+ pyasn1_modules==0.4.2
86
+ promise==2.3
87
+ OpenEXR==3.3.5
88
+ nvidia-cusparse-cu12==12.1.0.106
89
+ nvidia-cudnn-cu12==8.9.2.26
90
+ ml-dtypes==0.2.0
91
+ markdown-it-py==3.0.0
92
+ jsonlines==4.0.0
93
+ json-numpy==2.1.1
94
+ Jinja2==3.1.6
95
+ h5py==3.14.0
96
+ google-pasta==0.2.0
97
+ gitdb==4.0.12
98
+ dm-tree==0.1.9
99
+ contourpy==1.3.2
100
+ astunparse==1.6.3
101
+ rich==14.1.0
102
+ requests-oauthlib==2.0.0
103
+ pydantic==2.11.7
104
+ nvidia-cusolver-cu12==11.4.5.107
105
+ matplotlib==3.10.5
106
+ huggingface-hub==0.34.3
107
+ google-auth==2.40.3
108
+ GitPython==3.1.45
109
+ draccus==0.8.0
110
+ wandb==0.21.0
111
+ torch==2.2.0
112
+ tokenizers==0.19.1
113
+ google-auth-oauthlib==1.2.2
114
+ array_record==0.7.2
115
+ transformers==4.40.1
116
+ torchvision==0.17.0
117
+ torchaudio==2.2.0
118
+ tensorboard==2.15.2
119
+ accelerate==1.9.0
120
+ timm==0.9.10
121
+ tensorflow-datasets==4.9.3
122
+ tensorflow==2.15.0
123
+ peft==0.11.1
124
+ tensorflow-graphics==2021.12.3
125
+ dlimp==0.0.1
126
+ openvla==0.0.3
127
+ ninja==1.11.1.4
128
+ flash-attn==2.5.5
129
+ autocommand==2.2.2
130
+ backports.tarfile==1.2.0
131
+ importlib_metadata==8.0.0
132
+ inflect==7.3.1
133
+ jaraco.collections==5.1.0
134
+ jaraco.context==5.3.0
135
+ jaraco.functools==4.0.1
136
+ jaraco.text==3.12.1
137
+ more-itertools==10.3.0
138
+ packaging==24.2
139
+ platformdirs==4.2.2
140
+ tomli==2.0.1
141
+ typeguard==4.3.0
142
+ typing_extensions==4.12.2
143
+ wheel==0.45.1
144
+ zipp==3.19.2
wandb/run-20250822_175544-mg58khw0/files/wandb-metadata.json ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-08-22T17:55:44.807018Z",
5
+ "args": [
6
+ "--pretrained_checkpoint",
7
+ "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
8
+ "--model_type",
9
+ "my_vla_qwen",
10
+ "--vla.type",
11
+ "myvla-qwen-224px+mx-mshab",
12
+ "--vla.expected_world_size",
13
+ "8",
14
+ "--vla.global_batch_size",
15
+ "512",
16
+ "--vla.per_device_batch_size",
17
+ "8",
18
+ "--vla.learning_rate",
19
+ "2e-5",
20
+ "--vla.freeze_vision_backbone",
21
+ "True",
22
+ "--vla.freeze_llm_backbone",
23
+ "False",
24
+ "--vla.use_flow_matching",
25
+ "False",
26
+ "--vla.compress_history",
27
+ "False",
28
+ "--vla.image_sequence_len",
29
+ "4",
30
+ "--vla.image_window_size",
31
+ "1",
32
+ "--vla.epochs",
33
+ "10",
34
+ "--save_interval",
35
+ "1000",
36
+ "--run_id",
37
+ "bl_multiview_depth_set_table"
38
+ ],
39
+ "program": "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
40
+ "codePath": "vla-scripts/train.py",
41
+ "codePathLocal": "vla-scripts/train.py",
42
+ "git": {
43
+ "remote": "https://github.com/TRS07170/myvla.git",
44
+ "commit": "409e4c9a165115624c271028e9b3ee335991b747"
45
+ },
46
+ "email": "traysen879@gmail.com",
47
+ "root": "myvla_exp/bl_multiview_depth_set_table",
48
+ "host": "164-152-109-69",
49
+ "executable": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
50
+ "cpu_count": 240,
51
+ "cpu_count_logical": 240,
52
+ "gpu": "NVIDIA A100-SXM4-80GB",
53
+ "gpu_count": 8,
54
+ "disk": {
55
+ "/": {
56
+ "total": "20812690710528",
57
+ "used": "36965580800"
58
+ }
59
+ },
60
+ "memory": {
61
+ "total": "1902324936704"
62
+ },
63
+ "gpu_nvidia": [
64
+ {
65
+ "name": "NVIDIA A100-SXM4-80GB",
66
+ "memoryTotal": "85899345920",
67
+ "cudaCores": 6912,
68
+ "architecture": "Ampere",
69
+ "uuid": "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
70
+ },
71
+ {
72
+ "name": "NVIDIA A100-SXM4-80GB",
73
+ "memoryTotal": "85899345920",
74
+ "cudaCores": 6912,
75
+ "architecture": "Ampere",
76
+ "uuid": "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
77
+ },
78
+ {
79
+ "name": "NVIDIA A100-SXM4-80GB",
80
+ "memoryTotal": "85899345920",
81
+ "cudaCores": 6912,
82
+ "architecture": "Ampere",
83
+ "uuid": "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
84
+ },
85
+ {
86
+ "name": "NVIDIA A100-SXM4-80GB",
87
+ "memoryTotal": "85899345920",
88
+ "cudaCores": 6912,
89
+ "architecture": "Ampere",
90
+ "uuid": "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
91
+ },
92
+ {
93
+ "name": "NVIDIA A100-SXM4-80GB",
94
+ "memoryTotal": "85899345920",
95
+ "cudaCores": 6912,
96
+ "architecture": "Ampere",
97
+ "uuid": "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
98
+ },
99
+ {
100
+ "name": "NVIDIA A100-SXM4-80GB",
101
+ "memoryTotal": "85899345920",
102
+ "cudaCores": 6912,
103
+ "architecture": "Ampere",
104
+ "uuid": "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
105
+ },
106
+ {
107
+ "name": "NVIDIA A100-SXM4-80GB",
108
+ "memoryTotal": "85899345920",
109
+ "cudaCores": 6912,
110
+ "architecture": "Ampere",
111
+ "uuid": "GPU-335150e5-634c-68e2-4930-656c95e62244"
112
+ },
113
+ {
114
+ "name": "NVIDIA A100-SXM4-80GB",
115
+ "memoryTotal": "85899345920",
116
+ "cudaCores": 6912,
117
+ "architecture": "Ampere",
118
+ "uuid": "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
119
+ }
120
+ ],
121
+ "cudaVersion": "12.4",
122
+ "writerId": "kigmkgl7ewlmvaaruo3ggp33h5rhvks0"
123
+ }
wandb/run-20250822_175544-mg58khw0/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"VLA Train/Loss (Raw)":0.1435861736536026,"VLA Train/Step Time":6.651643753051758,"_step":7160,"_timestamp":1.7559304174719603e+09,"_wandb":{"runtime":45084},"_runtime":45084.391904516,"VLA Train/Loss":0.1435861736536026,"VLA Train/Auxiliary Loss":0,"VLA Train/Action Token Accuracy":0.6699029207229614,"VLA Train/Step":7160,"VLA Train/Epoch":10,"VLA Train/Learning Rate":2e-05,"VLA Train/L1 Loss":0.07942128307633732}
wandb/run-20250822_175544-mg58khw0/logs/debug-core.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-22T17:55:44.993792062Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpd1k24cho/port-4004464.txt","pid":4004464,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-08-22T17:55:44.995074839Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":4004464}
3
+ {"time":"2025-08-22T17:55:44.995034278Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4004464-4005403-1358423967/socket","Net":"unix"}}
4
+ {"time":"2025-08-22T17:55:45.098967428Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-08-22T17:55:45.118162901Z","level":"INFO","msg":"handleInformInit: received","streamId":"mg58khw0","id":"1(@)"}
6
+ {"time":"2025-08-22T17:55:45.417996836Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"mg58khw0","id":"1(@)"}
7
+ {"time":"2025-08-23T06:27:10.450725106Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"mg58khw0","id":"1(@)"}
8
+ {"time":"2025-08-23T06:27:10.45584368Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"mg58khw0","id":"1(@)"}
9
+ {"time":"2025-08-23T06:30:42.448001203Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
10
+ {"time":"2025-08-23T06:30:42.448106914Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
11
+ {"time":"2025-08-23T06:30:42.448117954Z","level":"INFO","msg":"server is shutting down"}
12
+ {"time":"2025-08-23T06:30:42.448183167Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
13
+ {"time":"2025-08-23T06:30:42.44832581Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
14
+ {"time":"2025-08-23T06:30:42.448204876Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-4004464-4005403-1358423967/socket","Net":"unix"}}
15
+ {"time":"2025-08-23T06:30:42.44833905Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
16
+ {"time":"2025-08-23T06:30:42.448423102Z","level":"INFO","msg":"server is closed"}
wandb/run-20250822_175544-mg58khw0/logs/debug-internal.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-22T17:55:45.122997378Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-08-22T17:55:45.417778442Z","level":"INFO","msg":"stream: created new stream","id":"mg58khw0"}
3
+ {"time":"2025-08-22T17:55:45.417944745Z","level":"INFO","msg":"stream: started","id":"mg58khw0"}
4
+ {"time":"2025-08-22T17:55:45.417987887Z","level":"INFO","msg":"writer: Do: started","stream_id":"mg58khw0"}
5
+ {"time":"2025-08-22T17:55:45.418028658Z","level":"INFO","msg":"sender: started","stream_id":"mg58khw0"}
6
+ {"time":"2025-08-22T17:55:45.418052908Z","level":"INFO","msg":"handler: started","stream_id":"mg58khw0"}
7
+ {"time":"2025-08-22T19:05:01.607571042Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
8
+ {"time":"2025-08-23T06:27:10.330396586Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-08-23T06:27:10.442620306Z","level":"INFO","msg":"handler: operation stats","stats":{}}
10
+ {"time":"2025-08-23T06:27:10.450768217Z","level":"INFO","msg":"stream: closing","id":"mg58khw0"}
11
+ {"time":"2025-08-23T06:27:10.450887019Z","level":"INFO","msg":"handler: closed","stream_id":"mg58khw0"}
12
+ {"time":"2025-08-23T06:27:10.450965602Z","level":"INFO","msg":"sender: closed","stream_id":"mg58khw0"}
13
+ {"time":"2025-08-23T06:27:10.45093435Z","level":"INFO","msg":"writer: Close: closed","stream_id":"mg58khw0"}
14
+ {"time":"2025-08-23T06:27:10.45361464Z","level":"INFO","msg":"stream: closed","id":"mg58khw0"}
wandb/run-20250822_175544-mg58khw0/logs/debug.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-08-22 17:55:44,851 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-08-22 17:55:44,851 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Configure stats pid to 4004464
3
+ 2025-08-22 17:55:44,851 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
4
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
5
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_depth_set_table/wandb/run-20250822_175544-mg58khw0/logs/debug.log
7
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_depth_set_table/wandb/run-20250822_175544-mg58khw0/logs/debug-internal.log
8
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:init():830] calling init triggers
9
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 8, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 4, 'use_wrist_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 1}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'depth': False, 'segmentation': False, '_wandb': {}}
11
+ 2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:init():871] starting backend
12
+ 2025-08-22 17:55:45,099 INFO MainThread:4004464 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-08-22 17:55:45,108 INFO MainThread:4004464 [wandb_init.py:init():882] backend started and connected
14
+ 2025-08-22 17:55:45,114 INFO MainThread:4004464 [wandb_init.py:init():953] updated telemetry
15
+ 2025-08-22 17:55:45,150 INFO MainThread:4004464 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-08-22 17:55:45,586 INFO MainThread:4004464 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-08-22 17:55:46,161 INFO MainThread:4004464 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-08-22 17:55:46,161 INFO MainThread:4004464 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-08-22 17:55:46,163 INFO MainThread:4004464 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-08-22 17:55:46,163 INFO MainThread:4004464 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-08-22 17:55:46,173 INFO MainThread:4004464 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-08-23 06:27:09,979 INFO MainThread:4004464 [wandb_run.py:_finish():2224] finishing run traysen879-uc-san-diego/mshab_vla/mg58khw0
23
+ 2025-08-23 06:27:09,980 INFO MainThread:4004464 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
24
+ 2025-08-23 06:27:09,980 INFO MainThread:4004464 [wandb_run.py:_restore():2405] restore
25
+ 2025-08-23 06:27:09,981 INFO MainThread:4004464 [wandb_run.py:_restore():2411] restore done
26
+ 2025-08-23 06:27:10,444 INFO MainThread:4004464 [wandb_run.py:_footer_history_summary_info():3903] rendering history
27
+ 2025-08-23 06:27:10,446 INFO MainThread:4004464 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
28
+ 2025-08-23 06:27:10,446 INFO MainThread:4004464 [wandb_run.py:_footer_sync_info():3864] logging synced files
wandb/run-20250822_175544-mg58khw0/run-mg58khw0.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f171964c393d3a2a2a01822280a9c562b11ad093c2cefbe365d066562591d66
3
+ size 21823104