Upload folder using huggingface_hub
Browse files- .gitattributes +1 -0
- bl_multiview_depth_set_table.jsonl +0 -0
- checkpoints/step-004000-epoch-05-loss=0.2617.pt +3 -0
- checkpoints/step-007160-epoch-10-loss=0.1436.pt +3 -0
- config.json +58 -0
- config.yaml +54 -0
- run-metrics.jsonl +1 -0
- wandb/debug-internal.log +14 -0
- wandb/debug.log +28 -0
- wandb/run-20250822_172424-grd0n90q/files/config.yaml +210 -0
- wandb/run-20250822_172424-grd0n90q/files/output.log +75 -0
- wandb/run-20250822_172424-grd0n90q/files/requirements.txt +144 -0
- wandb/run-20250822_172424-grd0n90q/files/wandb-metadata.json +123 -0
- wandb/run-20250822_172424-grd0n90q/files/wandb-summary.json +1 -0
- wandb/run-20250822_172424-grd0n90q/logs/debug-core.log +14 -0
- wandb/run-20250822_172424-grd0n90q/logs/debug-internal.log +16 -0
- wandb/run-20250822_172424-grd0n90q/logs/debug.log +22 -0
- wandb/run-20250822_172424-grd0n90q/run-grd0n90q.wandb +0 -0
- wandb/run-20250822_175544-mg58khw0/files/config.yaml +211 -0
- wandb/run-20250822_175544-mg58khw0/files/output.log +3 -0
- wandb/run-20250822_175544-mg58khw0/files/requirements.txt +144 -0
- wandb/run-20250822_175544-mg58khw0/files/wandb-metadata.json +123 -0
- wandb/run-20250822_175544-mg58khw0/files/wandb-summary.json +1 -0
- wandb/run-20250822_175544-mg58khw0/logs/debug-core.log +16 -0
- wandb/run-20250822_175544-mg58khw0/logs/debug-internal.log +14 -0
- wandb/run-20250822_175544-mg58khw0/logs/debug.log +28 -0
- wandb/run-20250822_175544-mg58khw0/run-mg58khw0.wandb +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
wandb/run-20250822_175544-mg58khw0/run-mg58khw0.wandb filter=lfs diff=lfs merge=lfs -text
|
bl_multiview_depth_set_table.jsonl
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoints/step-004000-epoch-05-loss=0.2617.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:473480ba488abbfeebbca7147215571138c18985d2a92c13bb3b5e59604e59fd
|
| 3 |
+
size 4093057552
|
checkpoints/step-007160-epoch-10-loss=0.1436.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f801b6b80c5e6f235af6daf003b8b1a20811b6037367ccdec70f133fb56253b5
|
| 3 |
+
size 4093057552
|
config.json
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"data_root_dir": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data",
|
| 3 |
+
"depth": false,
|
| 4 |
+
"global_pose": false,
|
| 5 |
+
"hf_token": ".hf_token",
|
| 6 |
+
"image_aug": false,
|
| 7 |
+
"is_grasped": false,
|
| 8 |
+
"is_resume": false,
|
| 9 |
+
"model_type": "my_vla_qwen",
|
| 10 |
+
"pretrained_checkpoint": "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
|
| 11 |
+
"qpos": false,
|
| 12 |
+
"resume_epoch": null,
|
| 13 |
+
"resume_step": null,
|
| 14 |
+
"run_id": "bl_multiview_depth_set_table",
|
| 15 |
+
"run_id_note": null,
|
| 16 |
+
"run_root_dir": "myvla_exp",
|
| 17 |
+
"save_interval": 1000,
|
| 18 |
+
"seed": 7,
|
| 19 |
+
"segmentation": false,
|
| 20 |
+
"trackers": [
|
| 21 |
+
"jsonl",
|
| 22 |
+
"wandb"
|
| 23 |
+
],
|
| 24 |
+
"vla": {
|
| 25 |
+
"action_chunk_size": 8,
|
| 26 |
+
"action_tokenizer": "extra_action_tokenizer",
|
| 27 |
+
"base_vlm": "prism-qwen25-extra-dinosiglip-224px+0_5b",
|
| 28 |
+
"compress_history": false,
|
| 29 |
+
"data_mix": "bridge",
|
| 30 |
+
"enable_gradient_checkpointing": true,
|
| 31 |
+
"enable_mixed_precision_training": true,
|
| 32 |
+
"epochs": 10,
|
| 33 |
+
"expected_world_size": 8,
|
| 34 |
+
"freeze_llm_backbone": false,
|
| 35 |
+
"freeze_vision_backbone": true,
|
| 36 |
+
"global_batch_size": 512,
|
| 37 |
+
"image_sequence_len": 4,
|
| 38 |
+
"image_window_size": 1,
|
| 39 |
+
"learning_rate": 2e-05,
|
| 40 |
+
"lr_scheduler_type": "constant",
|
| 41 |
+
"max_grad_norm": 1.0,
|
| 42 |
+
"max_steps": null,
|
| 43 |
+
"per_device_batch_size": 8,
|
| 44 |
+
"reduce_in_full_precision": true,
|
| 45 |
+
"save_every_n_steps": 25000,
|
| 46 |
+
"shuffle_buffer_size": 256000,
|
| 47 |
+
"train_strategy": "fsdp-full-shard",
|
| 48 |
+
"type": "myvla-qwen-224px+mx-mshab",
|
| 49 |
+
"unfreeze_last_llm_layer": false,
|
| 50 |
+
"use_flow_matching": false,
|
| 51 |
+
"use_wrist_image": true,
|
| 52 |
+
"vla_id": "myvla-qwen-224px+mx-mshab",
|
| 53 |
+
"warmup_ratio": 0.0,
|
| 54 |
+
"weight_decay": 0.0
|
| 55 |
+
},
|
| 56 |
+
"wandb_entity": "traysen879-uc-san-diego",
|
| 57 |
+
"wandb_project": "mshab_vla"
|
| 58 |
+
}
|
config.yaml
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
data_root_dir: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
|
| 2 |
+
depth: false
|
| 3 |
+
global_pose: false
|
| 4 |
+
hf_token: .hf_token
|
| 5 |
+
image_aug: false
|
| 6 |
+
is_grasped: false
|
| 7 |
+
is_resume: false
|
| 8 |
+
model_type: my_vla_qwen
|
| 9 |
+
pretrained_checkpoint: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
|
| 10 |
+
qpos: false
|
| 11 |
+
resume_epoch: null
|
| 12 |
+
resume_step: null
|
| 13 |
+
run_id: bl_multiview_depth_set_table
|
| 14 |
+
run_id_note: null
|
| 15 |
+
run_root_dir: myvla_exp
|
| 16 |
+
save_interval: 1000
|
| 17 |
+
seed: 7
|
| 18 |
+
segmentation: false
|
| 19 |
+
trackers:
|
| 20 |
+
- jsonl
|
| 21 |
+
- wandb
|
| 22 |
+
vla:
|
| 23 |
+
action_chunk_size: 8
|
| 24 |
+
action_tokenizer: extra_action_tokenizer
|
| 25 |
+
base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
|
| 26 |
+
compress_history: false
|
| 27 |
+
data_mix: bridge
|
| 28 |
+
enable_gradient_checkpointing: true
|
| 29 |
+
enable_mixed_precision_training: true
|
| 30 |
+
epochs: 10
|
| 31 |
+
expected_world_size: 8
|
| 32 |
+
freeze_llm_backbone: false
|
| 33 |
+
freeze_vision_backbone: true
|
| 34 |
+
global_batch_size: 512
|
| 35 |
+
image_sequence_len: 4
|
| 36 |
+
image_window_size: 1
|
| 37 |
+
learning_rate: 2.0e-05
|
| 38 |
+
lr_scheduler_type: constant
|
| 39 |
+
max_grad_norm: 1.0
|
| 40 |
+
max_steps: null
|
| 41 |
+
per_device_batch_size: 8
|
| 42 |
+
reduce_in_full_precision: true
|
| 43 |
+
save_every_n_steps: 25000
|
| 44 |
+
shuffle_buffer_size: 256000
|
| 45 |
+
train_strategy: fsdp-full-shard
|
| 46 |
+
type: myvla-qwen-224px+mx-mshab
|
| 47 |
+
unfreeze_last_llm_layer: false
|
| 48 |
+
use_flow_matching: false
|
| 49 |
+
use_wrist_image: true
|
| 50 |
+
vla_id: myvla-qwen-224px+mx-mshab
|
| 51 |
+
warmup_ratio: 0.0
|
| 52 |
+
weight_decay: 0.0
|
| 53 |
+
wandb_entity: traysen879-uc-san-diego
|
| 54 |
+
wandb_project: mshab_vla
|
run-metrics.jsonl
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"hparams": {"data_root_dir": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data", "depth": false, "global_pose": false, "hf_token": ".hf_token", "image_aug": false, "is_grasped": false, "is_resume": false, "model_type": "my_vla_qwen", "pretrained_checkpoint": "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b", "qpos": false, "resume_epoch": null, "resume_step": null, "run_id": "bl_multiview_depth_set_table", "run_id_note": null, "run_root_dir": "myvla_exp", "save_interval": 1000, "seed": 7, "segmentation": false, "trackers": ["jsonl", "wandb"], "vla": {"action_chunk_size": 8, "action_tokenizer": "extra_action_tokenizer", "base_vlm": "prism-qwen25-extra-dinosiglip-224px+0_5b", "compress_history": false, "data_mix": "bridge", "enable_gradient_checkpointing": true, "enable_mixed_precision_training": true, "epochs": 10, "expected_world_size": 8, "freeze_llm_backbone": false, "freeze_vision_backbone": true, "global_batch_size": 512, "image_sequence_len": 4, "image_window_size": 1, "learning_rate": 2e-05, "lr_scheduler_type": "constant", "max_grad_norm": 1.0, "max_steps": null, "per_device_batch_size": 8, "reduce_in_full_precision": true, "save_every_n_steps": 25000, "shuffle_buffer_size": 256000, "train_strategy": "fsdp-full-shard", "type": "myvla-qwen-224px+mx-mshab", "unfreeze_last_llm_layer": false, "use_flow_matching": false, "use_wrist_image": true, "vla_id": "myvla-qwen-224px+mx-mshab", "warmup_ratio": 0.0, "weight_decay": 0.0}, "wandb_entity": "traysen879-uc-san-diego", "wandb_project": "mshab_vla"}, "run_id": "bl_multiview_depth_set_table"}
|
wandb/debug-internal.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-08-22T17:55:45.122997378Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
|
| 2 |
+
{"time":"2025-08-22T17:55:45.417778442Z","level":"INFO","msg":"stream: created new stream","id":"mg58khw0"}
|
| 3 |
+
{"time":"2025-08-22T17:55:45.417944745Z","level":"INFO","msg":"stream: started","id":"mg58khw0"}
|
| 4 |
+
{"time":"2025-08-22T17:55:45.417987887Z","level":"INFO","msg":"writer: Do: started","stream_id":"mg58khw0"}
|
| 5 |
+
{"time":"2025-08-22T17:55:45.418028658Z","level":"INFO","msg":"sender: started","stream_id":"mg58khw0"}
|
| 6 |
+
{"time":"2025-08-22T17:55:45.418052908Z","level":"INFO","msg":"handler: started","stream_id":"mg58khw0"}
|
| 7 |
+
{"time":"2025-08-22T19:05:01.607571042Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
| 8 |
+
{"time":"2025-08-23T06:27:10.330396586Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 9 |
+
{"time":"2025-08-23T06:27:10.442620306Z","level":"INFO","msg":"handler: operation stats","stats":{}}
|
| 10 |
+
{"time":"2025-08-23T06:27:10.450768217Z","level":"INFO","msg":"stream: closing","id":"mg58khw0"}
|
| 11 |
+
{"time":"2025-08-23T06:27:10.450887019Z","level":"INFO","msg":"handler: closed","stream_id":"mg58khw0"}
|
| 12 |
+
{"time":"2025-08-23T06:27:10.450965602Z","level":"INFO","msg":"sender: closed","stream_id":"mg58khw0"}
|
| 13 |
+
{"time":"2025-08-23T06:27:10.45093435Z","level":"INFO","msg":"writer: Close: closed","stream_id":"mg58khw0"}
|
| 14 |
+
{"time":"2025-08-23T06:27:10.45361464Z","level":"INFO","msg":"stream: closed","id":"mg58khw0"}
|
wandb/debug.log
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-08-22 17:55:44,851 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
|
| 2 |
+
2025-08-22 17:55:44,851 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Configure stats pid to 4004464
|
| 3 |
+
2025-08-22 17:55:44,851 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
|
| 4 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
|
| 5 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Loading settings from environment variables
|
| 6 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_depth_set_table/wandb/run-20250822_175544-mg58khw0/logs/debug.log
|
| 7 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_depth_set_table/wandb/run-20250822_175544-mg58khw0/logs/debug-internal.log
|
| 8 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:init():830] calling init triggers
|
| 9 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 8, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 4, 'use_wrist_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 1}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'depth': False, 'segmentation': False, '_wandb': {}}
|
| 11 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:init():871] starting backend
|
| 12 |
+
2025-08-22 17:55:45,099 INFO MainThread:4004464 [wandb_init.py:init():874] sending inform_init request
|
| 13 |
+
2025-08-22 17:55:45,108 INFO MainThread:4004464 [wandb_init.py:init():882] backend started and connected
|
| 14 |
+
2025-08-22 17:55:45,114 INFO MainThread:4004464 [wandb_init.py:init():953] updated telemetry
|
| 15 |
+
2025-08-22 17:55:45,150 INFO MainThread:4004464 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-08-22 17:55:45,586 INFO MainThread:4004464 [wandb_init.py:init():1029] starting run threads in backend
|
| 17 |
+
2025-08-22 17:55:46,161 INFO MainThread:4004464 [wandb_run.py:_console_start():2458] atexit reg
|
| 18 |
+
2025-08-22 17:55:46,161 INFO MainThread:4004464 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 19 |
+
2025-08-22 17:55:46,163 INFO MainThread:4004464 [wandb_run.py:_redirect():2375] Wrapping output streams.
|
| 20 |
+
2025-08-22 17:55:46,163 INFO MainThread:4004464 [wandb_run.py:_redirect():2398] Redirects installed.
|
| 21 |
+
2025-08-22 17:55:46,173 INFO MainThread:4004464 [wandb_init.py:init():1075] run started, returning control to user process
|
| 22 |
+
2025-08-23 06:27:09,979 INFO MainThread:4004464 [wandb_run.py:_finish():2224] finishing run traysen879-uc-san-diego/mshab_vla/mg58khw0
|
| 23 |
+
2025-08-23 06:27:09,980 INFO MainThread:4004464 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
|
| 24 |
+
2025-08-23 06:27:09,980 INFO MainThread:4004464 [wandb_run.py:_restore():2405] restore
|
| 25 |
+
2025-08-23 06:27:09,981 INFO MainThread:4004464 [wandb_run.py:_restore():2411] restore done
|
| 26 |
+
2025-08-23 06:27:10,444 INFO MainThread:4004464 [wandb_run.py:_footer_history_summary_info():3903] rendering history
|
| 27 |
+
2025-08-23 06:27:10,446 INFO MainThread:4004464 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
|
| 28 |
+
2025-08-23 06:27:10,446 INFO MainThread:4004464 [wandb_run.py:_footer_sync_info():3864] logging synced files
|
wandb/run-20250822_172424-grd0n90q/files/config.yaml
ADDED
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.21.0
|
| 4 |
+
e:
|
| 5 |
+
dnq1lm07509oy3nh24mhk899wrek9b94:
|
| 6 |
+
args:
|
| 7 |
+
- --pretrained_checkpoint
|
| 8 |
+
- Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
|
| 9 |
+
- --model_type
|
| 10 |
+
- my_vla_qwen
|
| 11 |
+
- --vla.type
|
| 12 |
+
- myvla-qwen-224px+mx-mshab
|
| 13 |
+
- --vla.expected_world_size
|
| 14 |
+
- "8"
|
| 15 |
+
- --vla.global_batch_size
|
| 16 |
+
- "512"
|
| 17 |
+
- --vla.per_device_batch_size
|
| 18 |
+
- "8"
|
| 19 |
+
- --vla.learning_rate
|
| 20 |
+
- "2e-5"
|
| 21 |
+
- --vla.freeze_vision_backbone
|
| 22 |
+
- "True"
|
| 23 |
+
- --vla.freeze_llm_backbone
|
| 24 |
+
- "False"
|
| 25 |
+
- --vla.use_flow_matching
|
| 26 |
+
- "False"
|
| 27 |
+
- --vla.compress_history
|
| 28 |
+
- "False"
|
| 29 |
+
- --vla.image_sequence_len
|
| 30 |
+
- "4"
|
| 31 |
+
- --vla.image_window_size
|
| 32 |
+
- "1"
|
| 33 |
+
- --vla.epochs
|
| 34 |
+
- "10"
|
| 35 |
+
- --save_interval
|
| 36 |
+
- "1000"
|
| 37 |
+
- --run_id
|
| 38 |
+
- bl_multiview_depth_set_table
|
| 39 |
+
codePath: vla-scripts/train.py
|
| 40 |
+
codePathLocal: vla-scripts/train.py
|
| 41 |
+
cpu_count: 240
|
| 42 |
+
cpu_count_logical: 240
|
| 43 |
+
cudaVersion: "12.4"
|
| 44 |
+
disk:
|
| 45 |
+
/:
|
| 46 |
+
total: "20812690710528"
|
| 47 |
+
used: "36965724160"
|
| 48 |
+
email: traysen879@gmail.com
|
| 49 |
+
executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
|
| 50 |
+
git:
|
| 51 |
+
commit: 409e4c9a165115624c271028e9b3ee335991b747
|
| 52 |
+
remote: https://github.com/TRS07170/myvla.git
|
| 53 |
+
gpu: NVIDIA A100-SXM4-80GB
|
| 54 |
+
gpu_count: 8
|
| 55 |
+
gpu_nvidia:
|
| 56 |
+
- architecture: Ampere
|
| 57 |
+
cudaCores: 6912
|
| 58 |
+
memoryTotal: "85899345920"
|
| 59 |
+
name: NVIDIA A100-SXM4-80GB
|
| 60 |
+
uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
|
| 61 |
+
- architecture: Ampere
|
| 62 |
+
cudaCores: 6912
|
| 63 |
+
memoryTotal: "85899345920"
|
| 64 |
+
name: NVIDIA A100-SXM4-80GB
|
| 65 |
+
uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
|
| 66 |
+
- architecture: Ampere
|
| 67 |
+
cudaCores: 6912
|
| 68 |
+
memoryTotal: "85899345920"
|
| 69 |
+
name: NVIDIA A100-SXM4-80GB
|
| 70 |
+
uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
|
| 71 |
+
- architecture: Ampere
|
| 72 |
+
cudaCores: 6912
|
| 73 |
+
memoryTotal: "85899345920"
|
| 74 |
+
name: NVIDIA A100-SXM4-80GB
|
| 75 |
+
uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
|
| 76 |
+
- architecture: Ampere
|
| 77 |
+
cudaCores: 6912
|
| 78 |
+
memoryTotal: "85899345920"
|
| 79 |
+
name: NVIDIA A100-SXM4-80GB
|
| 80 |
+
uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
|
| 81 |
+
- architecture: Ampere
|
| 82 |
+
cudaCores: 6912
|
| 83 |
+
memoryTotal: "85899345920"
|
| 84 |
+
name: NVIDIA A100-SXM4-80GB
|
| 85 |
+
uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
|
| 86 |
+
- architecture: Ampere
|
| 87 |
+
cudaCores: 6912
|
| 88 |
+
memoryTotal: "85899345920"
|
| 89 |
+
name: NVIDIA A100-SXM4-80GB
|
| 90 |
+
uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
|
| 91 |
+
- architecture: Ampere
|
| 92 |
+
cudaCores: 6912
|
| 93 |
+
memoryTotal: "85899345920"
|
| 94 |
+
name: NVIDIA A100-SXM4-80GB
|
| 95 |
+
uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
|
| 96 |
+
host: 164-152-109-69
|
| 97 |
+
memory:
|
| 98 |
+
total: "1902324936704"
|
| 99 |
+
os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
|
| 100 |
+
program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
|
| 101 |
+
python: CPython 3.10.18
|
| 102 |
+
root: myvla_exp/bl_multiview_depth_set_table
|
| 103 |
+
startedAt: "2025-08-22T17:24:24.381806Z"
|
| 104 |
+
writerId: dnq1lm07509oy3nh24mhk899wrek9b94
|
| 105 |
+
m: []
|
| 106 |
+
python_version: 3.10.18
|
| 107 |
+
t:
|
| 108 |
+
"1":
|
| 109 |
+
- 1
|
| 110 |
+
- 2
|
| 111 |
+
- 3
|
| 112 |
+
- 11
|
| 113 |
+
- 41
|
| 114 |
+
- 49
|
| 115 |
+
- 63
|
| 116 |
+
- 71
|
| 117 |
+
"2":
|
| 118 |
+
- 1
|
| 119 |
+
- 2
|
| 120 |
+
- 3
|
| 121 |
+
- 11
|
| 122 |
+
- 41
|
| 123 |
+
- 49
|
| 124 |
+
- 63
|
| 125 |
+
- 71
|
| 126 |
+
"3":
|
| 127 |
+
- 13
|
| 128 |
+
- 16
|
| 129 |
+
- 61
|
| 130 |
+
"4": 3.10.18
|
| 131 |
+
"5": 0.21.0
|
| 132 |
+
"6": 4.40.1
|
| 133 |
+
"12": 0.21.0
|
| 134 |
+
"13": linux-x86_64
|
| 135 |
+
data_root_dir:
|
| 136 |
+
value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
|
| 137 |
+
depth:
|
| 138 |
+
value: false
|
| 139 |
+
global_pose:
|
| 140 |
+
value: false
|
| 141 |
+
hf_token:
|
| 142 |
+
value: .hf_token
|
| 143 |
+
image_aug:
|
| 144 |
+
value: false
|
| 145 |
+
is_grasped:
|
| 146 |
+
value: false
|
| 147 |
+
is_resume:
|
| 148 |
+
value: false
|
| 149 |
+
model_type:
|
| 150 |
+
value: my_vla_qwen
|
| 151 |
+
pretrained_checkpoint:
|
| 152 |
+
value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
|
| 153 |
+
qpos:
|
| 154 |
+
value: false
|
| 155 |
+
resume_epoch:
|
| 156 |
+
value: null
|
| 157 |
+
resume_step:
|
| 158 |
+
value: null
|
| 159 |
+
run_id:
|
| 160 |
+
value: bl_multiview_depth_set_table
|
| 161 |
+
run_id_note:
|
| 162 |
+
value: null
|
| 163 |
+
run_root_dir:
|
| 164 |
+
value: myvla_exp
|
| 165 |
+
save_interval:
|
| 166 |
+
value: 1000
|
| 167 |
+
seed:
|
| 168 |
+
value: 7
|
| 169 |
+
segmentation:
|
| 170 |
+
value: false
|
| 171 |
+
trackers:
|
| 172 |
+
value:
|
| 173 |
+
- jsonl
|
| 174 |
+
- wandb
|
| 175 |
+
vla:
|
| 176 |
+
value:
|
| 177 |
+
action_chunk_size: 8
|
| 178 |
+
action_tokenizer: extra_action_tokenizer
|
| 179 |
+
base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
|
| 180 |
+
compress_history: false
|
| 181 |
+
data_mix: bridge
|
| 182 |
+
enable_gradient_checkpointing: true
|
| 183 |
+
enable_mixed_precision_training: true
|
| 184 |
+
epochs: 10
|
| 185 |
+
expected_world_size: 8
|
| 186 |
+
freeze_llm_backbone: false
|
| 187 |
+
freeze_vision_backbone: true
|
| 188 |
+
global_batch_size: 512
|
| 189 |
+
image_sequence_len: 4
|
| 190 |
+
image_window_size: 1
|
| 191 |
+
learning_rate: 2e-05
|
| 192 |
+
lr_scheduler_type: constant
|
| 193 |
+
max_grad_norm: 1
|
| 194 |
+
max_steps: null
|
| 195 |
+
per_device_batch_size: 8
|
| 196 |
+
reduce_in_full_precision: true
|
| 197 |
+
save_every_n_steps: 25000
|
| 198 |
+
shuffle_buffer_size: 256000
|
| 199 |
+
train_strategy: fsdp-full-shard
|
| 200 |
+
type: myvla-qwen-224px+mx-mshab
|
| 201 |
+
unfreeze_last_llm_layer: false
|
| 202 |
+
use_flow_matching: false
|
| 203 |
+
use_wrist_image: true
|
| 204 |
+
vla_id: myvla-qwen-224px+mx-mshab
|
| 205 |
+
warmup_ratio: 0
|
| 206 |
+
weight_decay: 0
|
| 207 |
+
wandb_entity:
|
| 208 |
+
value: traysen879-uc-san-diego
|
| 209 |
+
wandb_project:
|
| 210 |
+
value: mshab_vla
|
wandb/run-20250822_172424-grd0n90q/files/output.log
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2;36m08/22 [17:24:25][0m[2;36m [0m[34mINFO [0m | >> [1m[[0m*[1m][0m Starting VLA Training Loop ]8;id=817857;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\[2mtrain.py[0m]8;;\[2m:[0m]8;id=998125;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#322\[2m322[0m]8;;\
|
| 2 |
+
Traceback (most recent call last):
|
| 3 |
+
File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 342, in <module>
|
| 4 |
+
train()
|
| 5 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
|
| 6 |
+
response = fn(cfg, *args, **kwargs)
|
| 7 |
+
File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 323, in train
|
| 8 |
+
train_strategy.run_vla_training(
|
| 9 |
+
File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/strategies/base_strategy.py", line 342, in run_vla_training
|
| 10 |
+
output, aux_loss = self.vlm(
|
| 11 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 12 |
+
return self._call_impl(*args, **kwargs)
|
| 13 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 14 |
+
return forward_call(*args, **kwargs)
|
| 15 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward
|
| 16 |
+
output = self._fsdp_wrapped_module(*args, **kwargs)
|
| 17 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 18 |
+
return self._call_impl(*args, **kwargs)
|
| 19 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 20 |
+
return forward_call(*args, **kwargs)
|
| 21 |
+
File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/vlas/myvla.py", line 277, in forward
|
| 22 |
+
output = super().forward(
|
| 23 |
+
File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/vlms/prismatic.py", line 373, in forward
|
| 24 |
+
patch_features = self.vision_backbone({k: pixel_values[k][multimodal_indices] for k in pixel_values})
|
| 25 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 26 |
+
return self._call_impl(*args, **kwargs)
|
| 27 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 28 |
+
return forward_call(*args, **kwargs)
|
| 29 |
+
File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/backbones/vision/dinosiglip_vit.py", line 169, in forward
|
| 30 |
+
patches = compute_sequence_patches(pixel_values, featurizers, self.image_sequence_len)
|
| 31 |
+
File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/backbones/vision/base_vision.py", line 47, in compute_sequence_patches
|
| 32 |
+
patches[k] = merge_two_dims(sequence_combine_call_split(trunc_pixels_k, featurizers[k]), start_dim=1)
|
| 33 |
+
File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/util/torch_utils.py", line 106, in sequence_combine_call_split
|
| 34 |
+
flat_outputs = fn(flat_sequence)
|
| 35 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 36 |
+
return self._call_impl(*args, **kwargs)
|
| 37 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 38 |
+
return forward_call(*args, **kwargs)
|
| 39 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward
|
| 40 |
+
output = self._fsdp_wrapped_module(*args, **kwargs)
|
| 41 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 42 |
+
return self._call_impl(*args, **kwargs)
|
| 43 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 44 |
+
return forward_call(*args, **kwargs)
|
| 45 |
+
File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/backbones/vision/base_vision.py", line 31, in wrapper
|
| 46 |
+
result = fn(*args, **kwargs)
|
| 47 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/timm/models/vision_transformer.py", line 644, in get_intermediate_layers
|
| 48 |
+
outputs = self._intermediate_layers(x, n)
|
| 49 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/timm/models/vision_transformer.py", line 626, in _intermediate_layers
|
| 50 |
+
x = blk(x)
|
| 51 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 52 |
+
return self._call_impl(*args, **kwargs)
|
| 53 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 54 |
+
return forward_call(*args, **kwargs)
|
| 55 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward
|
| 56 |
+
output = self._fsdp_wrapped_module(*args, **kwargs)
|
| 57 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 58 |
+
return self._call_impl(*args, **kwargs)
|
| 59 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 60 |
+
return forward_call(*args, **kwargs)
|
| 61 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/timm/models/vision_transformer.py", line 157, in forward
|
| 62 |
+
x = x + self.drop_path2(self.ls2(self.mlp(self.norm2(x))))
|
| 63 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 64 |
+
return self._call_impl(*args, **kwargs)
|
| 65 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 66 |
+
return forward_call(*args, **kwargs)
|
| 67 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/timm/layers/mlp.py", line 46, in forward
|
| 68 |
+
x = self.fc2(x)
|
| 69 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
|
| 70 |
+
return self._call_impl(*args, **kwargs)
|
| 71 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
|
| 72 |
+
return forward_call(*args, **kwargs)
|
| 73 |
+
File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/linear.py", line 116, in forward
|
| 74 |
+
return F.linear(input, self.weight, self.bias)
|
| 75 |
+
KeyboardInterrupt
|
wandb/run-20250822_172424-grd0n90q/files/requirements.txt
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
setuptools==78.1.1
|
| 2 |
+
wheel==0.45.1
|
| 3 |
+
pip==25.1
|
| 4 |
+
sentencepiece==0.1.99
|
| 5 |
+
mpmath==1.3.0
|
| 6 |
+
libclang==18.1.1
|
| 7 |
+
flatbuffers==25.2.10
|
| 8 |
+
zipp==3.23.0
|
| 9 |
+
wrapt==1.14.1
|
| 10 |
+
urllib3==2.5.0
|
| 11 |
+
typing_extensions==4.14.1
|
| 12 |
+
typeguard==2.13.3
|
| 13 |
+
tqdm==4.67.1
|
| 14 |
+
toml==0.10.2
|
| 15 |
+
termcolor==3.1.0
|
| 16 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
| 17 |
+
tensorflow-estimator==2.15.0
|
| 18 |
+
tensorboard-data-server==0.7.2
|
| 19 |
+
sympy==1.14.0
|
| 20 |
+
smmap==5.0.2
|
| 21 |
+
six==1.17.0
|
| 22 |
+
safetensors==0.5.3
|
| 23 |
+
regex==2025.7.34
|
| 24 |
+
PyYAML==6.0.2
|
| 25 |
+
pyparsing==3.2.3
|
| 26 |
+
Pygments==2.19.2
|
| 27 |
+
pyasn1==0.6.1
|
| 28 |
+
psutil==7.0.0
|
| 29 |
+
protobuf==4.21.12
|
| 30 |
+
platformdirs==4.3.8
|
| 31 |
+
pillow==11.3.0
|
| 32 |
+
packaging==25.0
|
| 33 |
+
opt_einsum==3.4.0
|
| 34 |
+
oauthlib==3.3.1
|
| 35 |
+
nvidia-nvtx-cu12==12.1.105
|
| 36 |
+
nvidia-nvjitlink-cu12==12.9.86
|
| 37 |
+
nvidia-nccl-cu12==2.19.3
|
| 38 |
+
nvidia-curand-cu12==10.3.2.106
|
| 39 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 40 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 41 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 42 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 43 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 44 |
+
numpy==1.26.4
|
| 45 |
+
networkx==3.4.2
|
| 46 |
+
mypy_extensions==1.1.0
|
| 47 |
+
mergedeep==1.3.4
|
| 48 |
+
mdurl==0.1.2
|
| 49 |
+
MarkupSafe==3.0.2
|
| 50 |
+
Markdown==3.8.2
|
| 51 |
+
kiwisolver==1.4.8
|
| 52 |
+
keras==2.15.0
|
| 53 |
+
importlib_resources==6.5.2
|
| 54 |
+
idna==3.10
|
| 55 |
+
hf-xet==1.1.5
|
| 56 |
+
grpcio==1.74.0
|
| 57 |
+
gast==0.6.0
|
| 58 |
+
fsspec==2025.7.0
|
| 59 |
+
fonttools==4.59.0
|
| 60 |
+
filelock==3.18.0
|
| 61 |
+
etils==1.13.0
|
| 62 |
+
einops==0.8.1
|
| 63 |
+
cycler==0.12.1
|
| 64 |
+
click==8.2.1
|
| 65 |
+
charset-normalizer==3.4.2
|
| 66 |
+
certifi==2025.8.3
|
| 67 |
+
cachetools==5.5.2
|
| 68 |
+
attrs==25.3.0
|
| 69 |
+
annotated-types==0.7.0
|
| 70 |
+
absl-py==2.3.1
|
| 71 |
+
Werkzeug==3.1.3
|
| 72 |
+
typing-inspection==0.4.1
|
| 73 |
+
typing-inspect==0.9.0
|
| 74 |
+
triton==2.2.0
|
| 75 |
+
trimesh==4.7.1
|
| 76 |
+
tensorflow-metadata==1.17.2
|
| 77 |
+
tensorflow-addons==0.23.0
|
| 78 |
+
sentry-sdk==2.34.1
|
| 79 |
+
scipy==1.15.3
|
| 80 |
+
rsa==4.9.1
|
| 81 |
+
requests==2.32.4
|
| 82 |
+
pyyaml-include==1.4.1
|
| 83 |
+
python-dateutil==2.9.0.post0
|
| 84 |
+
pydantic_core==2.33.2
|
| 85 |
+
pyasn1_modules==0.4.2
|
| 86 |
+
promise==2.3
|
| 87 |
+
OpenEXR==3.3.5
|
| 88 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 89 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 90 |
+
ml-dtypes==0.2.0
|
| 91 |
+
markdown-it-py==3.0.0
|
| 92 |
+
jsonlines==4.0.0
|
| 93 |
+
json-numpy==2.1.1
|
| 94 |
+
Jinja2==3.1.6
|
| 95 |
+
h5py==3.14.0
|
| 96 |
+
google-pasta==0.2.0
|
| 97 |
+
gitdb==4.0.12
|
| 98 |
+
dm-tree==0.1.9
|
| 99 |
+
contourpy==1.3.2
|
| 100 |
+
astunparse==1.6.3
|
| 101 |
+
rich==14.1.0
|
| 102 |
+
requests-oauthlib==2.0.0
|
| 103 |
+
pydantic==2.11.7
|
| 104 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 105 |
+
matplotlib==3.10.5
|
| 106 |
+
huggingface-hub==0.34.3
|
| 107 |
+
google-auth==2.40.3
|
| 108 |
+
GitPython==3.1.45
|
| 109 |
+
draccus==0.8.0
|
| 110 |
+
wandb==0.21.0
|
| 111 |
+
torch==2.2.0
|
| 112 |
+
tokenizers==0.19.1
|
| 113 |
+
google-auth-oauthlib==1.2.2
|
| 114 |
+
array_record==0.7.2
|
| 115 |
+
transformers==4.40.1
|
| 116 |
+
torchvision==0.17.0
|
| 117 |
+
torchaudio==2.2.0
|
| 118 |
+
tensorboard==2.15.2
|
| 119 |
+
accelerate==1.9.0
|
| 120 |
+
timm==0.9.10
|
| 121 |
+
tensorflow-datasets==4.9.3
|
| 122 |
+
tensorflow==2.15.0
|
| 123 |
+
peft==0.11.1
|
| 124 |
+
tensorflow-graphics==2021.12.3
|
| 125 |
+
dlimp==0.0.1
|
| 126 |
+
openvla==0.0.3
|
| 127 |
+
ninja==1.11.1.4
|
| 128 |
+
flash-attn==2.5.5
|
| 129 |
+
autocommand==2.2.2
|
| 130 |
+
backports.tarfile==1.2.0
|
| 131 |
+
importlib_metadata==8.0.0
|
| 132 |
+
inflect==7.3.1
|
| 133 |
+
jaraco.collections==5.1.0
|
| 134 |
+
jaraco.context==5.3.0
|
| 135 |
+
jaraco.functools==4.0.1
|
| 136 |
+
jaraco.text==3.12.1
|
| 137 |
+
more-itertools==10.3.0
|
| 138 |
+
packaging==24.2
|
| 139 |
+
platformdirs==4.2.2
|
| 140 |
+
tomli==2.0.1
|
| 141 |
+
typeguard==4.3.0
|
| 142 |
+
typing_extensions==4.12.2
|
| 143 |
+
wheel==0.45.1
|
| 144 |
+
zipp==3.19.2
|
wandb/run-20250822_172424-grd0n90q/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.18",
|
| 4 |
+
"startedAt": "2025-08-22T17:24:24.381806Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--pretrained_checkpoint",
|
| 7 |
+
"Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
|
| 8 |
+
"--model_type",
|
| 9 |
+
"my_vla_qwen",
|
| 10 |
+
"--vla.type",
|
| 11 |
+
"myvla-qwen-224px+mx-mshab",
|
| 12 |
+
"--vla.expected_world_size",
|
| 13 |
+
"8",
|
| 14 |
+
"--vla.global_batch_size",
|
| 15 |
+
"512",
|
| 16 |
+
"--vla.per_device_batch_size",
|
| 17 |
+
"8",
|
| 18 |
+
"--vla.learning_rate",
|
| 19 |
+
"2e-5",
|
| 20 |
+
"--vla.freeze_vision_backbone",
|
| 21 |
+
"True",
|
| 22 |
+
"--vla.freeze_llm_backbone",
|
| 23 |
+
"False",
|
| 24 |
+
"--vla.use_flow_matching",
|
| 25 |
+
"False",
|
| 26 |
+
"--vla.compress_history",
|
| 27 |
+
"False",
|
| 28 |
+
"--vla.image_sequence_len",
|
| 29 |
+
"4",
|
| 30 |
+
"--vla.image_window_size",
|
| 31 |
+
"1",
|
| 32 |
+
"--vla.epochs",
|
| 33 |
+
"10",
|
| 34 |
+
"--save_interval",
|
| 35 |
+
"1000",
|
| 36 |
+
"--run_id",
|
| 37 |
+
"bl_multiview_depth_set_table"
|
| 38 |
+
],
|
| 39 |
+
"program": "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
|
| 40 |
+
"codePath": "vla-scripts/train.py",
|
| 41 |
+
"codePathLocal": "vla-scripts/train.py",
|
| 42 |
+
"git": {
|
| 43 |
+
"remote": "https://github.com/TRS07170/myvla.git",
|
| 44 |
+
"commit": "409e4c9a165115624c271028e9b3ee335991b747"
|
| 45 |
+
},
|
| 46 |
+
"email": "traysen879@gmail.com",
|
| 47 |
+
"root": "myvla_exp/bl_multiview_depth_set_table",
|
| 48 |
+
"host": "164-152-109-69",
|
| 49 |
+
"executable": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
|
| 50 |
+
"cpu_count": 240,
|
| 51 |
+
"cpu_count_logical": 240,
|
| 52 |
+
"gpu": "NVIDIA A100-SXM4-80GB",
|
| 53 |
+
"gpu_count": 8,
|
| 54 |
+
"disk": {
|
| 55 |
+
"/": {
|
| 56 |
+
"total": "20812690710528",
|
| 57 |
+
"used": "36965724160"
|
| 58 |
+
}
|
| 59 |
+
},
|
| 60 |
+
"memory": {
|
| 61 |
+
"total": "1902324936704"
|
| 62 |
+
},
|
| 63 |
+
"gpu_nvidia": [
|
| 64 |
+
{
|
| 65 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 66 |
+
"memoryTotal": "85899345920",
|
| 67 |
+
"cudaCores": 6912,
|
| 68 |
+
"architecture": "Ampere",
|
| 69 |
+
"uuid": "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 73 |
+
"memoryTotal": "85899345920",
|
| 74 |
+
"cudaCores": 6912,
|
| 75 |
+
"architecture": "Ampere",
|
| 76 |
+
"uuid": "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 80 |
+
"memoryTotal": "85899345920",
|
| 81 |
+
"cudaCores": 6912,
|
| 82 |
+
"architecture": "Ampere",
|
| 83 |
+
"uuid": "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 87 |
+
"memoryTotal": "85899345920",
|
| 88 |
+
"cudaCores": 6912,
|
| 89 |
+
"architecture": "Ampere",
|
| 90 |
+
"uuid": "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 94 |
+
"memoryTotal": "85899345920",
|
| 95 |
+
"cudaCores": 6912,
|
| 96 |
+
"architecture": "Ampere",
|
| 97 |
+
"uuid": "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 101 |
+
"memoryTotal": "85899345920",
|
| 102 |
+
"cudaCores": 6912,
|
| 103 |
+
"architecture": "Ampere",
|
| 104 |
+
"uuid": "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 108 |
+
"memoryTotal": "85899345920",
|
| 109 |
+
"cudaCores": 6912,
|
| 110 |
+
"architecture": "Ampere",
|
| 111 |
+
"uuid": "GPU-335150e5-634c-68e2-4930-656c95e62244"
|
| 112 |
+
},
|
| 113 |
+
{
|
| 114 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 115 |
+
"memoryTotal": "85899345920",
|
| 116 |
+
"cudaCores": 6912,
|
| 117 |
+
"architecture": "Ampere",
|
| 118 |
+
"uuid": "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
|
| 119 |
+
}
|
| 120 |
+
],
|
| 121 |
+
"cudaVersion": "12.4",
|
| 122 |
+
"writerId": "dnq1lm07509oy3nh24mhk899wrek9b94"
|
| 123 |
+
}
|
wandb/run-20250822_172424-grd0n90q/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"VLA Train/L1 Loss":0.6152674662097849,"VLA Train/Step Time":9.066001892089844,"_step":18,"_timestamp":1.7558836377797978e+09,"VLA Train/Step":18,"VLA Train/Loss (Raw)":0.5707007646560669,"_runtime":179.53910675,"VLA Train/Auxiliary Loss":0,"VLA Train/Action Token Accuracy":0.28155338764190674,"VLA Train/Epoch":0,"VLA Train/Loss":0.5707007646560669,"_wandb":{"runtime":179},"VLA Train/Learning Rate":2e-05}
|
wandb/run-20250822_172424-grd0n90q/logs/debug-core.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-08-22T17:24:24.667654596Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp_342id0h/port-4002856.txt","pid":4002856,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2025-08-22T17:24:24.66880959Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4002856-4003306-1128414806/socket","Net":"unix"}}
|
| 3 |
+
{"time":"2025-08-22T17:24:24.668943304Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":4002856}
|
| 4 |
+
{"time":"2025-08-22T17:24:24.688040644Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2025-08-22T17:24:24.702187716Z","level":"INFO","msg":"handleInformInit: received","streamId":"grd0n90q","id":"1(@)"}
|
| 6 |
+
{"time":"2025-08-22T17:24:24.994195243Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"grd0n90q","id":"1(@)"}
|
| 7 |
+
{"time":"2025-08-22T17:27:24.702921547Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
|
| 8 |
+
{"time":"2025-08-22T17:27:24.703114991Z","level":"INFO","msg":"server is shutting down"}
|
| 9 |
+
{"time":"2025-08-22T17:27:24.703256274Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-4002856-4003306-1128414806/socket","Net":"unix"}}
|
| 10 |
+
{"time":"2025-08-22T17:27:24.70309939Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
|
| 11 |
+
{"time":"2025-08-22T17:27:24.703325336Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
|
| 12 |
+
{"time":"2025-08-22T17:27:24.841847315Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
|
| 13 |
+
{"time":"2025-08-22T17:27:24.841900846Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
|
| 14 |
+
{"time":"2025-08-22T17:27:24.841914286Z","level":"INFO","msg":"server is closed"}
|
wandb/run-20250822_172424-grd0n90q/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-08-22T17:24:24.706169983Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
|
| 2 |
+
{"time":"2025-08-22T17:24:24.993938237Z","level":"INFO","msg":"stream: created new stream","id":"grd0n90q"}
|
| 3 |
+
{"time":"2025-08-22T17:24:24.99408303Z","level":"INFO","msg":"stream: started","id":"grd0n90q"}
|
| 4 |
+
{"time":"2025-08-22T17:24:24.994179082Z","level":"INFO","msg":"handler: started","stream_id":"grd0n90q"}
|
| 5 |
+
{"time":"2025-08-22T17:24:24.994227393Z","level":"INFO","msg":"sender: started","stream_id":"grd0n90q"}
|
| 6 |
+
{"time":"2025-08-22T17:24:24.994136561Z","level":"INFO","msg":"writer: Do: started","stream_id":"grd0n90q"}
|
| 7 |
+
{"time":"2025-08-22T17:27:24.703158922Z","level":"INFO","msg":"stream: closing","id":"grd0n90q"}
|
| 8 |
+
{"time":"2025-08-22T17:27:24.785199848Z","level":"ERROR","msg":"HTTP error","status":404,"method":"POST","url":"https://api.wandb.ai/graphql"}
|
| 9 |
+
{"time":"2025-08-22T17:27:24.785511865Z","level":"ERROR","msg":"runfiles: CreateRunFiles returned error: returned error 404: {\"data\":{\"createRunFiles\":null},\"errors\":[{\"message\":\"run mshab_vla/grd0n90q not found during createRunFiles\",\"path\":[\"createRunFiles\"]}]}"}
|
| 10 |
+
{"time":"2025-08-22T17:27:24.790789631Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 11 |
+
{"time":"2025-08-22T17:27:24.831280893Z","level":"ERROR","msg":"HTTP error","status":404,"method":"POST","url":"https://api.wandb.ai/files/traysen879-uc-san-diego/mshab_vla/grd0n90q/file_stream"}
|
| 12 |
+
{"time":"2025-08-22T17:27:24.831581099Z","level":"ERROR+4","msg":"filestream: fatal error: filestream: failed to upload: 404 Not Found path=files/traysen879-uc-san-diego/mshab_vla/grd0n90q/file_stream: {\"error\":\"run mshab_vla/grd0n90q not found while streaming file\"}"}
|
| 13 |
+
{"time":"2025-08-22T17:27:24.834877722Z","level":"INFO","msg":"handler: closed","stream_id":"grd0n90q"}
|
| 14 |
+
{"time":"2025-08-22T17:27:24.834904482Z","level":"INFO","msg":"writer: Close: closed","stream_id":"grd0n90q"}
|
| 15 |
+
{"time":"2025-08-22T17:27:24.834946803Z","level":"INFO","msg":"sender: closed","stream_id":"grd0n90q"}
|
| 16 |
+
{"time":"2025-08-22T17:27:24.839882542Z","level":"INFO","msg":"stream: closed","id":"grd0n90q"}
|
wandb/run-20250822_172424-grd0n90q/logs/debug.log
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-08-22 17:24:24,427 INFO MainThread:4002856 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
|
| 2 |
+
2025-08-22 17:24:24,427 INFO MainThread:4002856 [wandb_setup.py:_flush():80] Configure stats pid to 4002856
|
| 3 |
+
2025-08-22 17:24:24,427 INFO MainThread:4002856 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
|
| 4 |
+
2025-08-22 17:24:24,430 INFO MainThread:4002856 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
|
| 5 |
+
2025-08-22 17:24:24,432 INFO MainThread:4002856 [wandb_setup.py:_flush():80] Loading settings from environment variables
|
| 6 |
+
2025-08-22 17:24:24,432 INFO MainThread:4002856 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_depth_set_table/wandb/run-20250822_172424-grd0n90q/logs/debug.log
|
| 7 |
+
2025-08-22 17:24:24,434 INFO MainThread:4002856 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_depth_set_table/wandb/run-20250822_172424-grd0n90q/logs/debug-internal.log
|
| 8 |
+
2025-08-22 17:24:24,436 INFO MainThread:4002856 [wandb_init.py:init():830] calling init triggers
|
| 9 |
+
2025-08-22 17:24:24,438 INFO MainThread:4002856 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 8, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 4, 'use_wrist_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 1}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'depth': False, 'segmentation': False, '_wandb': {}}
|
| 11 |
+
2025-08-22 17:24:24,438 INFO MainThread:4002856 [wandb_init.py:init():871] starting backend
|
| 12 |
+
2025-08-22 17:24:24,688 INFO MainThread:4002856 [wandb_init.py:init():874] sending inform_init request
|
| 13 |
+
2025-08-22 17:24:24,698 INFO MainThread:4002856 [wandb_init.py:init():882] backend started and connected
|
| 14 |
+
2025-08-22 17:24:24,703 INFO MainThread:4002856 [wandb_init.py:init():953] updated telemetry
|
| 15 |
+
2025-08-22 17:24:24,733 INFO MainThread:4002856 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-08-22 17:24:25,161 INFO MainThread:4002856 [wandb_init.py:init():1029] starting run threads in backend
|
| 17 |
+
2025-08-22 17:24:25,682 INFO MainThread:4002856 [wandb_run.py:_console_start():2458] atexit reg
|
| 18 |
+
2025-08-22 17:24:25,683 INFO MainThread:4002856 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 19 |
+
2025-08-22 17:24:25,685 INFO MainThread:4002856 [wandb_run.py:_redirect():2375] Wrapping output streams.
|
| 20 |
+
2025-08-22 17:24:25,685 INFO MainThread:4002856 [wandb_run.py:_redirect():2398] Redirects installed.
|
| 21 |
+
2025-08-22 17:24:25,696 INFO MainThread:4002856 [wandb_init.py:init():1075] run started, returning control to user process
|
| 22 |
+
2025-08-22 17:27:24,701 INFO MsgRouterThr:4002856 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
|
wandb/run-20250822_172424-grd0n90q/run-grd0n90q.wandb
ADDED
|
Binary file (79.9 kB). View file
|
|
|
wandb/run-20250822_175544-mg58khw0/files/config.yaml
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
_wandb:
|
| 2 |
+
value:
|
| 3 |
+
cli_version: 0.21.0
|
| 4 |
+
e:
|
| 5 |
+
kigmkgl7ewlmvaaruo3ggp33h5rhvks0:
|
| 6 |
+
args:
|
| 7 |
+
- --pretrained_checkpoint
|
| 8 |
+
- Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
|
| 9 |
+
- --model_type
|
| 10 |
+
- my_vla_qwen
|
| 11 |
+
- --vla.type
|
| 12 |
+
- myvla-qwen-224px+mx-mshab
|
| 13 |
+
- --vla.expected_world_size
|
| 14 |
+
- "8"
|
| 15 |
+
- --vla.global_batch_size
|
| 16 |
+
- "512"
|
| 17 |
+
- --vla.per_device_batch_size
|
| 18 |
+
- "8"
|
| 19 |
+
- --vla.learning_rate
|
| 20 |
+
- "2e-5"
|
| 21 |
+
- --vla.freeze_vision_backbone
|
| 22 |
+
- "True"
|
| 23 |
+
- --vla.freeze_llm_backbone
|
| 24 |
+
- "False"
|
| 25 |
+
- --vla.use_flow_matching
|
| 26 |
+
- "False"
|
| 27 |
+
- --vla.compress_history
|
| 28 |
+
- "False"
|
| 29 |
+
- --vla.image_sequence_len
|
| 30 |
+
- "4"
|
| 31 |
+
- --vla.image_window_size
|
| 32 |
+
- "1"
|
| 33 |
+
- --vla.epochs
|
| 34 |
+
- "10"
|
| 35 |
+
- --save_interval
|
| 36 |
+
- "1000"
|
| 37 |
+
- --run_id
|
| 38 |
+
- bl_multiview_depth_set_table
|
| 39 |
+
codePath: vla-scripts/train.py
|
| 40 |
+
codePathLocal: vla-scripts/train.py
|
| 41 |
+
cpu_count: 240
|
| 42 |
+
cpu_count_logical: 240
|
| 43 |
+
cudaVersion: "12.4"
|
| 44 |
+
disk:
|
| 45 |
+
/:
|
| 46 |
+
total: "20812690710528"
|
| 47 |
+
used: "36965580800"
|
| 48 |
+
email: traysen879@gmail.com
|
| 49 |
+
executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
|
| 50 |
+
git:
|
| 51 |
+
commit: 409e4c9a165115624c271028e9b3ee335991b747
|
| 52 |
+
remote: https://github.com/TRS07170/myvla.git
|
| 53 |
+
gpu: NVIDIA A100-SXM4-80GB
|
| 54 |
+
gpu_count: 8
|
| 55 |
+
gpu_nvidia:
|
| 56 |
+
- architecture: Ampere
|
| 57 |
+
cudaCores: 6912
|
| 58 |
+
memoryTotal: "85899345920"
|
| 59 |
+
name: NVIDIA A100-SXM4-80GB
|
| 60 |
+
uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
|
| 61 |
+
- architecture: Ampere
|
| 62 |
+
cudaCores: 6912
|
| 63 |
+
memoryTotal: "85899345920"
|
| 64 |
+
name: NVIDIA A100-SXM4-80GB
|
| 65 |
+
uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
|
| 66 |
+
- architecture: Ampere
|
| 67 |
+
cudaCores: 6912
|
| 68 |
+
memoryTotal: "85899345920"
|
| 69 |
+
name: NVIDIA A100-SXM4-80GB
|
| 70 |
+
uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
|
| 71 |
+
- architecture: Ampere
|
| 72 |
+
cudaCores: 6912
|
| 73 |
+
memoryTotal: "85899345920"
|
| 74 |
+
name: NVIDIA A100-SXM4-80GB
|
| 75 |
+
uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
|
| 76 |
+
- architecture: Ampere
|
| 77 |
+
cudaCores: 6912
|
| 78 |
+
memoryTotal: "85899345920"
|
| 79 |
+
name: NVIDIA A100-SXM4-80GB
|
| 80 |
+
uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
|
| 81 |
+
- architecture: Ampere
|
| 82 |
+
cudaCores: 6912
|
| 83 |
+
memoryTotal: "85899345920"
|
| 84 |
+
name: NVIDIA A100-SXM4-80GB
|
| 85 |
+
uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
|
| 86 |
+
- architecture: Ampere
|
| 87 |
+
cudaCores: 6912
|
| 88 |
+
memoryTotal: "85899345920"
|
| 89 |
+
name: NVIDIA A100-SXM4-80GB
|
| 90 |
+
uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
|
| 91 |
+
- architecture: Ampere
|
| 92 |
+
cudaCores: 6912
|
| 93 |
+
memoryTotal: "85899345920"
|
| 94 |
+
name: NVIDIA A100-SXM4-80GB
|
| 95 |
+
uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
|
| 96 |
+
host: 164-152-109-69
|
| 97 |
+
memory:
|
| 98 |
+
total: "1902324936704"
|
| 99 |
+
os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
|
| 100 |
+
program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
|
| 101 |
+
python: CPython 3.10.18
|
| 102 |
+
root: myvla_exp/bl_multiview_depth_set_table
|
| 103 |
+
startedAt: "2025-08-22T17:55:44.807018Z"
|
| 104 |
+
writerId: kigmkgl7ewlmvaaruo3ggp33h5rhvks0
|
| 105 |
+
m: []
|
| 106 |
+
python_version: 3.10.18
|
| 107 |
+
t:
|
| 108 |
+
"1":
|
| 109 |
+
- 1
|
| 110 |
+
- 2
|
| 111 |
+
- 3
|
| 112 |
+
- 11
|
| 113 |
+
- 41
|
| 114 |
+
- 49
|
| 115 |
+
- 63
|
| 116 |
+
- 71
|
| 117 |
+
"2":
|
| 118 |
+
- 1
|
| 119 |
+
- 2
|
| 120 |
+
- 3
|
| 121 |
+
- 11
|
| 122 |
+
- 41
|
| 123 |
+
- 49
|
| 124 |
+
- 63
|
| 125 |
+
- 71
|
| 126 |
+
"3":
|
| 127 |
+
- 2
|
| 128 |
+
- 13
|
| 129 |
+
- 16
|
| 130 |
+
- 61
|
| 131 |
+
"4": 3.10.18
|
| 132 |
+
"5": 0.21.0
|
| 133 |
+
"6": 4.40.1
|
| 134 |
+
"12": 0.21.0
|
| 135 |
+
"13": linux-x86_64
|
| 136 |
+
data_root_dir:
|
| 137 |
+
value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
|
| 138 |
+
depth:
|
| 139 |
+
value: false
|
| 140 |
+
global_pose:
|
| 141 |
+
value: false
|
| 142 |
+
hf_token:
|
| 143 |
+
value: .hf_token
|
| 144 |
+
image_aug:
|
| 145 |
+
value: false
|
| 146 |
+
is_grasped:
|
| 147 |
+
value: false
|
| 148 |
+
is_resume:
|
| 149 |
+
value: false
|
| 150 |
+
model_type:
|
| 151 |
+
value: my_vla_qwen
|
| 152 |
+
pretrained_checkpoint:
|
| 153 |
+
value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
|
| 154 |
+
qpos:
|
| 155 |
+
value: false
|
| 156 |
+
resume_epoch:
|
| 157 |
+
value: null
|
| 158 |
+
resume_step:
|
| 159 |
+
value: null
|
| 160 |
+
run_id:
|
| 161 |
+
value: bl_multiview_depth_set_table
|
| 162 |
+
run_id_note:
|
| 163 |
+
value: null
|
| 164 |
+
run_root_dir:
|
| 165 |
+
value: myvla_exp
|
| 166 |
+
save_interval:
|
| 167 |
+
value: 1000
|
| 168 |
+
seed:
|
| 169 |
+
value: 7
|
| 170 |
+
segmentation:
|
| 171 |
+
value: false
|
| 172 |
+
trackers:
|
| 173 |
+
value:
|
| 174 |
+
- jsonl
|
| 175 |
+
- wandb
|
| 176 |
+
vla:
|
| 177 |
+
value:
|
| 178 |
+
action_chunk_size: 8
|
| 179 |
+
action_tokenizer: extra_action_tokenizer
|
| 180 |
+
base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
|
| 181 |
+
compress_history: false
|
| 182 |
+
data_mix: bridge
|
| 183 |
+
enable_gradient_checkpointing: true
|
| 184 |
+
enable_mixed_precision_training: true
|
| 185 |
+
epochs: 10
|
| 186 |
+
expected_world_size: 8
|
| 187 |
+
freeze_llm_backbone: false
|
| 188 |
+
freeze_vision_backbone: true
|
| 189 |
+
global_batch_size: 512
|
| 190 |
+
image_sequence_len: 4
|
| 191 |
+
image_window_size: 1
|
| 192 |
+
learning_rate: 2e-05
|
| 193 |
+
lr_scheduler_type: constant
|
| 194 |
+
max_grad_norm: 1
|
| 195 |
+
max_steps: null
|
| 196 |
+
per_device_batch_size: 8
|
| 197 |
+
reduce_in_full_precision: true
|
| 198 |
+
save_every_n_steps: 25000
|
| 199 |
+
shuffle_buffer_size: 256000
|
| 200 |
+
train_strategy: fsdp-full-shard
|
| 201 |
+
type: myvla-qwen-224px+mx-mshab
|
| 202 |
+
unfreeze_last_llm_layer: false
|
| 203 |
+
use_flow_matching: false
|
| 204 |
+
use_wrist_image: true
|
| 205 |
+
vla_id: myvla-qwen-224px+mx-mshab
|
| 206 |
+
warmup_ratio: 0
|
| 207 |
+
weight_decay: 0
|
| 208 |
+
wandb_entity:
|
| 209 |
+
value: traysen879-uc-san-diego
|
| 210 |
+
wandb_project:
|
| 211 |
+
value: mshab_vla
|
wandb/run-20250822_175544-mg58khw0/files/output.log
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[2;36m08/22 [17:55:46][0m[2;36m [0m[34mINFO [0m | >> [1m[[0m*[1m][0m Starting VLA Training Loop ]8;id=817857;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\[2mtrain.py[0m]8;;\[2m:[0m]8;id=998125;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#322\[2m322[0m]8;;\
|
| 2 |
+
|
| 3 |
+
[2;36m08/23 [06:27:09][0m[2;36m [0m[34mINFO [0m | >> [1m[[0m*[1m][0m Done with Training =>> Finalizing Metrics ]8;id=454536;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\[2mtrain.py[0m]8;;\[2m:[0m]8;id=267836;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#332\[2m332[0m]8;;\
|
wandb/run-20250822_175544-mg58khw0/files/requirements.txt
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
setuptools==78.1.1
|
| 2 |
+
wheel==0.45.1
|
| 3 |
+
pip==25.1
|
| 4 |
+
sentencepiece==0.1.99
|
| 5 |
+
mpmath==1.3.0
|
| 6 |
+
libclang==18.1.1
|
| 7 |
+
flatbuffers==25.2.10
|
| 8 |
+
zipp==3.23.0
|
| 9 |
+
wrapt==1.14.1
|
| 10 |
+
urllib3==2.5.0
|
| 11 |
+
typing_extensions==4.14.1
|
| 12 |
+
typeguard==2.13.3
|
| 13 |
+
tqdm==4.67.1
|
| 14 |
+
toml==0.10.2
|
| 15 |
+
termcolor==3.1.0
|
| 16 |
+
tensorflow-io-gcs-filesystem==0.37.1
|
| 17 |
+
tensorflow-estimator==2.15.0
|
| 18 |
+
tensorboard-data-server==0.7.2
|
| 19 |
+
sympy==1.14.0
|
| 20 |
+
smmap==5.0.2
|
| 21 |
+
six==1.17.0
|
| 22 |
+
safetensors==0.5.3
|
| 23 |
+
regex==2025.7.34
|
| 24 |
+
PyYAML==6.0.2
|
| 25 |
+
pyparsing==3.2.3
|
| 26 |
+
Pygments==2.19.2
|
| 27 |
+
pyasn1==0.6.1
|
| 28 |
+
psutil==7.0.0
|
| 29 |
+
protobuf==4.21.12
|
| 30 |
+
platformdirs==4.3.8
|
| 31 |
+
pillow==11.3.0
|
| 32 |
+
packaging==25.0
|
| 33 |
+
opt_einsum==3.4.0
|
| 34 |
+
oauthlib==3.3.1
|
| 35 |
+
nvidia-nvtx-cu12==12.1.105
|
| 36 |
+
nvidia-nvjitlink-cu12==12.9.86
|
| 37 |
+
nvidia-nccl-cu12==2.19.3
|
| 38 |
+
nvidia-curand-cu12==10.3.2.106
|
| 39 |
+
nvidia-cufft-cu12==11.0.2.54
|
| 40 |
+
nvidia-cuda-runtime-cu12==12.1.105
|
| 41 |
+
nvidia-cuda-nvrtc-cu12==12.1.105
|
| 42 |
+
nvidia-cuda-cupti-cu12==12.1.105
|
| 43 |
+
nvidia-cublas-cu12==12.1.3.1
|
| 44 |
+
numpy==1.26.4
|
| 45 |
+
networkx==3.4.2
|
| 46 |
+
mypy_extensions==1.1.0
|
| 47 |
+
mergedeep==1.3.4
|
| 48 |
+
mdurl==0.1.2
|
| 49 |
+
MarkupSafe==3.0.2
|
| 50 |
+
Markdown==3.8.2
|
| 51 |
+
kiwisolver==1.4.8
|
| 52 |
+
keras==2.15.0
|
| 53 |
+
importlib_resources==6.5.2
|
| 54 |
+
idna==3.10
|
| 55 |
+
hf-xet==1.1.5
|
| 56 |
+
grpcio==1.74.0
|
| 57 |
+
gast==0.6.0
|
| 58 |
+
fsspec==2025.7.0
|
| 59 |
+
fonttools==4.59.0
|
| 60 |
+
filelock==3.18.0
|
| 61 |
+
etils==1.13.0
|
| 62 |
+
einops==0.8.1
|
| 63 |
+
cycler==0.12.1
|
| 64 |
+
click==8.2.1
|
| 65 |
+
charset-normalizer==3.4.2
|
| 66 |
+
certifi==2025.8.3
|
| 67 |
+
cachetools==5.5.2
|
| 68 |
+
attrs==25.3.0
|
| 69 |
+
annotated-types==0.7.0
|
| 70 |
+
absl-py==2.3.1
|
| 71 |
+
Werkzeug==3.1.3
|
| 72 |
+
typing-inspection==0.4.1
|
| 73 |
+
typing-inspect==0.9.0
|
| 74 |
+
triton==2.2.0
|
| 75 |
+
trimesh==4.7.1
|
| 76 |
+
tensorflow-metadata==1.17.2
|
| 77 |
+
tensorflow-addons==0.23.0
|
| 78 |
+
sentry-sdk==2.34.1
|
| 79 |
+
scipy==1.15.3
|
| 80 |
+
rsa==4.9.1
|
| 81 |
+
requests==2.32.4
|
| 82 |
+
pyyaml-include==1.4.1
|
| 83 |
+
python-dateutil==2.9.0.post0
|
| 84 |
+
pydantic_core==2.33.2
|
| 85 |
+
pyasn1_modules==0.4.2
|
| 86 |
+
promise==2.3
|
| 87 |
+
OpenEXR==3.3.5
|
| 88 |
+
nvidia-cusparse-cu12==12.1.0.106
|
| 89 |
+
nvidia-cudnn-cu12==8.9.2.26
|
| 90 |
+
ml-dtypes==0.2.0
|
| 91 |
+
markdown-it-py==3.0.0
|
| 92 |
+
jsonlines==4.0.0
|
| 93 |
+
json-numpy==2.1.1
|
| 94 |
+
Jinja2==3.1.6
|
| 95 |
+
h5py==3.14.0
|
| 96 |
+
google-pasta==0.2.0
|
| 97 |
+
gitdb==4.0.12
|
| 98 |
+
dm-tree==0.1.9
|
| 99 |
+
contourpy==1.3.2
|
| 100 |
+
astunparse==1.6.3
|
| 101 |
+
rich==14.1.0
|
| 102 |
+
requests-oauthlib==2.0.0
|
| 103 |
+
pydantic==2.11.7
|
| 104 |
+
nvidia-cusolver-cu12==11.4.5.107
|
| 105 |
+
matplotlib==3.10.5
|
| 106 |
+
huggingface-hub==0.34.3
|
| 107 |
+
google-auth==2.40.3
|
| 108 |
+
GitPython==3.1.45
|
| 109 |
+
draccus==0.8.0
|
| 110 |
+
wandb==0.21.0
|
| 111 |
+
torch==2.2.0
|
| 112 |
+
tokenizers==0.19.1
|
| 113 |
+
google-auth-oauthlib==1.2.2
|
| 114 |
+
array_record==0.7.2
|
| 115 |
+
transformers==4.40.1
|
| 116 |
+
torchvision==0.17.0
|
| 117 |
+
torchaudio==2.2.0
|
| 118 |
+
tensorboard==2.15.2
|
| 119 |
+
accelerate==1.9.0
|
| 120 |
+
timm==0.9.10
|
| 121 |
+
tensorflow-datasets==4.9.3
|
| 122 |
+
tensorflow==2.15.0
|
| 123 |
+
peft==0.11.1
|
| 124 |
+
tensorflow-graphics==2021.12.3
|
| 125 |
+
dlimp==0.0.1
|
| 126 |
+
openvla==0.0.3
|
| 127 |
+
ninja==1.11.1.4
|
| 128 |
+
flash-attn==2.5.5
|
| 129 |
+
autocommand==2.2.2
|
| 130 |
+
backports.tarfile==1.2.0
|
| 131 |
+
importlib_metadata==8.0.0
|
| 132 |
+
inflect==7.3.1
|
| 133 |
+
jaraco.collections==5.1.0
|
| 134 |
+
jaraco.context==5.3.0
|
| 135 |
+
jaraco.functools==4.0.1
|
| 136 |
+
jaraco.text==3.12.1
|
| 137 |
+
more-itertools==10.3.0
|
| 138 |
+
packaging==24.2
|
| 139 |
+
platformdirs==4.2.2
|
| 140 |
+
tomli==2.0.1
|
| 141 |
+
typeguard==4.3.0
|
| 142 |
+
typing_extensions==4.12.2
|
| 143 |
+
wheel==0.45.1
|
| 144 |
+
zipp==3.19.2
|
wandb/run-20250822_175544-mg58khw0/files/wandb-metadata.json
ADDED
|
@@ -0,0 +1,123 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"os": "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
|
| 3 |
+
"python": "CPython 3.10.18",
|
| 4 |
+
"startedAt": "2025-08-22T17:55:44.807018Z",
|
| 5 |
+
"args": [
|
| 6 |
+
"--pretrained_checkpoint",
|
| 7 |
+
"Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
|
| 8 |
+
"--model_type",
|
| 9 |
+
"my_vla_qwen",
|
| 10 |
+
"--vla.type",
|
| 11 |
+
"myvla-qwen-224px+mx-mshab",
|
| 12 |
+
"--vla.expected_world_size",
|
| 13 |
+
"8",
|
| 14 |
+
"--vla.global_batch_size",
|
| 15 |
+
"512",
|
| 16 |
+
"--vla.per_device_batch_size",
|
| 17 |
+
"8",
|
| 18 |
+
"--vla.learning_rate",
|
| 19 |
+
"2e-5",
|
| 20 |
+
"--vla.freeze_vision_backbone",
|
| 21 |
+
"True",
|
| 22 |
+
"--vla.freeze_llm_backbone",
|
| 23 |
+
"False",
|
| 24 |
+
"--vla.use_flow_matching",
|
| 25 |
+
"False",
|
| 26 |
+
"--vla.compress_history",
|
| 27 |
+
"False",
|
| 28 |
+
"--vla.image_sequence_len",
|
| 29 |
+
"4",
|
| 30 |
+
"--vla.image_window_size",
|
| 31 |
+
"1",
|
| 32 |
+
"--vla.epochs",
|
| 33 |
+
"10",
|
| 34 |
+
"--save_interval",
|
| 35 |
+
"1000",
|
| 36 |
+
"--run_id",
|
| 37 |
+
"bl_multiview_depth_set_table"
|
| 38 |
+
],
|
| 39 |
+
"program": "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
|
| 40 |
+
"codePath": "vla-scripts/train.py",
|
| 41 |
+
"codePathLocal": "vla-scripts/train.py",
|
| 42 |
+
"git": {
|
| 43 |
+
"remote": "https://github.com/TRS07170/myvla.git",
|
| 44 |
+
"commit": "409e4c9a165115624c271028e9b3ee335991b747"
|
| 45 |
+
},
|
| 46 |
+
"email": "traysen879@gmail.com",
|
| 47 |
+
"root": "myvla_exp/bl_multiview_depth_set_table",
|
| 48 |
+
"host": "164-152-109-69",
|
| 49 |
+
"executable": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
|
| 50 |
+
"cpu_count": 240,
|
| 51 |
+
"cpu_count_logical": 240,
|
| 52 |
+
"gpu": "NVIDIA A100-SXM4-80GB",
|
| 53 |
+
"gpu_count": 8,
|
| 54 |
+
"disk": {
|
| 55 |
+
"/": {
|
| 56 |
+
"total": "20812690710528",
|
| 57 |
+
"used": "36965580800"
|
| 58 |
+
}
|
| 59 |
+
},
|
| 60 |
+
"memory": {
|
| 61 |
+
"total": "1902324936704"
|
| 62 |
+
},
|
| 63 |
+
"gpu_nvidia": [
|
| 64 |
+
{
|
| 65 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 66 |
+
"memoryTotal": "85899345920",
|
| 67 |
+
"cudaCores": 6912,
|
| 68 |
+
"architecture": "Ampere",
|
| 69 |
+
"uuid": "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
|
| 70 |
+
},
|
| 71 |
+
{
|
| 72 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 73 |
+
"memoryTotal": "85899345920",
|
| 74 |
+
"cudaCores": 6912,
|
| 75 |
+
"architecture": "Ampere",
|
| 76 |
+
"uuid": "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
|
| 77 |
+
},
|
| 78 |
+
{
|
| 79 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 80 |
+
"memoryTotal": "85899345920",
|
| 81 |
+
"cudaCores": 6912,
|
| 82 |
+
"architecture": "Ampere",
|
| 83 |
+
"uuid": "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
|
| 84 |
+
},
|
| 85 |
+
{
|
| 86 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 87 |
+
"memoryTotal": "85899345920",
|
| 88 |
+
"cudaCores": 6912,
|
| 89 |
+
"architecture": "Ampere",
|
| 90 |
+
"uuid": "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
|
| 91 |
+
},
|
| 92 |
+
{
|
| 93 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 94 |
+
"memoryTotal": "85899345920",
|
| 95 |
+
"cudaCores": 6912,
|
| 96 |
+
"architecture": "Ampere",
|
| 97 |
+
"uuid": "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
|
| 98 |
+
},
|
| 99 |
+
{
|
| 100 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 101 |
+
"memoryTotal": "85899345920",
|
| 102 |
+
"cudaCores": 6912,
|
| 103 |
+
"architecture": "Ampere",
|
| 104 |
+
"uuid": "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
|
| 105 |
+
},
|
| 106 |
+
{
|
| 107 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 108 |
+
"memoryTotal": "85899345920",
|
| 109 |
+
"cudaCores": 6912,
|
| 110 |
+
"architecture": "Ampere",
|
| 111 |
+
"uuid": "GPU-335150e5-634c-68e2-4930-656c95e62244"
|
| 112 |
+
},
|
| 113 |
+
{
|
| 114 |
+
"name": "NVIDIA A100-SXM4-80GB",
|
| 115 |
+
"memoryTotal": "85899345920",
|
| 116 |
+
"cudaCores": 6912,
|
| 117 |
+
"architecture": "Ampere",
|
| 118 |
+
"uuid": "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
|
| 119 |
+
}
|
| 120 |
+
],
|
| 121 |
+
"cudaVersion": "12.4",
|
| 122 |
+
"writerId": "kigmkgl7ewlmvaaruo3ggp33h5rhvks0"
|
| 123 |
+
}
|
wandb/run-20250822_175544-mg58khw0/files/wandb-summary.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"VLA Train/Loss (Raw)":0.1435861736536026,"VLA Train/Step Time":6.651643753051758,"_step":7160,"_timestamp":1.7559304174719603e+09,"_wandb":{"runtime":45084},"_runtime":45084.391904516,"VLA Train/Loss":0.1435861736536026,"VLA Train/Auxiliary Loss":0,"VLA Train/Action Token Accuracy":0.6699029207229614,"VLA Train/Step":7160,"VLA Train/Epoch":10,"VLA Train/Learning Rate":2e-05,"VLA Train/L1 Loss":0.07942128307633732}
|
wandb/run-20250822_175544-mg58khw0/logs/debug-core.log
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-08-22T17:55:44.993792062Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpd1k24cho/port-4004464.txt","pid":4004464,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
|
| 2 |
+
{"time":"2025-08-22T17:55:44.995074839Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":4004464}
|
| 3 |
+
{"time":"2025-08-22T17:55:44.995034278Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-4004464-4005403-1358423967/socket","Net":"unix"}}
|
| 4 |
+
{"time":"2025-08-22T17:55:45.098967428Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
|
| 5 |
+
{"time":"2025-08-22T17:55:45.118162901Z","level":"INFO","msg":"handleInformInit: received","streamId":"mg58khw0","id":"1(@)"}
|
| 6 |
+
{"time":"2025-08-22T17:55:45.417996836Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"mg58khw0","id":"1(@)"}
|
| 7 |
+
{"time":"2025-08-23T06:27:10.450725106Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"mg58khw0","id":"1(@)"}
|
| 8 |
+
{"time":"2025-08-23T06:27:10.45584368Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"mg58khw0","id":"1(@)"}
|
| 9 |
+
{"time":"2025-08-23T06:30:42.448001203Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
|
| 10 |
+
{"time":"2025-08-23T06:30:42.448106914Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
|
| 11 |
+
{"time":"2025-08-23T06:30:42.448117954Z","level":"INFO","msg":"server is shutting down"}
|
| 12 |
+
{"time":"2025-08-23T06:30:42.448183167Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
|
| 13 |
+
{"time":"2025-08-23T06:30:42.44832581Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
|
| 14 |
+
{"time":"2025-08-23T06:30:42.448204876Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-4004464-4005403-1358423967/socket","Net":"unix"}}
|
| 15 |
+
{"time":"2025-08-23T06:30:42.44833905Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
|
| 16 |
+
{"time":"2025-08-23T06:30:42.448423102Z","level":"INFO","msg":"server is closed"}
|
wandb/run-20250822_175544-mg58khw0/logs/debug-internal.log
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"time":"2025-08-22T17:55:45.122997378Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
|
| 2 |
+
{"time":"2025-08-22T17:55:45.417778442Z","level":"INFO","msg":"stream: created new stream","id":"mg58khw0"}
|
| 3 |
+
{"time":"2025-08-22T17:55:45.417944745Z","level":"INFO","msg":"stream: started","id":"mg58khw0"}
|
| 4 |
+
{"time":"2025-08-22T17:55:45.417987887Z","level":"INFO","msg":"writer: Do: started","stream_id":"mg58khw0"}
|
| 5 |
+
{"time":"2025-08-22T17:55:45.418028658Z","level":"INFO","msg":"sender: started","stream_id":"mg58khw0"}
|
| 6 |
+
{"time":"2025-08-22T17:55:45.418052908Z","level":"INFO","msg":"handler: started","stream_id":"mg58khw0"}
|
| 7 |
+
{"time":"2025-08-22T19:05:01.607571042Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
|
| 8 |
+
{"time":"2025-08-23T06:27:10.330396586Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
|
| 9 |
+
{"time":"2025-08-23T06:27:10.442620306Z","level":"INFO","msg":"handler: operation stats","stats":{}}
|
| 10 |
+
{"time":"2025-08-23T06:27:10.450768217Z","level":"INFO","msg":"stream: closing","id":"mg58khw0"}
|
| 11 |
+
{"time":"2025-08-23T06:27:10.450887019Z","level":"INFO","msg":"handler: closed","stream_id":"mg58khw0"}
|
| 12 |
+
{"time":"2025-08-23T06:27:10.450965602Z","level":"INFO","msg":"sender: closed","stream_id":"mg58khw0"}
|
| 13 |
+
{"time":"2025-08-23T06:27:10.45093435Z","level":"INFO","msg":"writer: Close: closed","stream_id":"mg58khw0"}
|
| 14 |
+
{"time":"2025-08-23T06:27:10.45361464Z","level":"INFO","msg":"stream: closed","id":"mg58khw0"}
|
wandb/run-20250822_175544-mg58khw0/logs/debug.log
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
2025-08-22 17:55:44,851 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
|
| 2 |
+
2025-08-22 17:55:44,851 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Configure stats pid to 4004464
|
| 3 |
+
2025-08-22 17:55:44,851 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
|
| 4 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
|
| 5 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_setup.py:_flush():80] Loading settings from environment variables
|
| 6 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_depth_set_table/wandb/run-20250822_175544-mg58khw0/logs/debug.log
|
| 7 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_depth_set_table/wandb/run-20250822_175544-mg58khw0/logs/debug-internal.log
|
| 8 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:init():830] calling init triggers
|
| 9 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
|
| 10 |
+
config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 8, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 4, 'use_wrist_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 1}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'depth': False, 'segmentation': False, '_wandb': {}}
|
| 11 |
+
2025-08-22 17:55:44,852 INFO MainThread:4004464 [wandb_init.py:init():871] starting backend
|
| 12 |
+
2025-08-22 17:55:45,099 INFO MainThread:4004464 [wandb_init.py:init():874] sending inform_init request
|
| 13 |
+
2025-08-22 17:55:45,108 INFO MainThread:4004464 [wandb_init.py:init():882] backend started and connected
|
| 14 |
+
2025-08-22 17:55:45,114 INFO MainThread:4004464 [wandb_init.py:init():953] updated telemetry
|
| 15 |
+
2025-08-22 17:55:45,150 INFO MainThread:4004464 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
|
| 16 |
+
2025-08-22 17:55:45,586 INFO MainThread:4004464 [wandb_init.py:init():1029] starting run threads in backend
|
| 17 |
+
2025-08-22 17:55:46,161 INFO MainThread:4004464 [wandb_run.py:_console_start():2458] atexit reg
|
| 18 |
+
2025-08-22 17:55:46,161 INFO MainThread:4004464 [wandb_run.py:_redirect():2306] redirect: wrap_raw
|
| 19 |
+
2025-08-22 17:55:46,163 INFO MainThread:4004464 [wandb_run.py:_redirect():2375] Wrapping output streams.
|
| 20 |
+
2025-08-22 17:55:46,163 INFO MainThread:4004464 [wandb_run.py:_redirect():2398] Redirects installed.
|
| 21 |
+
2025-08-22 17:55:46,173 INFO MainThread:4004464 [wandb_init.py:init():1075] run started, returning control to user process
|
| 22 |
+
2025-08-23 06:27:09,979 INFO MainThread:4004464 [wandb_run.py:_finish():2224] finishing run traysen879-uc-san-diego/mshab_vla/mg58khw0
|
| 23 |
+
2025-08-23 06:27:09,980 INFO MainThread:4004464 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
|
| 24 |
+
2025-08-23 06:27:09,980 INFO MainThread:4004464 [wandb_run.py:_restore():2405] restore
|
| 25 |
+
2025-08-23 06:27:09,981 INFO MainThread:4004464 [wandb_run.py:_restore():2411] restore done
|
| 26 |
+
2025-08-23 06:27:10,444 INFO MainThread:4004464 [wandb_run.py:_footer_history_summary_info():3903] rendering history
|
| 27 |
+
2025-08-23 06:27:10,446 INFO MainThread:4004464 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
|
| 28 |
+
2025-08-23 06:27:10,446 INFO MainThread:4004464 [wandb_run.py:_footer_sync_info():3864] logging synced files
|
wandb/run-20250822_175544-mg58khw0/run-mg58khw0.wandb
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9f171964c393d3a2a2a01822280a9c562b11ad093c2cefbe365d066562591d66
|
| 3 |
+
size 21823104
|