trs07170 commited on
Commit
b67468e
·
verified ·
1 Parent(s): 9c6d98e

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. bl_multiview_history_depth_set_table.jsonl +0 -0
  3. checkpoints/step-007160-epoch-10-loss=0.0736.pt +3 -0
  4. config.json +58 -0
  5. config.yaml +54 -0
  6. run-metrics.jsonl +1 -0
  7. wandb/debug-internal.log +22 -0
  8. wandb/debug.log +28 -0
  9. wandb/run-20250820_090050-3cqyp9vg/files/config.yaml +204 -0
  10. wandb/run-20250820_090050-3cqyp9vg/files/output.log +45 -0
  11. wandb/run-20250820_090050-3cqyp9vg/files/requirements.txt +144 -0
  12. wandb/run-20250820_090050-3cqyp9vg/files/wandb-metadata.json +119 -0
  13. wandb/run-20250820_090050-3cqyp9vg/files/wandb-summary.json +1 -0
  14. wandb/run-20250820_090050-3cqyp9vg/logs/debug-core.log +14 -0
  15. wandb/run-20250820_090050-3cqyp9vg/logs/debug-internal.log +12 -0
  16. wandb/run-20250820_090050-3cqyp9vg/logs/debug.log +22 -0
  17. wandb/run-20250820_090050-3cqyp9vg/run-3cqyp9vg.wandb +0 -0
  18. wandb/run-20250820_091147-qt1b7wpr/files/config.yaml +204 -0
  19. wandb/run-20250820_091147-qt1b7wpr/files/output.log +11 -0
  20. wandb/run-20250820_091147-qt1b7wpr/files/requirements.txt +144 -0
  21. wandb/run-20250820_091147-qt1b7wpr/files/wandb-metadata.json +119 -0
  22. wandb/run-20250820_091147-qt1b7wpr/files/wandb-summary.json +1 -0
  23. wandb/run-20250820_091147-qt1b7wpr/logs/debug-core.log +14 -0
  24. wandb/run-20250820_091147-qt1b7wpr/logs/debug-internal.log +12 -0
  25. wandb/run-20250820_091147-qt1b7wpr/logs/debug.log +22 -0
  26. wandb/run-20250820_091147-qt1b7wpr/run-qt1b7wpr.wandb +0 -0
  27. wandb/run-20250820_094544-v4zsb4rt/files/config.yaml +204 -0
  28. wandb/run-20250820_094544-v4zsb4rt/files/output.log +11 -0
  29. wandb/run-20250820_094544-v4zsb4rt/files/requirements.txt +144 -0
  30. wandb/run-20250820_094544-v4zsb4rt/files/wandb-metadata.json +119 -0
  31. wandb/run-20250820_094544-v4zsb4rt/files/wandb-summary.json +1 -0
  32. wandb/run-20250820_094544-v4zsb4rt/logs/debug-core.log +14 -0
  33. wandb/run-20250820_094544-v4zsb4rt/logs/debug-internal.log +12 -0
  34. wandb/run-20250820_094544-v4zsb4rt/logs/debug.log +22 -0
  35. wandb/run-20250820_094544-v4zsb4rt/run-v4zsb4rt.wandb +0 -0
  36. wandb/run-20250820_095138-in9qu6p9/files/config.yaml +204 -0
  37. wandb/run-20250820_095138-in9qu6p9/files/output.log +13 -0
  38. wandb/run-20250820_095138-in9qu6p9/files/requirements.txt +144 -0
  39. wandb/run-20250820_095138-in9qu6p9/files/wandb-metadata.json +119 -0
  40. wandb/run-20250820_095138-in9qu6p9/files/wandb-summary.json +1 -0
  41. wandb/run-20250820_095138-in9qu6p9/logs/debug-core.log +14 -0
  42. wandb/run-20250820_095138-in9qu6p9/logs/debug-internal.log +12 -0
  43. wandb/run-20250820_095138-in9qu6p9/logs/debug.log +22 -0
  44. wandb/run-20250820_095138-in9qu6p9/run-in9qu6p9.wandb +0 -0
  45. wandb/run-20250820_095524-3yyycq6f/files/config.yaml +204 -0
  46. wandb/run-20250820_095524-3yyycq6f/files/output.log +13 -0
  47. wandb/run-20250820_095524-3yyycq6f/files/requirements.txt +144 -0
  48. wandb/run-20250820_095524-3yyycq6f/files/wandb-metadata.json +119 -0
  49. wandb/run-20250820_095524-3yyycq6f/files/wandb-summary.json +1 -0
  50. wandb/run-20250820_095524-3yyycq6f/logs/debug-core.log +14 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ wandb/run-20250820_102642-5v6c0rms/run-5v6c0rms.wandb filter=lfs diff=lfs merge=lfs -text
bl_multiview_history_depth_set_table.jsonl ADDED
The diff for this file is too large to render. See raw diff
 
checkpoints/step-007160-epoch-10-loss=0.0736.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76b99149472d744b5a28f29807227ad854ae854a6933d0abd3e4b83e0b6ce9cb
3
+ size 4093057552
config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_root_dir": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data",
3
+ "global_pose": false,
4
+ "hf_token": ".hf_token",
5
+ "image_aug": false,
6
+ "is_grasped": false,
7
+ "is_resume": false,
8
+ "model_type": "my_vla_qwen",
9
+ "pretrained_checkpoint": "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
10
+ "qpos": false,
11
+ "resume_epoch": null,
12
+ "resume_step": null,
13
+ "run_id": "bl_multiview_history_depth_set_table",
14
+ "run_id_note": null,
15
+ "run_root_dir": "myvla_exp",
16
+ "save_interval": 1000,
17
+ "seed": 7,
18
+ "segmentation": false,
19
+ "trackers": [
20
+ "jsonl",
21
+ "wandb"
22
+ ],
23
+ "vla": {
24
+ "action_chunk_size": 8,
25
+ "action_tokenizer": "extra_action_tokenizer",
26
+ "base_vlm": "prism-qwen25-extra-dinosiglip-224px+0_5b",
27
+ "compress_history": false,
28
+ "data_mix": "bridge",
29
+ "enable_gradient_checkpointing": true,
30
+ "enable_mixed_precision_training": true,
31
+ "epochs": 10,
32
+ "expected_world_size": 4,
33
+ "freeze_llm_backbone": false,
34
+ "freeze_vision_backbone": true,
35
+ "global_batch_size": 512,
36
+ "image_sequence_len": 10,
37
+ "image_window_size": 4,
38
+ "learning_rate": 2e-05,
39
+ "lr_scheduler_type": "constant",
40
+ "max_grad_norm": 1.0,
41
+ "max_steps": null,
42
+ "per_device_batch_size": 8,
43
+ "reduce_in_full_precision": true,
44
+ "save_every_n_steps": 25000,
45
+ "shuffle_buffer_size": 256000,
46
+ "train_strategy": "fsdp-full-shard",
47
+ "type": "myvla-qwen-224px+mx-mshab",
48
+ "unfreeze_last_llm_layer": false,
49
+ "use_depth_image": true,
50
+ "use_flow_matching": false,
51
+ "use_wrist_image": true,
52
+ "vla_id": "myvla-qwen-224px+mx-mshab",
53
+ "warmup_ratio": 0.0,
54
+ "weight_decay": 0.0
55
+ },
56
+ "wandb_entity": "traysen879-uc-san-diego",
57
+ "wandb_project": "mshab_vla"
58
+ }
config.yaml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_root_dir: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
2
+ global_pose: false
3
+ hf_token: .hf_token
4
+ image_aug: false
5
+ is_grasped: false
6
+ is_resume: false
7
+ model_type: my_vla_qwen
8
+ pretrained_checkpoint: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
9
+ qpos: false
10
+ resume_epoch: null
11
+ resume_step: null
12
+ run_id: bl_multiview_history_depth_set_table
13
+ run_id_note: null
14
+ run_root_dir: myvla_exp
15
+ save_interval: 1000
16
+ seed: 7
17
+ segmentation: false
18
+ trackers:
19
+ - jsonl
20
+ - wandb
21
+ vla:
22
+ action_chunk_size: 8
23
+ action_tokenizer: extra_action_tokenizer
24
+ base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
25
+ compress_history: false
26
+ data_mix: bridge
27
+ enable_gradient_checkpointing: true
28
+ enable_mixed_precision_training: true
29
+ epochs: 10
30
+ expected_world_size: 4
31
+ freeze_llm_backbone: false
32
+ freeze_vision_backbone: true
33
+ global_batch_size: 512
34
+ image_sequence_len: 10
35
+ image_window_size: 4
36
+ learning_rate: 2.0e-05
37
+ lr_scheduler_type: constant
38
+ max_grad_norm: 1.0
39
+ max_steps: null
40
+ per_device_batch_size: 8
41
+ reduce_in_full_precision: true
42
+ save_every_n_steps: 25000
43
+ shuffle_buffer_size: 256000
44
+ train_strategy: fsdp-full-shard
45
+ type: myvla-qwen-224px+mx-mshab
46
+ unfreeze_last_llm_layer: false
47
+ use_depth_image: true
48
+ use_flow_matching: false
49
+ use_wrist_image: true
50
+ vla_id: myvla-qwen-224px+mx-mshab
51
+ warmup_ratio: 0.0
52
+ weight_decay: 0.0
53
+ wandb_entity: traysen879-uc-san-diego
54
+ wandb_project: mshab_vla
run-metrics.jsonl ADDED
@@ -0,0 +1 @@
 
 
1
+ {"hparams": {"data_root_dir": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data", "global_pose": false, "hf_token": ".hf_token", "image_aug": false, "is_grasped": false, "is_resume": false, "model_type": "my_vla_qwen", "pretrained_checkpoint": "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b", "qpos": false, "resume_epoch": null, "resume_step": null, "run_id": "bl_multiview_history_depth_set_table", "run_id_note": null, "run_root_dir": "myvla_exp", "save_interval": 1000, "seed": 7, "segmentation": false, "trackers": ["jsonl", "wandb"], "vla": {"action_chunk_size": 8, "action_tokenizer": "extra_action_tokenizer", "base_vlm": "prism-qwen25-extra-dinosiglip-224px+0_5b", "compress_history": false, "data_mix": "bridge", "enable_gradient_checkpointing": true, "enable_mixed_precision_training": true, "epochs": 10, "expected_world_size": 4, "freeze_llm_backbone": false, "freeze_vision_backbone": true, "global_batch_size": 512, "image_sequence_len": 10, "image_window_size": 4, "learning_rate": 2e-05, "lr_scheduler_type": "constant", "max_grad_norm": 1.0, "max_steps": null, "per_device_batch_size": 8, "reduce_in_full_precision": true, "save_every_n_steps": 25000, "shuffle_buffer_size": 256000, "train_strategy": "fsdp-full-shard", "type": "myvla-qwen-224px+mx-mshab", "unfreeze_last_llm_layer": false, "use_depth_image": true, "use_flow_matching": false, "use_wrist_image": true, "vla_id": "myvla-qwen-224px+mx-mshab", "warmup_ratio": 0.0, "weight_decay": 0.0}, "wandb_entity": "traysen879-uc-san-diego", "wandb_project": "mshab_vla"}, "run_id": "bl_multiview_history_depth_set_table"}
wandb/debug-internal.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-20T10:26:43.204675859Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-08-20T10:26:43.516851918Z","level":"INFO","msg":"stream: created new stream","id":"5v6c0rms"}
3
+ {"time":"2025-08-20T10:26:43.516991671Z","level":"INFO","msg":"stream: started","id":"5v6c0rms"}
4
+ {"time":"2025-08-20T10:26:43.517178725Z","level":"INFO","msg":"writer: Do: started","stream_id":"5v6c0rms"}
5
+ {"time":"2025-08-20T10:26:43.517063263Z","level":"INFO","msg":"handler: started","stream_id":"5v6c0rms"}
6
+ {"time":"2025-08-20T10:26:43.517088984Z","level":"INFO","msg":"sender: started","stream_id":"5v6c0rms"}
7
+ {"time":"2025-08-20T16:19:00.197037424Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
8
+ {"time":"2025-08-20T18:26:46.626255579Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
9
+ {"time":"2025-08-20T20:34:33.882438305Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/traysen879-uc-san-diego/mshab_vla/5v6c0rms/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
10
+ {"time":"2025-08-21T07:33:39.302593268Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
11
+ {"time":"2025-08-21T12:32:35.830938746Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/traysen879-uc-san-diego/mshab_vla/5v6c0rms/file_stream\": http2: server sent GOAWAY and closed the connection; LastStreamID=1, ErrCode=NO_ERROR, debug=\"server_shutting_down\""}
12
+ {"time":"2025-08-21T12:53:56.107366065Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
13
+ {"time":"2025-08-21T19:51:10.717177409Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/traysen879-uc-san-diego/mshab_vla/5v6c0rms/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
14
+ {"time":"2025-08-21T19:55:28.726667762Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
15
+ {"time":"2025-08-21T21:35:15.927046924Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/traysen879-uc-san-diego/mshab_vla/5v6c0rms/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
16
+ {"time":"2025-08-22T08:17:32.392853531Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
17
+ {"time":"2025-08-22T08:17:32.457515475Z","level":"INFO","msg":"handler: operation stats","stats":{}}
18
+ {"time":"2025-08-22T08:17:32.467508897Z","level":"INFO","msg":"stream: closing","id":"5v6c0rms"}
19
+ {"time":"2025-08-22T08:17:32.468012198Z","level":"INFO","msg":"handler: closed","stream_id":"5v6c0rms"}
20
+ {"time":"2025-08-22T08:17:32.468066299Z","level":"INFO","msg":"writer: Close: closed","stream_id":"5v6c0rms"}
21
+ {"time":"2025-08-22T08:17:32.4681065Z","level":"INFO","msg":"sender: closed","stream_id":"5v6c0rms"}
22
+ {"time":"2025-08-22T08:17:32.47079997Z","level":"INFO","msg":"stream: closed","id":"5v6c0rms"}
wandb/debug.log ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-08-20 10:26:42,933 INFO MainThread:3727471 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-08-20 10:26:42,933 INFO MainThread:3727471 [wandb_setup.py:_flush():80] Configure stats pid to 3727471
3
+ 2025-08-20 10:26:42,933 INFO MainThread:3727471 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
4
+ 2025-08-20 10:26:42,933 INFO MainThread:3727471 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
5
+ 2025-08-20 10:26:42,934 INFO MainThread:3727471 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-08-20 10:26:42,934 INFO MainThread:3727471 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_102642-5v6c0rms/logs/debug.log
7
+ 2025-08-20 10:26:42,934 INFO MainThread:3727471 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_102642-5v6c0rms/logs/debug-internal.log
8
+ 2025-08-20 10:26:42,934 INFO MainThread:3727471 [wandb_init.py:init():830] calling init triggers
9
+ 2025-08-20 10:26:42,934 INFO MainThread:3727471 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 4, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 10, 'use_wrist_image': True, 'use_depth_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 4}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_history_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'segmentation': False, '_wandb': {}}
11
+ 2025-08-20 10:26:42,934 INFO MainThread:3727471 [wandb_init.py:init():871] starting backend
12
+ 2025-08-20 10:26:43,181 INFO MainThread:3727471 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-08-20 10:26:43,190 INFO MainThread:3727471 [wandb_init.py:init():882] backend started and connected
14
+ 2025-08-20 10:26:43,201 INFO MainThread:3727471 [wandb_init.py:init():953] updated telemetry
15
+ 2025-08-20 10:26:43,238 INFO MainThread:3727471 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-08-20 10:26:43,789 INFO MainThread:3727471 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-08-20 10:26:44,280 INFO MainThread:3727471 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-08-20 10:26:44,281 INFO MainThread:3727471 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-08-20 10:26:44,281 INFO MainThread:3727471 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-08-20 10:26:44,282 INFO MainThread:3727471 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-08-20 10:26:44,290 INFO MainThread:3727471 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-08-22 08:17:32,042 INFO MainThread:3727471 [wandb_run.py:_finish():2224] finishing run traysen879-uc-san-diego/mshab_vla/5v6c0rms
23
+ 2025-08-22 08:17:32,043 INFO MainThread:3727471 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
24
+ 2025-08-22 08:17:32,044 INFO MainThread:3727471 [wandb_run.py:_restore():2405] restore
25
+ 2025-08-22 08:17:32,044 INFO MainThread:3727471 [wandb_run.py:_restore():2411] restore done
26
+ 2025-08-22 08:17:32,460 INFO MainThread:3727471 [wandb_run.py:_footer_history_summary_info():3903] rendering history
27
+ 2025-08-22 08:17:32,462 INFO MainThread:3727471 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
28
+ 2025-08-22 08:17:32,463 INFO MainThread:3727471 [wandb_run.py:_footer_sync_info():3864] logging synced files
wandb/run-20250820_090050-3cqyp9vg/files/config.yaml ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ beqmkna8ab9p0pvi5lhmmalckxlanj5v:
6
+ args:
7
+ - --pretrained_checkpoint
8
+ - Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
9
+ - --model_type
10
+ - my_vla_qwen
11
+ - --vla.type
12
+ - myvla-qwen-224px+mx-mshab
13
+ - --vla.expected_world_size
14
+ - "4"
15
+ - --vla.global_batch_size
16
+ - "512"
17
+ - --vla.per_device_batch_size
18
+ - "16"
19
+ - --vla.learning_rate
20
+ - "2e-5"
21
+ - --vla.freeze_vision_backbone
22
+ - "True"
23
+ - --vla.freeze_llm_backbone
24
+ - "False"
25
+ - --vla.use_flow_matching
26
+ - "False"
27
+ - --vla.compress_history
28
+ - "False"
29
+ - --vla.epochs
30
+ - "10"
31
+ - --save_interval
32
+ - "1000"
33
+ - --run_id
34
+ - bl_multiview_history_depth_set_table
35
+ codePath: vla-scripts/train.py
36
+ codePathLocal: vla-scripts/train.py
37
+ cpu_count: 240
38
+ cpu_count_logical: 240
39
+ cudaVersion: "12.4"
40
+ disk:
41
+ /:
42
+ total: "20812690710528"
43
+ used: "36590596096"
44
+ email: traysen879@gmail.com
45
+ executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
46
+ git:
47
+ commit: 409e4c9a165115624c271028e9b3ee335991b747
48
+ remote: https://github.com/TRS07170/myvla.git
49
+ gpu: NVIDIA A100-SXM4-80GB
50
+ gpu_count: 8
51
+ gpu_nvidia:
52
+ - architecture: Ampere
53
+ cudaCores: 6912
54
+ memoryTotal: "85899345920"
55
+ name: NVIDIA A100-SXM4-80GB
56
+ uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
57
+ - architecture: Ampere
58
+ cudaCores: 6912
59
+ memoryTotal: "85899345920"
60
+ name: NVIDIA A100-SXM4-80GB
61
+ uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
62
+ - architecture: Ampere
63
+ cudaCores: 6912
64
+ memoryTotal: "85899345920"
65
+ name: NVIDIA A100-SXM4-80GB
66
+ uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
67
+ - architecture: Ampere
68
+ cudaCores: 6912
69
+ memoryTotal: "85899345920"
70
+ name: NVIDIA A100-SXM4-80GB
71
+ uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
72
+ - architecture: Ampere
73
+ cudaCores: 6912
74
+ memoryTotal: "85899345920"
75
+ name: NVIDIA A100-SXM4-80GB
76
+ uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
77
+ - architecture: Ampere
78
+ cudaCores: 6912
79
+ memoryTotal: "85899345920"
80
+ name: NVIDIA A100-SXM4-80GB
81
+ uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
82
+ - architecture: Ampere
83
+ cudaCores: 6912
84
+ memoryTotal: "85899345920"
85
+ name: NVIDIA A100-SXM4-80GB
86
+ uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
87
+ - architecture: Ampere
88
+ cudaCores: 6912
89
+ memoryTotal: "85899345920"
90
+ name: NVIDIA A100-SXM4-80GB
91
+ uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
92
+ host: 164-152-109-69
93
+ memory:
94
+ total: "1902324936704"
95
+ os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
96
+ program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
97
+ python: CPython 3.10.18
98
+ root: myvla_exp/bl_multiview_history_depth_set_table
99
+ startedAt: "2025-08-20T09:00:50.038649Z"
100
+ writerId: beqmkna8ab9p0pvi5lhmmalckxlanj5v
101
+ m: []
102
+ python_version: 3.10.18
103
+ t:
104
+ "1":
105
+ - 1
106
+ - 2
107
+ - 3
108
+ - 11
109
+ - 41
110
+ - 49
111
+ - 63
112
+ - 71
113
+ "2":
114
+ - 1
115
+ - 2
116
+ - 3
117
+ - 11
118
+ - 41
119
+ - 49
120
+ - 63
121
+ - 71
122
+ "3":
123
+ - 13
124
+ - 16
125
+ "4": 3.10.18
126
+ "5": 0.21.0
127
+ "6": 4.40.1
128
+ "12": 0.21.0
129
+ "13": linux-x86_64
130
+ data_root_dir:
131
+ value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
132
+ global_pose:
133
+ value: false
134
+ hf_token:
135
+ value: .hf_token
136
+ image_aug:
137
+ value: false
138
+ is_grasped:
139
+ value: false
140
+ is_resume:
141
+ value: false
142
+ model_type:
143
+ value: my_vla_qwen
144
+ pretrained_checkpoint:
145
+ value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
146
+ qpos:
147
+ value: false
148
+ resume_epoch:
149
+ value: null
150
+ resume_step:
151
+ value: null
152
+ run_id:
153
+ value: bl_multiview_history_depth_set_table
154
+ run_id_note:
155
+ value: null
156
+ run_root_dir:
157
+ value: myvla_exp
158
+ save_interval:
159
+ value: 1000
160
+ seed:
161
+ value: 7
162
+ segmentation:
163
+ value: false
164
+ trackers:
165
+ value:
166
+ - jsonl
167
+ - wandb
168
+ vla:
169
+ value:
170
+ action_chunk_size: 8
171
+ action_tokenizer: extra_action_tokenizer
172
+ base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
173
+ compress_history: false
174
+ data_mix: bridge
175
+ enable_gradient_checkpointing: true
176
+ enable_mixed_precision_training: true
177
+ epochs: 10
178
+ expected_world_size: 4
179
+ freeze_llm_backbone: false
180
+ freeze_vision_backbone: true
181
+ global_batch_size: 512
182
+ image_sequence_len: 10
183
+ image_window_size: 4
184
+ learning_rate: 2e-05
185
+ lr_scheduler_type: constant
186
+ max_grad_norm: 1
187
+ max_steps: null
188
+ per_device_batch_size: 16
189
+ reduce_in_full_precision: true
190
+ save_every_n_steps: 25000
191
+ shuffle_buffer_size: 256000
192
+ train_strategy: fsdp-full-shard
193
+ type: myvla-qwen-224px+mx-mshab
194
+ unfreeze_last_llm_layer: false
195
+ use_depth_image: true
196
+ use_flow_matching: false
197
+ use_wrist_image: true
198
+ vla_id: myvla-qwen-224px+mx-mshab
199
+ warmup_ratio: 0
200
+ weight_decay: 0
201
+ wandb_entity:
202
+ value: traysen879-uc-san-diego
203
+ wandb_project:
204
+ value: mshab_vla
wandb/run-20250820_090050-3cqyp9vg/files/output.log ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 08/20 [09:00:51] INFO  | >> [*] Starting VLA Training Loop ]8;id=686782;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\train.py]8;;\:]8;id=709047;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#340\340]8;;\
2
+ Traceback (most recent call last):
3
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 360, in <module>
4
+ train()
5
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
6
+ response = fn(cfg, *args, **kwargs)
7
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 341, in train
8
+ train_strategy.run_vla_training(
9
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/strategies/base_strategy.py", line 342, in run_vla_training
10
+ output, aux_loss = self.vlm(
11
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
12
+ return self._call_impl(*args, **kwargs)
13
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
14
+ return forward_call(*args, **kwargs)
15
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward
16
+ output = self._fsdp_wrapped_module(*args, **kwargs)
17
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
18
+ return self._call_impl(*args, **kwargs)
19
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
20
+ return forward_call(*args, **kwargs)
21
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/vlas/myvla.py", line 262, in forward
22
+ output = super().forward(
23
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/vlms/prismatic.py", line 497, in forward
24
+ output = self.llm_backbone(
25
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
26
+ return self._call_impl(*args, **kwargs)
27
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
28
+ return forward_call(*args, **kwargs)
29
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/backbones/llm/base_llm.py", line 221, in forward
30
+ output: CausalLMOutputWithPast = self.llm(
31
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
32
+ return self._call_impl(*args, **kwargs)
33
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
34
+ return forward_call(*args, **kwargs)
35
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 1196, in forward
36
+ loss = loss_fct(shift_logits, shift_labels)
37
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
38
+ return self._call_impl(*args, **kwargs)
39
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
40
+ return forward_call(*args, **kwargs)
41
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/loss.py", line 1179, in forward
42
+ return F.cross_entropy(input, target, weight=self.weight,
43
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/functional.py", line 3059, in cross_entropy
44
+ return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
45
+ torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 23.96 GiB. GPU 0 has a total capacity of 79.14 GiB of which 21.94 GiB is free. Including non-PyTorch memory, this process has 57.19 GiB memory in use. Of the allocated memory 53.35 GiB is allocated by PyTorch, and 1.49 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation. See documentation for Memory Management (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)
wandb/run-20250820_090050-3cqyp9vg/files/requirements.txt ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setuptools==78.1.1
2
+ wheel==0.45.1
3
+ pip==25.1
4
+ sentencepiece==0.1.99
5
+ mpmath==1.3.0
6
+ libclang==18.1.1
7
+ flatbuffers==25.2.10
8
+ zipp==3.23.0
9
+ wrapt==1.14.1
10
+ urllib3==2.5.0
11
+ typing_extensions==4.14.1
12
+ typeguard==2.13.3
13
+ tqdm==4.67.1
14
+ toml==0.10.2
15
+ termcolor==3.1.0
16
+ tensorflow-io-gcs-filesystem==0.37.1
17
+ tensorflow-estimator==2.15.0
18
+ tensorboard-data-server==0.7.2
19
+ sympy==1.14.0
20
+ smmap==5.0.2
21
+ six==1.17.0
22
+ safetensors==0.5.3
23
+ regex==2025.7.34
24
+ PyYAML==6.0.2
25
+ pyparsing==3.2.3
26
+ Pygments==2.19.2
27
+ pyasn1==0.6.1
28
+ psutil==7.0.0
29
+ protobuf==4.21.12
30
+ platformdirs==4.3.8
31
+ pillow==11.3.0
32
+ packaging==25.0
33
+ opt_einsum==3.4.0
34
+ oauthlib==3.3.1
35
+ nvidia-nvtx-cu12==12.1.105
36
+ nvidia-nvjitlink-cu12==12.9.86
37
+ nvidia-nccl-cu12==2.19.3
38
+ nvidia-curand-cu12==10.3.2.106
39
+ nvidia-cufft-cu12==11.0.2.54
40
+ nvidia-cuda-runtime-cu12==12.1.105
41
+ nvidia-cuda-nvrtc-cu12==12.1.105
42
+ nvidia-cuda-cupti-cu12==12.1.105
43
+ nvidia-cublas-cu12==12.1.3.1
44
+ numpy==1.26.4
45
+ networkx==3.4.2
46
+ mypy_extensions==1.1.0
47
+ mergedeep==1.3.4
48
+ mdurl==0.1.2
49
+ MarkupSafe==3.0.2
50
+ Markdown==3.8.2
51
+ kiwisolver==1.4.8
52
+ keras==2.15.0
53
+ importlib_resources==6.5.2
54
+ idna==3.10
55
+ hf-xet==1.1.5
56
+ grpcio==1.74.0
57
+ gast==0.6.0
58
+ fsspec==2025.7.0
59
+ fonttools==4.59.0
60
+ filelock==3.18.0
61
+ etils==1.13.0
62
+ einops==0.8.1
63
+ cycler==0.12.1
64
+ click==8.2.1
65
+ charset-normalizer==3.4.2
66
+ certifi==2025.8.3
67
+ cachetools==5.5.2
68
+ attrs==25.3.0
69
+ annotated-types==0.7.0
70
+ absl-py==2.3.1
71
+ Werkzeug==3.1.3
72
+ typing-inspection==0.4.1
73
+ typing-inspect==0.9.0
74
+ triton==2.2.0
75
+ trimesh==4.7.1
76
+ tensorflow-metadata==1.17.2
77
+ tensorflow-addons==0.23.0
78
+ sentry-sdk==2.34.1
79
+ scipy==1.15.3
80
+ rsa==4.9.1
81
+ requests==2.32.4
82
+ pyyaml-include==1.4.1
83
+ python-dateutil==2.9.0.post0
84
+ pydantic_core==2.33.2
85
+ pyasn1_modules==0.4.2
86
+ promise==2.3
87
+ OpenEXR==3.3.5
88
+ nvidia-cusparse-cu12==12.1.0.106
89
+ nvidia-cudnn-cu12==8.9.2.26
90
+ ml-dtypes==0.2.0
91
+ markdown-it-py==3.0.0
92
+ jsonlines==4.0.0
93
+ json-numpy==2.1.1
94
+ Jinja2==3.1.6
95
+ h5py==3.14.0
96
+ google-pasta==0.2.0
97
+ gitdb==4.0.12
98
+ dm-tree==0.1.9
99
+ contourpy==1.3.2
100
+ astunparse==1.6.3
101
+ rich==14.1.0
102
+ requests-oauthlib==2.0.0
103
+ pydantic==2.11.7
104
+ nvidia-cusolver-cu12==11.4.5.107
105
+ matplotlib==3.10.5
106
+ huggingface-hub==0.34.3
107
+ google-auth==2.40.3
108
+ GitPython==3.1.45
109
+ draccus==0.8.0
110
+ wandb==0.21.0
111
+ torch==2.2.0
112
+ tokenizers==0.19.1
113
+ google-auth-oauthlib==1.2.2
114
+ array_record==0.7.2
115
+ transformers==4.40.1
116
+ torchvision==0.17.0
117
+ torchaudio==2.2.0
118
+ tensorboard==2.15.2
119
+ accelerate==1.9.0
120
+ timm==0.9.10
121
+ tensorflow-datasets==4.9.3
122
+ tensorflow==2.15.0
123
+ peft==0.11.1
124
+ tensorflow-graphics==2021.12.3
125
+ dlimp==0.0.1
126
+ openvla==0.0.3
127
+ ninja==1.11.1.4
128
+ flash-attn==2.5.5
129
+ autocommand==2.2.2
130
+ backports.tarfile==1.2.0
131
+ importlib_metadata==8.0.0
132
+ inflect==7.3.1
133
+ jaraco.collections==5.1.0
134
+ jaraco.context==5.3.0
135
+ jaraco.functools==4.0.1
136
+ jaraco.text==3.12.1
137
+ more-itertools==10.3.0
138
+ packaging==24.2
139
+ platformdirs==4.2.2
140
+ tomli==2.0.1
141
+ typeguard==4.3.0
142
+ typing_extensions==4.12.2
143
+ wheel==0.45.1
144
+ zipp==3.19.2
wandb/run-20250820_090050-3cqyp9vg/files/wandb-metadata.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-08-20T09:00:50.038649Z",
5
+ "args": [
6
+ "--pretrained_checkpoint",
7
+ "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
8
+ "--model_type",
9
+ "my_vla_qwen",
10
+ "--vla.type",
11
+ "myvla-qwen-224px+mx-mshab",
12
+ "--vla.expected_world_size",
13
+ "4",
14
+ "--vla.global_batch_size",
15
+ "512",
16
+ "--vla.per_device_batch_size",
17
+ "16",
18
+ "--vla.learning_rate",
19
+ "2e-5",
20
+ "--vla.freeze_vision_backbone",
21
+ "True",
22
+ "--vla.freeze_llm_backbone",
23
+ "False",
24
+ "--vla.use_flow_matching",
25
+ "False",
26
+ "--vla.compress_history",
27
+ "False",
28
+ "--vla.epochs",
29
+ "10",
30
+ "--save_interval",
31
+ "1000",
32
+ "--run_id",
33
+ "bl_multiview_history_depth_set_table"
34
+ ],
35
+ "program": "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
36
+ "codePath": "vla-scripts/train.py",
37
+ "codePathLocal": "vla-scripts/train.py",
38
+ "git": {
39
+ "remote": "https://github.com/TRS07170/myvla.git",
40
+ "commit": "409e4c9a165115624c271028e9b3ee335991b747"
41
+ },
42
+ "email": "traysen879@gmail.com",
43
+ "root": "myvla_exp/bl_multiview_history_depth_set_table",
44
+ "host": "164-152-109-69",
45
+ "executable": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
46
+ "cpu_count": 240,
47
+ "cpu_count_logical": 240,
48
+ "gpu": "NVIDIA A100-SXM4-80GB",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "20812690710528",
53
+ "used": "36590596096"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "1902324936704"
58
+ },
59
+ "gpu_nvidia": [
60
+ {
61
+ "name": "NVIDIA A100-SXM4-80GB",
62
+ "memoryTotal": "85899345920",
63
+ "cudaCores": 6912,
64
+ "architecture": "Ampere",
65
+ "uuid": "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
66
+ },
67
+ {
68
+ "name": "NVIDIA A100-SXM4-80GB",
69
+ "memoryTotal": "85899345920",
70
+ "cudaCores": 6912,
71
+ "architecture": "Ampere",
72
+ "uuid": "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
73
+ },
74
+ {
75
+ "name": "NVIDIA A100-SXM4-80GB",
76
+ "memoryTotal": "85899345920",
77
+ "cudaCores": 6912,
78
+ "architecture": "Ampere",
79
+ "uuid": "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
80
+ },
81
+ {
82
+ "name": "NVIDIA A100-SXM4-80GB",
83
+ "memoryTotal": "85899345920",
84
+ "cudaCores": 6912,
85
+ "architecture": "Ampere",
86
+ "uuid": "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
87
+ },
88
+ {
89
+ "name": "NVIDIA A100-SXM4-80GB",
90
+ "memoryTotal": "85899345920",
91
+ "cudaCores": 6912,
92
+ "architecture": "Ampere",
93
+ "uuid": "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
94
+ },
95
+ {
96
+ "name": "NVIDIA A100-SXM4-80GB",
97
+ "memoryTotal": "85899345920",
98
+ "cudaCores": 6912,
99
+ "architecture": "Ampere",
100
+ "uuid": "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
101
+ },
102
+ {
103
+ "name": "NVIDIA A100-SXM4-80GB",
104
+ "memoryTotal": "85899345920",
105
+ "cudaCores": 6912,
106
+ "architecture": "Ampere",
107
+ "uuid": "GPU-335150e5-634c-68e2-4930-656c95e62244"
108
+ },
109
+ {
110
+ "name": "NVIDIA A100-SXM4-80GB",
111
+ "memoryTotal": "85899345920",
112
+ "cudaCores": 6912,
113
+ "architecture": "Ampere",
114
+ "uuid": "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
115
+ }
116
+ ],
117
+ "cudaVersion": "12.4",
118
+ "writerId": "beqmkna8ab9p0pvi5lhmmalckxlanj5v"
119
+ }
wandb/run-20250820_090050-3cqyp9vg/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":5},"_runtime":5}
wandb/run-20250820_090050-3cqyp9vg/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-20T09:00:50.318936347Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp_xbsp_gd/port-3716149.txt","pid":3716149,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-08-20T09:00:50.320251545Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3716149-3717420-2439034435/socket","Net":"unix"}}
3
+ {"time":"2025-08-20T09:00:50.320360967Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3716149}
4
+ {"time":"2025-08-20T09:00:50.339976746Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-08-20T09:00:50.355069096Z","level":"INFO","msg":"handleInformInit: received","streamId":"3cqyp9vg","id":"1(@)"}
6
+ {"time":"2025-08-20T09:00:50.652012124Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"3cqyp9vg","id":"1(@)"}
7
+ {"time":"2025-08-20T09:00:56.549737027Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-08-20T09:00:56.549990573Z","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2025-08-20T09:00:56.549964273Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
10
+ {"time":"2025-08-20T09:00:56.550106895Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3716149-3717420-2439034435/socket","Net":"unix"}}
11
+ {"time":"2025-08-20T09:00:56.550226089Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
12
+ {"time":"2025-08-20T09:00:57.066086131Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-08-20T09:00:57.066135043Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-08-20T09:00:57.066147462Z","level":"INFO","msg":"server is closed"}
wandb/run-20250820_090050-3cqyp9vg/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-20T09:00:50.35935463Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-08-20T09:00:50.651655947Z","level":"INFO","msg":"stream: created new stream","id":"3cqyp9vg"}
3
+ {"time":"2025-08-20T09:00:50.651899322Z","level":"INFO","msg":"stream: started","id":"3cqyp9vg"}
4
+ {"time":"2025-08-20T09:00:50.651968385Z","level":"INFO","msg":"handler: started","stream_id":"3cqyp9vg"}
5
+ {"time":"2025-08-20T09:00:50.651942224Z","level":"INFO","msg":"writer: Do: started","stream_id":"3cqyp9vg"}
6
+ {"time":"2025-08-20T09:00:50.652025325Z","level":"INFO","msg":"sender: started","stream_id":"3cqyp9vg"}
7
+ {"time":"2025-08-20T09:00:56.550049694Z","level":"INFO","msg":"stream: closing","id":"3cqyp9vg"}
8
+ {"time":"2025-08-20T09:00:56.863554095Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-08-20T09:00:57.058314771Z","level":"INFO","msg":"handler: closed","stream_id":"3cqyp9vg"}
10
+ {"time":"2025-08-20T09:00:57.058449844Z","level":"INFO","msg":"writer: Close: closed","stream_id":"3cqyp9vg"}
11
+ {"time":"2025-08-20T09:00:57.058507535Z","level":"INFO","msg":"sender: closed","stream_id":"3cqyp9vg"}
12
+ {"time":"2025-08-20T09:00:57.063852083Z","level":"INFO","msg":"stream: closed","id":"3cqyp9vg"}
wandb/run-20250820_090050-3cqyp9vg/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-08-20 09:00:50,078 INFO MainThread:3716149 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-08-20 09:00:50,079 INFO MainThread:3716149 [wandb_setup.py:_flush():80] Configure stats pid to 3716149
3
+ 2025-08-20 09:00:50,079 INFO MainThread:3716149 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
4
+ 2025-08-20 09:00:50,081 INFO MainThread:3716149 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
5
+ 2025-08-20 09:00:50,081 INFO MainThread:3716149 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-08-20 09:00:50,084 INFO MainThread:3716149 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_090050-3cqyp9vg/logs/debug.log
7
+ 2025-08-20 09:00:50,086 INFO MainThread:3716149 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_090050-3cqyp9vg/logs/debug-internal.log
8
+ 2025-08-20 09:00:50,088 INFO MainThread:3716149 [wandb_init.py:init():830] calling init triggers
9
+ 2025-08-20 09:00:50,090 INFO MainThread:3716149 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 4, 'global_batch_size': 512, 'per_device_batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 10, 'use_wrist_image': True, 'use_depth_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 4}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_history_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'segmentation': False, '_wandb': {}}
11
+ 2025-08-20 09:00:50,092 INFO MainThread:3716149 [wandb_init.py:init():871] starting backend
12
+ 2025-08-20 09:00:50,340 INFO MainThread:3716149 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-08-20 09:00:50,349 INFO MainThread:3716149 [wandb_init.py:init():882] backend started and connected
14
+ 2025-08-20 09:00:50,355 INFO MainThread:3716149 [wandb_init.py:init():953] updated telemetry
15
+ 2025-08-20 09:00:50,391 INFO MainThread:3716149 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-08-20 09:00:50,848 INFO MainThread:3716149 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-08-20 09:00:51,348 INFO MainThread:3716149 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-08-20 09:00:51,354 INFO MainThread:3716149 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-08-20 09:00:51,355 INFO MainThread:3716149 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-08-20 09:00:51,355 INFO MainThread:3716149 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-08-20 09:00:51,365 INFO MainThread:3716149 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-08-20 09:00:56,545 INFO MsgRouterThr:3716149 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
wandb/run-20250820_090050-3cqyp9vg/run-3cqyp9vg.wandb ADDED
Binary file (12 kB). View file
 
wandb/run-20250820_091147-qt1b7wpr/files/config.yaml ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ ns7xudltxb04d0a18uyfc7iv6bgtzle9:
6
+ args:
7
+ - --pretrained_checkpoint
8
+ - Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
9
+ - --model_type
10
+ - my_vla_qwen
11
+ - --vla.type
12
+ - myvla-qwen-224px+mx-mshab
13
+ - --vla.expected_world_size
14
+ - "4"
15
+ - --vla.global_batch_size
16
+ - "512"
17
+ - --vla.per_device_batch_size
18
+ - "8"
19
+ - --vla.learning_rate
20
+ - "2e-5"
21
+ - --vla.freeze_vision_backbone
22
+ - "True"
23
+ - --vla.freeze_llm_backbone
24
+ - "False"
25
+ - --vla.use_flow_matching
26
+ - "False"
27
+ - --vla.compress_history
28
+ - "False"
29
+ - --vla.epochs
30
+ - "10"
31
+ - --save_interval
32
+ - "1000"
33
+ - --run_id
34
+ - bl_multiview_history_depth_set_table
35
+ codePath: vla-scripts/train.py
36
+ codePathLocal: vla-scripts/train.py
37
+ cpu_count: 240
38
+ cpu_count_logical: 240
39
+ cudaVersion: "12.4"
40
+ disk:
41
+ /:
42
+ total: "20812690710528"
43
+ used: "36590649344"
44
+ email: traysen879@gmail.com
45
+ executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
46
+ git:
47
+ commit: 409e4c9a165115624c271028e9b3ee335991b747
48
+ remote: https://github.com/TRS07170/myvla.git
49
+ gpu: NVIDIA A100-SXM4-80GB
50
+ gpu_count: 8
51
+ gpu_nvidia:
52
+ - architecture: Ampere
53
+ cudaCores: 6912
54
+ memoryTotal: "85899345920"
55
+ name: NVIDIA A100-SXM4-80GB
56
+ uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
57
+ - architecture: Ampere
58
+ cudaCores: 6912
59
+ memoryTotal: "85899345920"
60
+ name: NVIDIA A100-SXM4-80GB
61
+ uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
62
+ - architecture: Ampere
63
+ cudaCores: 6912
64
+ memoryTotal: "85899345920"
65
+ name: NVIDIA A100-SXM4-80GB
66
+ uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
67
+ - architecture: Ampere
68
+ cudaCores: 6912
69
+ memoryTotal: "85899345920"
70
+ name: NVIDIA A100-SXM4-80GB
71
+ uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
72
+ - architecture: Ampere
73
+ cudaCores: 6912
74
+ memoryTotal: "85899345920"
75
+ name: NVIDIA A100-SXM4-80GB
76
+ uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
77
+ - architecture: Ampere
78
+ cudaCores: 6912
79
+ memoryTotal: "85899345920"
80
+ name: NVIDIA A100-SXM4-80GB
81
+ uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
82
+ - architecture: Ampere
83
+ cudaCores: 6912
84
+ memoryTotal: "85899345920"
85
+ name: NVIDIA A100-SXM4-80GB
86
+ uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
87
+ - architecture: Ampere
88
+ cudaCores: 6912
89
+ memoryTotal: "85899345920"
90
+ name: NVIDIA A100-SXM4-80GB
91
+ uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
92
+ host: 164-152-109-69
93
+ memory:
94
+ total: "1902324936704"
95
+ os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
96
+ program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
97
+ python: CPython 3.10.18
98
+ root: myvla_exp/bl_multiview_history_depth_set_table
99
+ startedAt: "2025-08-20T09:11:47.878963Z"
100
+ writerId: ns7xudltxb04d0a18uyfc7iv6bgtzle9
101
+ m: []
102
+ python_version: 3.10.18
103
+ t:
104
+ "1":
105
+ - 1
106
+ - 2
107
+ - 3
108
+ - 11
109
+ - 41
110
+ - 49
111
+ - 63
112
+ - 71
113
+ "2":
114
+ - 1
115
+ - 2
116
+ - 3
117
+ - 11
118
+ - 41
119
+ - 49
120
+ - 63
121
+ - 71
122
+ "3":
123
+ - 13
124
+ - 16
125
+ "4": 3.10.18
126
+ "5": 0.21.0
127
+ "6": 4.40.1
128
+ "12": 0.21.0
129
+ "13": linux-x86_64
130
+ data_root_dir:
131
+ value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
132
+ global_pose:
133
+ value: false
134
+ hf_token:
135
+ value: .hf_token
136
+ image_aug:
137
+ value: false
138
+ is_grasped:
139
+ value: false
140
+ is_resume:
141
+ value: false
142
+ model_type:
143
+ value: my_vla_qwen
144
+ pretrained_checkpoint:
145
+ value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
146
+ qpos:
147
+ value: false
148
+ resume_epoch:
149
+ value: null
150
+ resume_step:
151
+ value: null
152
+ run_id:
153
+ value: bl_multiview_history_depth_set_table
154
+ run_id_note:
155
+ value: null
156
+ run_root_dir:
157
+ value: myvla_exp
158
+ save_interval:
159
+ value: 1000
160
+ seed:
161
+ value: 7
162
+ segmentation:
163
+ value: false
164
+ trackers:
165
+ value:
166
+ - jsonl
167
+ - wandb
168
+ vla:
169
+ value:
170
+ action_chunk_size: 8
171
+ action_tokenizer: extra_action_tokenizer
172
+ base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
173
+ compress_history: false
174
+ data_mix: bridge
175
+ enable_gradient_checkpointing: true
176
+ enable_mixed_precision_training: true
177
+ epochs: 10
178
+ expected_world_size: 4
179
+ freeze_llm_backbone: false
180
+ freeze_vision_backbone: true
181
+ global_batch_size: 512
182
+ image_sequence_len: 10
183
+ image_window_size: 4
184
+ learning_rate: 2e-05
185
+ lr_scheduler_type: constant
186
+ max_grad_norm: 1
187
+ max_steps: null
188
+ per_device_batch_size: 8
189
+ reduce_in_full_precision: true
190
+ save_every_n_steps: 25000
191
+ shuffle_buffer_size: 256000
192
+ train_strategy: fsdp-full-shard
193
+ type: myvla-qwen-224px+mx-mshab
194
+ unfreeze_last_llm_layer: false
195
+ use_depth_image: true
196
+ use_flow_matching: false
197
+ use_wrist_image: true
198
+ vla_id: myvla-qwen-224px+mx-mshab
199
+ warmup_ratio: 0
200
+ weight_decay: 0
201
+ wandb_entity:
202
+ value: traysen879-uc-san-diego
203
+ wandb_project:
204
+ value: mshab_vla
wandb/run-20250820_091147-qt1b7wpr/files/output.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 08/20 [09:11:49] INFO  | >> [*] Starting VLA Training Loop ]8;id=686782;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\train.py]8;;\:]8;id=709047;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#340\340]8;;\
2
+ Traceback (most recent call last):
3
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 360, in <module>
4
+ train()
5
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
6
+ response = fn(cfg, *args, **kwargs)
7
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 341, in train
8
+ train_strategy.run_vla_training(
9
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/strategies/base_strategy.py", line 367, in run_vla_training
10
+ normalized_loss = (loss + aux_loss) / self.grad_accumulation_steps
11
+ TypeError: unsupported operand type(s) for +: 'Tensor' and 'NoneType'
wandb/run-20250820_091147-qt1b7wpr/files/requirements.txt ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setuptools==78.1.1
2
+ wheel==0.45.1
3
+ pip==25.1
4
+ sentencepiece==0.1.99
5
+ mpmath==1.3.0
6
+ libclang==18.1.1
7
+ flatbuffers==25.2.10
8
+ zipp==3.23.0
9
+ wrapt==1.14.1
10
+ urllib3==2.5.0
11
+ typing_extensions==4.14.1
12
+ typeguard==2.13.3
13
+ tqdm==4.67.1
14
+ toml==0.10.2
15
+ termcolor==3.1.0
16
+ tensorflow-io-gcs-filesystem==0.37.1
17
+ tensorflow-estimator==2.15.0
18
+ tensorboard-data-server==0.7.2
19
+ sympy==1.14.0
20
+ smmap==5.0.2
21
+ six==1.17.0
22
+ safetensors==0.5.3
23
+ regex==2025.7.34
24
+ PyYAML==6.0.2
25
+ pyparsing==3.2.3
26
+ Pygments==2.19.2
27
+ pyasn1==0.6.1
28
+ psutil==7.0.0
29
+ protobuf==4.21.12
30
+ platformdirs==4.3.8
31
+ pillow==11.3.0
32
+ packaging==25.0
33
+ opt_einsum==3.4.0
34
+ oauthlib==3.3.1
35
+ nvidia-nvtx-cu12==12.1.105
36
+ nvidia-nvjitlink-cu12==12.9.86
37
+ nvidia-nccl-cu12==2.19.3
38
+ nvidia-curand-cu12==10.3.2.106
39
+ nvidia-cufft-cu12==11.0.2.54
40
+ nvidia-cuda-runtime-cu12==12.1.105
41
+ nvidia-cuda-nvrtc-cu12==12.1.105
42
+ nvidia-cuda-cupti-cu12==12.1.105
43
+ nvidia-cublas-cu12==12.1.3.1
44
+ numpy==1.26.4
45
+ networkx==3.4.2
46
+ mypy_extensions==1.1.0
47
+ mergedeep==1.3.4
48
+ mdurl==0.1.2
49
+ MarkupSafe==3.0.2
50
+ Markdown==3.8.2
51
+ kiwisolver==1.4.8
52
+ keras==2.15.0
53
+ importlib_resources==6.5.2
54
+ idna==3.10
55
+ hf-xet==1.1.5
56
+ grpcio==1.74.0
57
+ gast==0.6.0
58
+ fsspec==2025.7.0
59
+ fonttools==4.59.0
60
+ filelock==3.18.0
61
+ etils==1.13.0
62
+ einops==0.8.1
63
+ cycler==0.12.1
64
+ click==8.2.1
65
+ charset-normalizer==3.4.2
66
+ certifi==2025.8.3
67
+ cachetools==5.5.2
68
+ attrs==25.3.0
69
+ annotated-types==0.7.0
70
+ absl-py==2.3.1
71
+ Werkzeug==3.1.3
72
+ typing-inspection==0.4.1
73
+ typing-inspect==0.9.0
74
+ triton==2.2.0
75
+ trimesh==4.7.1
76
+ tensorflow-metadata==1.17.2
77
+ tensorflow-addons==0.23.0
78
+ sentry-sdk==2.34.1
79
+ scipy==1.15.3
80
+ rsa==4.9.1
81
+ requests==2.32.4
82
+ pyyaml-include==1.4.1
83
+ python-dateutil==2.9.0.post0
84
+ pydantic_core==2.33.2
85
+ pyasn1_modules==0.4.2
86
+ promise==2.3
87
+ OpenEXR==3.3.5
88
+ nvidia-cusparse-cu12==12.1.0.106
89
+ nvidia-cudnn-cu12==8.9.2.26
90
+ ml-dtypes==0.2.0
91
+ markdown-it-py==3.0.0
92
+ jsonlines==4.0.0
93
+ json-numpy==2.1.1
94
+ Jinja2==3.1.6
95
+ h5py==3.14.0
96
+ google-pasta==0.2.0
97
+ gitdb==4.0.12
98
+ dm-tree==0.1.9
99
+ contourpy==1.3.2
100
+ astunparse==1.6.3
101
+ rich==14.1.0
102
+ requests-oauthlib==2.0.0
103
+ pydantic==2.11.7
104
+ nvidia-cusolver-cu12==11.4.5.107
105
+ matplotlib==3.10.5
106
+ huggingface-hub==0.34.3
107
+ google-auth==2.40.3
108
+ GitPython==3.1.45
109
+ draccus==0.8.0
110
+ wandb==0.21.0
111
+ torch==2.2.0
112
+ tokenizers==0.19.1
113
+ google-auth-oauthlib==1.2.2
114
+ array_record==0.7.2
115
+ transformers==4.40.1
116
+ torchvision==0.17.0
117
+ torchaudio==2.2.0
118
+ tensorboard==2.15.2
119
+ accelerate==1.9.0
120
+ timm==0.9.10
121
+ tensorflow-datasets==4.9.3
122
+ tensorflow==2.15.0
123
+ peft==0.11.1
124
+ tensorflow-graphics==2021.12.3
125
+ dlimp==0.0.1
126
+ openvla==0.0.3
127
+ ninja==1.11.1.4
128
+ flash-attn==2.5.5
129
+ autocommand==2.2.2
130
+ backports.tarfile==1.2.0
131
+ importlib_metadata==8.0.0
132
+ inflect==7.3.1
133
+ jaraco.collections==5.1.0
134
+ jaraco.context==5.3.0
135
+ jaraco.functools==4.0.1
136
+ jaraco.text==3.12.1
137
+ more-itertools==10.3.0
138
+ packaging==24.2
139
+ platformdirs==4.2.2
140
+ tomli==2.0.1
141
+ typeguard==4.3.0
142
+ typing_extensions==4.12.2
143
+ wheel==0.45.1
144
+ zipp==3.19.2
wandb/run-20250820_091147-qt1b7wpr/files/wandb-metadata.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-08-20T09:11:47.878963Z",
5
+ "args": [
6
+ "--pretrained_checkpoint",
7
+ "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
8
+ "--model_type",
9
+ "my_vla_qwen",
10
+ "--vla.type",
11
+ "myvla-qwen-224px+mx-mshab",
12
+ "--vla.expected_world_size",
13
+ "4",
14
+ "--vla.global_batch_size",
15
+ "512",
16
+ "--vla.per_device_batch_size",
17
+ "8",
18
+ "--vla.learning_rate",
19
+ "2e-5",
20
+ "--vla.freeze_vision_backbone",
21
+ "True",
22
+ "--vla.freeze_llm_backbone",
23
+ "False",
24
+ "--vla.use_flow_matching",
25
+ "False",
26
+ "--vla.compress_history",
27
+ "False",
28
+ "--vla.epochs",
29
+ "10",
30
+ "--save_interval",
31
+ "1000",
32
+ "--run_id",
33
+ "bl_multiview_history_depth_set_table"
34
+ ],
35
+ "program": "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
36
+ "codePath": "vla-scripts/train.py",
37
+ "codePathLocal": "vla-scripts/train.py",
38
+ "git": {
39
+ "remote": "https://github.com/TRS07170/myvla.git",
40
+ "commit": "409e4c9a165115624c271028e9b3ee335991b747"
41
+ },
42
+ "email": "traysen879@gmail.com",
43
+ "root": "myvla_exp/bl_multiview_history_depth_set_table",
44
+ "host": "164-152-109-69",
45
+ "executable": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
46
+ "cpu_count": 240,
47
+ "cpu_count_logical": 240,
48
+ "gpu": "NVIDIA A100-SXM4-80GB",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "20812690710528",
53
+ "used": "36590649344"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "1902324936704"
58
+ },
59
+ "gpu_nvidia": [
60
+ {
61
+ "name": "NVIDIA A100-SXM4-80GB",
62
+ "memoryTotal": "85899345920",
63
+ "cudaCores": 6912,
64
+ "architecture": "Ampere",
65
+ "uuid": "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
66
+ },
67
+ {
68
+ "name": "NVIDIA A100-SXM4-80GB",
69
+ "memoryTotal": "85899345920",
70
+ "cudaCores": 6912,
71
+ "architecture": "Ampere",
72
+ "uuid": "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
73
+ },
74
+ {
75
+ "name": "NVIDIA A100-SXM4-80GB",
76
+ "memoryTotal": "85899345920",
77
+ "cudaCores": 6912,
78
+ "architecture": "Ampere",
79
+ "uuid": "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
80
+ },
81
+ {
82
+ "name": "NVIDIA A100-SXM4-80GB",
83
+ "memoryTotal": "85899345920",
84
+ "cudaCores": 6912,
85
+ "architecture": "Ampere",
86
+ "uuid": "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
87
+ },
88
+ {
89
+ "name": "NVIDIA A100-SXM4-80GB",
90
+ "memoryTotal": "85899345920",
91
+ "cudaCores": 6912,
92
+ "architecture": "Ampere",
93
+ "uuid": "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
94
+ },
95
+ {
96
+ "name": "NVIDIA A100-SXM4-80GB",
97
+ "memoryTotal": "85899345920",
98
+ "cudaCores": 6912,
99
+ "architecture": "Ampere",
100
+ "uuid": "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
101
+ },
102
+ {
103
+ "name": "NVIDIA A100-SXM4-80GB",
104
+ "memoryTotal": "85899345920",
105
+ "cudaCores": 6912,
106
+ "architecture": "Ampere",
107
+ "uuid": "GPU-335150e5-634c-68e2-4930-656c95e62244"
108
+ },
109
+ {
110
+ "name": "NVIDIA A100-SXM4-80GB",
111
+ "memoryTotal": "85899345920",
112
+ "cudaCores": 6912,
113
+ "architecture": "Ampere",
114
+ "uuid": "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
115
+ }
116
+ ],
117
+ "cudaVersion": "12.4",
118
+ "writerId": "ns7xudltxb04d0a18uyfc7iv6bgtzle9"
119
+ }
wandb/run-20250820_091147-qt1b7wpr/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":17},"_runtime":17}
wandb/run-20250820_091147-qt1b7wpr/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-20T09:11:48.070858415Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpkvexv_g_/port-3718336.txt","pid":3718336,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-08-20T09:11:48.072184654Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3718336}
3
+ {"time":"2025-08-20T09:11:48.072124353Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3718336-3718637-3969663796/socket","Net":"unix"}}
4
+ {"time":"2025-08-20T09:11:48.174906593Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-08-20T09:11:48.19028357Z","level":"INFO","msg":"handleInformInit: received","streamId":"qt1b7wpr","id":"1(@)"}
6
+ {"time":"2025-08-20T09:11:48.483840285Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"qt1b7wpr","id":"1(@)"}
7
+ {"time":"2025-08-20T09:12:06.693009732Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-08-20T09:12:06.69337768Z","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2025-08-20T09:12:06.693489192Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3718336-3718637-3969663796/socket","Net":"unix"}}
10
+ {"time":"2025-08-20T09:12:06.693662267Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
11
+ {"time":"2025-08-20T09:12:06.693698588Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
12
+ {"time":"2025-08-20T09:12:07.063511858Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-08-20T09:12:07.063548929Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-08-20T09:12:07.063559649Z","level":"INFO","msg":"server is closed"}
wandb/run-20250820_091147-qt1b7wpr/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-20T09:11:48.19393641Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-08-20T09:11:48.483544178Z","level":"INFO","msg":"stream: created new stream","id":"qt1b7wpr"}
3
+ {"time":"2025-08-20T09:11:48.483667061Z","level":"INFO","msg":"stream: started","id":"qt1b7wpr"}
4
+ {"time":"2025-08-20T09:11:48.483708542Z","level":"INFO","msg":"writer: Do: started","stream_id":"qt1b7wpr"}
5
+ {"time":"2025-08-20T09:11:48.483745883Z","level":"INFO","msg":"sender: started","stream_id":"qt1b7wpr"}
6
+ {"time":"2025-08-20T09:11:48.483736392Z","level":"INFO","msg":"handler: started","stream_id":"qt1b7wpr"}
7
+ {"time":"2025-08-20T09:12:06.693274498Z","level":"INFO","msg":"stream: closing","id":"qt1b7wpr"}
8
+ {"time":"2025-08-20T09:12:06.954966462Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-08-20T09:12:07.056140267Z","level":"INFO","msg":"handler: closed","stream_id":"qt1b7wpr"}
10
+ {"time":"2025-08-20T09:12:07.056225088Z","level":"INFO","msg":"writer: Close: closed","stream_id":"qt1b7wpr"}
11
+ {"time":"2025-08-20T09:12:07.056243458Z","level":"INFO","msg":"sender: closed","stream_id":"qt1b7wpr"}
12
+ {"time":"2025-08-20T09:12:07.061147966Z","level":"INFO","msg":"stream: closed","id":"qt1b7wpr"}
wandb/run-20250820_091147-qt1b7wpr/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-08-20 09:11:47,921 INFO MainThread:3718336 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-08-20 09:11:47,921 INFO MainThread:3718336 [wandb_setup.py:_flush():80] Configure stats pid to 3718336
3
+ 2025-08-20 09:11:47,923 INFO MainThread:3718336 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
4
+ 2025-08-20 09:11:47,923 INFO MainThread:3718336 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
5
+ 2025-08-20 09:11:47,926 INFO MainThread:3718336 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-08-20 09:11:47,926 INFO MainThread:3718336 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_091147-qt1b7wpr/logs/debug.log
7
+ 2025-08-20 09:11:47,928 INFO MainThread:3718336 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_091147-qt1b7wpr/logs/debug-internal.log
8
+ 2025-08-20 09:11:47,930 INFO MainThread:3718336 [wandb_init.py:init():830] calling init triggers
9
+ 2025-08-20 09:11:47,932 INFO MainThread:3718336 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 4, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 10, 'use_wrist_image': True, 'use_depth_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 4}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_history_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'segmentation': False, '_wandb': {}}
11
+ 2025-08-20 09:11:47,934 INFO MainThread:3718336 [wandb_init.py:init():871] starting backend
12
+ 2025-08-20 09:11:48,175 INFO MainThread:3718336 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-08-20 09:11:48,183 INFO MainThread:3718336 [wandb_init.py:init():882] backend started and connected
14
+ 2025-08-20 09:11:48,187 INFO MainThread:3718336 [wandb_init.py:init():953] updated telemetry
15
+ 2025-08-20 09:11:48,216 INFO MainThread:3718336 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-08-20 09:11:48,783 INFO MainThread:3718336 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-08-20 09:11:49,363 INFO MainThread:3718336 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-08-20 09:11:49,364 INFO MainThread:3718336 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-08-20 09:11:49,366 INFO MainThread:3718336 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-08-20 09:11:49,366 INFO MainThread:3718336 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-08-20 09:11:49,377 INFO MainThread:3718336 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-08-20 09:12:06,691 INFO MsgRouterThr:3718336 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
wandb/run-20250820_091147-qt1b7wpr/run-qt1b7wpr.wandb ADDED
Binary file (9.75 kB). View file
 
wandb/run-20250820_094544-v4zsb4rt/files/config.yaml ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ g4ijpwdw40101qp7o8xly8dy5734ui43:
6
+ args:
7
+ - --pretrained_checkpoint
8
+ - Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
9
+ - --model_type
10
+ - my_vla_qwen
11
+ - --vla.type
12
+ - myvla-qwen-224px+mx-mshab
13
+ - --vla.expected_world_size
14
+ - "4"
15
+ - --vla.global_batch_size
16
+ - "512"
17
+ - --vla.per_device_batch_size
18
+ - "8"
19
+ - --vla.learning_rate
20
+ - "2e-5"
21
+ - --vla.freeze_vision_backbone
22
+ - "True"
23
+ - --vla.freeze_llm_backbone
24
+ - "False"
25
+ - --vla.use_flow_matching
26
+ - "False"
27
+ - --vla.compress_history
28
+ - "False"
29
+ - --vla.epochs
30
+ - "10"
31
+ - --save_interval
32
+ - "1000"
33
+ - --run_id
34
+ - bl_multiview_history_depth_set_table
35
+ codePath: vla-scripts/train.py
36
+ codePathLocal: vla-scripts/train.py
37
+ cpu_count: 240
38
+ cpu_count_logical: 240
39
+ cudaVersion: "12.4"
40
+ disk:
41
+ /:
42
+ total: "20812690710528"
43
+ used: "36591529984"
44
+ email: traysen879@gmail.com
45
+ executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
46
+ git:
47
+ commit: 409e4c9a165115624c271028e9b3ee335991b747
48
+ remote: https://github.com/TRS07170/myvla.git
49
+ gpu: NVIDIA A100-SXM4-80GB
50
+ gpu_count: 8
51
+ gpu_nvidia:
52
+ - architecture: Ampere
53
+ cudaCores: 6912
54
+ memoryTotal: "85899345920"
55
+ name: NVIDIA A100-SXM4-80GB
56
+ uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
57
+ - architecture: Ampere
58
+ cudaCores: 6912
59
+ memoryTotal: "85899345920"
60
+ name: NVIDIA A100-SXM4-80GB
61
+ uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
62
+ - architecture: Ampere
63
+ cudaCores: 6912
64
+ memoryTotal: "85899345920"
65
+ name: NVIDIA A100-SXM4-80GB
66
+ uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
67
+ - architecture: Ampere
68
+ cudaCores: 6912
69
+ memoryTotal: "85899345920"
70
+ name: NVIDIA A100-SXM4-80GB
71
+ uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
72
+ - architecture: Ampere
73
+ cudaCores: 6912
74
+ memoryTotal: "85899345920"
75
+ name: NVIDIA A100-SXM4-80GB
76
+ uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
77
+ - architecture: Ampere
78
+ cudaCores: 6912
79
+ memoryTotal: "85899345920"
80
+ name: NVIDIA A100-SXM4-80GB
81
+ uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
82
+ - architecture: Ampere
83
+ cudaCores: 6912
84
+ memoryTotal: "85899345920"
85
+ name: NVIDIA A100-SXM4-80GB
86
+ uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
87
+ - architecture: Ampere
88
+ cudaCores: 6912
89
+ memoryTotal: "85899345920"
90
+ name: NVIDIA A100-SXM4-80GB
91
+ uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
92
+ host: 164-152-109-69
93
+ memory:
94
+ total: "1902324936704"
95
+ os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
96
+ program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
97
+ python: CPython 3.10.18
98
+ root: myvla_exp/bl_multiview_history_depth_set_table
99
+ startedAt: "2025-08-20T09:45:44.966050Z"
100
+ writerId: g4ijpwdw40101qp7o8xly8dy5734ui43
101
+ m: []
102
+ python_version: 3.10.18
103
+ t:
104
+ "1":
105
+ - 1
106
+ - 2
107
+ - 3
108
+ - 11
109
+ - 41
110
+ - 49
111
+ - 63
112
+ - 71
113
+ "2":
114
+ - 1
115
+ - 2
116
+ - 3
117
+ - 11
118
+ - 41
119
+ - 49
120
+ - 63
121
+ - 71
122
+ "3":
123
+ - 13
124
+ - 16
125
+ "4": 3.10.18
126
+ "5": 0.21.0
127
+ "6": 4.40.1
128
+ "12": 0.21.0
129
+ "13": linux-x86_64
130
+ data_root_dir:
131
+ value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
132
+ global_pose:
133
+ value: false
134
+ hf_token:
135
+ value: .hf_token
136
+ image_aug:
137
+ value: false
138
+ is_grasped:
139
+ value: false
140
+ is_resume:
141
+ value: false
142
+ model_type:
143
+ value: my_vla_qwen
144
+ pretrained_checkpoint:
145
+ value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
146
+ qpos:
147
+ value: false
148
+ resume_epoch:
149
+ value: null
150
+ resume_step:
151
+ value: null
152
+ run_id:
153
+ value: bl_multiview_history_depth_set_table
154
+ run_id_note:
155
+ value: null
156
+ run_root_dir:
157
+ value: myvla_exp
158
+ save_interval:
159
+ value: 1000
160
+ seed:
161
+ value: 7
162
+ segmentation:
163
+ value: false
164
+ trackers:
165
+ value:
166
+ - jsonl
167
+ - wandb
168
+ vla:
169
+ value:
170
+ action_chunk_size: 8
171
+ action_tokenizer: extra_action_tokenizer
172
+ base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
173
+ compress_history: false
174
+ data_mix: bridge
175
+ enable_gradient_checkpointing: true
176
+ enable_mixed_precision_training: true
177
+ epochs: 10
178
+ expected_world_size: 4
179
+ freeze_llm_backbone: false
180
+ freeze_vision_backbone: true
181
+ global_batch_size: 512
182
+ image_sequence_len: 10
183
+ image_window_size: 4
184
+ learning_rate: 2e-05
185
+ lr_scheduler_type: constant
186
+ max_grad_norm: 1
187
+ max_steps: null
188
+ per_device_batch_size: 8
189
+ reduce_in_full_precision: true
190
+ save_every_n_steps: 25000
191
+ shuffle_buffer_size: 256000
192
+ train_strategy: fsdp-full-shard
193
+ type: myvla-qwen-224px+mx-mshab
194
+ unfreeze_last_llm_layer: false
195
+ use_depth_image: true
196
+ use_flow_matching: false
197
+ use_wrist_image: true
198
+ vla_id: myvla-qwen-224px+mx-mshab
199
+ warmup_ratio: 0
200
+ weight_decay: 0
201
+ wandb_entity:
202
+ value: traysen879-uc-san-diego
203
+ wandb_project:
204
+ value: mshab_vla
wandb/run-20250820_094544-v4zsb4rt/files/output.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 08/20 [09:45:46] INFO  | >> [*] Starting VLA Training Loop ]8;id=686782;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\train.py]8;;\:]8;id=709047;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#340\340]8;;\
2
+ Traceback (most recent call last):
3
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 360, in <module>
4
+ train()
5
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
6
+ response = fn(cfg, *args, **kwargs)
7
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 341, in train
8
+ train_strategy.run_vla_training(
9
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/strategies/base_strategy.py", line 370, in run_vla_training
10
+ normalized_loss = loss + aux_loss
11
+ TypeError: unsupported operand type(s) for +: 'Tensor' and 'NoneType'
wandb/run-20250820_094544-v4zsb4rt/files/requirements.txt ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setuptools==78.1.1
2
+ wheel==0.45.1
3
+ pip==25.1
4
+ sentencepiece==0.1.99
5
+ mpmath==1.3.0
6
+ libclang==18.1.1
7
+ flatbuffers==25.2.10
8
+ zipp==3.23.0
9
+ wrapt==1.14.1
10
+ urllib3==2.5.0
11
+ typing_extensions==4.14.1
12
+ typeguard==2.13.3
13
+ tqdm==4.67.1
14
+ toml==0.10.2
15
+ termcolor==3.1.0
16
+ tensorflow-io-gcs-filesystem==0.37.1
17
+ tensorflow-estimator==2.15.0
18
+ tensorboard-data-server==0.7.2
19
+ sympy==1.14.0
20
+ smmap==5.0.2
21
+ six==1.17.0
22
+ safetensors==0.5.3
23
+ regex==2025.7.34
24
+ PyYAML==6.0.2
25
+ pyparsing==3.2.3
26
+ Pygments==2.19.2
27
+ pyasn1==0.6.1
28
+ psutil==7.0.0
29
+ protobuf==4.21.12
30
+ platformdirs==4.3.8
31
+ pillow==11.3.0
32
+ packaging==25.0
33
+ opt_einsum==3.4.0
34
+ oauthlib==3.3.1
35
+ nvidia-nvtx-cu12==12.1.105
36
+ nvidia-nvjitlink-cu12==12.9.86
37
+ nvidia-nccl-cu12==2.19.3
38
+ nvidia-curand-cu12==10.3.2.106
39
+ nvidia-cufft-cu12==11.0.2.54
40
+ nvidia-cuda-runtime-cu12==12.1.105
41
+ nvidia-cuda-nvrtc-cu12==12.1.105
42
+ nvidia-cuda-cupti-cu12==12.1.105
43
+ nvidia-cublas-cu12==12.1.3.1
44
+ numpy==1.26.4
45
+ networkx==3.4.2
46
+ mypy_extensions==1.1.0
47
+ mergedeep==1.3.4
48
+ mdurl==0.1.2
49
+ MarkupSafe==3.0.2
50
+ Markdown==3.8.2
51
+ kiwisolver==1.4.8
52
+ keras==2.15.0
53
+ importlib_resources==6.5.2
54
+ idna==3.10
55
+ hf-xet==1.1.5
56
+ grpcio==1.74.0
57
+ gast==0.6.0
58
+ fsspec==2025.7.0
59
+ fonttools==4.59.0
60
+ filelock==3.18.0
61
+ etils==1.13.0
62
+ einops==0.8.1
63
+ cycler==0.12.1
64
+ click==8.2.1
65
+ charset-normalizer==3.4.2
66
+ certifi==2025.8.3
67
+ cachetools==5.5.2
68
+ attrs==25.3.0
69
+ annotated-types==0.7.0
70
+ absl-py==2.3.1
71
+ Werkzeug==3.1.3
72
+ typing-inspection==0.4.1
73
+ typing-inspect==0.9.0
74
+ triton==2.2.0
75
+ trimesh==4.7.1
76
+ tensorflow-metadata==1.17.2
77
+ tensorflow-addons==0.23.0
78
+ sentry-sdk==2.34.1
79
+ scipy==1.15.3
80
+ rsa==4.9.1
81
+ requests==2.32.4
82
+ pyyaml-include==1.4.1
83
+ python-dateutil==2.9.0.post0
84
+ pydantic_core==2.33.2
85
+ pyasn1_modules==0.4.2
86
+ promise==2.3
87
+ OpenEXR==3.3.5
88
+ nvidia-cusparse-cu12==12.1.0.106
89
+ nvidia-cudnn-cu12==8.9.2.26
90
+ ml-dtypes==0.2.0
91
+ markdown-it-py==3.0.0
92
+ jsonlines==4.0.0
93
+ json-numpy==2.1.1
94
+ Jinja2==3.1.6
95
+ h5py==3.14.0
96
+ google-pasta==0.2.0
97
+ gitdb==4.0.12
98
+ dm-tree==0.1.9
99
+ contourpy==1.3.2
100
+ astunparse==1.6.3
101
+ rich==14.1.0
102
+ requests-oauthlib==2.0.0
103
+ pydantic==2.11.7
104
+ nvidia-cusolver-cu12==11.4.5.107
105
+ matplotlib==3.10.5
106
+ huggingface-hub==0.34.3
107
+ google-auth==2.40.3
108
+ GitPython==3.1.45
109
+ draccus==0.8.0
110
+ wandb==0.21.0
111
+ torch==2.2.0
112
+ tokenizers==0.19.1
113
+ google-auth-oauthlib==1.2.2
114
+ array_record==0.7.2
115
+ transformers==4.40.1
116
+ torchvision==0.17.0
117
+ torchaudio==2.2.0
118
+ tensorboard==2.15.2
119
+ accelerate==1.9.0
120
+ timm==0.9.10
121
+ tensorflow-datasets==4.9.3
122
+ tensorflow==2.15.0
123
+ peft==0.11.1
124
+ tensorflow-graphics==2021.12.3
125
+ dlimp==0.0.1
126
+ openvla==0.0.3
127
+ ninja==1.11.1.4
128
+ flash-attn==2.5.5
129
+ autocommand==2.2.2
130
+ backports.tarfile==1.2.0
131
+ importlib_metadata==8.0.0
132
+ inflect==7.3.1
133
+ jaraco.collections==5.1.0
134
+ jaraco.context==5.3.0
135
+ jaraco.functools==4.0.1
136
+ jaraco.text==3.12.1
137
+ more-itertools==10.3.0
138
+ packaging==24.2
139
+ platformdirs==4.2.2
140
+ tomli==2.0.1
141
+ typeguard==4.3.0
142
+ typing_extensions==4.12.2
143
+ wheel==0.45.1
144
+ zipp==3.19.2
wandb/run-20250820_094544-v4zsb4rt/files/wandb-metadata.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-08-20T09:45:44.966050Z",
5
+ "args": [
6
+ "--pretrained_checkpoint",
7
+ "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
8
+ "--model_type",
9
+ "my_vla_qwen",
10
+ "--vla.type",
11
+ "myvla-qwen-224px+mx-mshab",
12
+ "--vla.expected_world_size",
13
+ "4",
14
+ "--vla.global_batch_size",
15
+ "512",
16
+ "--vla.per_device_batch_size",
17
+ "8",
18
+ "--vla.learning_rate",
19
+ "2e-5",
20
+ "--vla.freeze_vision_backbone",
21
+ "True",
22
+ "--vla.freeze_llm_backbone",
23
+ "False",
24
+ "--vla.use_flow_matching",
25
+ "False",
26
+ "--vla.compress_history",
27
+ "False",
28
+ "--vla.epochs",
29
+ "10",
30
+ "--save_interval",
31
+ "1000",
32
+ "--run_id",
33
+ "bl_multiview_history_depth_set_table"
34
+ ],
35
+ "program": "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
36
+ "codePath": "vla-scripts/train.py",
37
+ "codePathLocal": "vla-scripts/train.py",
38
+ "git": {
39
+ "remote": "https://github.com/TRS07170/myvla.git",
40
+ "commit": "409e4c9a165115624c271028e9b3ee335991b747"
41
+ },
42
+ "email": "traysen879@gmail.com",
43
+ "root": "myvla_exp/bl_multiview_history_depth_set_table",
44
+ "host": "164-152-109-69",
45
+ "executable": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
46
+ "cpu_count": 240,
47
+ "cpu_count_logical": 240,
48
+ "gpu": "NVIDIA A100-SXM4-80GB",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "20812690710528",
53
+ "used": "36591529984"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "1902324936704"
58
+ },
59
+ "gpu_nvidia": [
60
+ {
61
+ "name": "NVIDIA A100-SXM4-80GB",
62
+ "memoryTotal": "85899345920",
63
+ "cudaCores": 6912,
64
+ "architecture": "Ampere",
65
+ "uuid": "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
66
+ },
67
+ {
68
+ "name": "NVIDIA A100-SXM4-80GB",
69
+ "memoryTotal": "85899345920",
70
+ "cudaCores": 6912,
71
+ "architecture": "Ampere",
72
+ "uuid": "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
73
+ },
74
+ {
75
+ "name": "NVIDIA A100-SXM4-80GB",
76
+ "memoryTotal": "85899345920",
77
+ "cudaCores": 6912,
78
+ "architecture": "Ampere",
79
+ "uuid": "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
80
+ },
81
+ {
82
+ "name": "NVIDIA A100-SXM4-80GB",
83
+ "memoryTotal": "85899345920",
84
+ "cudaCores": 6912,
85
+ "architecture": "Ampere",
86
+ "uuid": "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
87
+ },
88
+ {
89
+ "name": "NVIDIA A100-SXM4-80GB",
90
+ "memoryTotal": "85899345920",
91
+ "cudaCores": 6912,
92
+ "architecture": "Ampere",
93
+ "uuid": "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
94
+ },
95
+ {
96
+ "name": "NVIDIA A100-SXM4-80GB",
97
+ "memoryTotal": "85899345920",
98
+ "cudaCores": 6912,
99
+ "architecture": "Ampere",
100
+ "uuid": "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
101
+ },
102
+ {
103
+ "name": "NVIDIA A100-SXM4-80GB",
104
+ "memoryTotal": "85899345920",
105
+ "cudaCores": 6912,
106
+ "architecture": "Ampere",
107
+ "uuid": "GPU-335150e5-634c-68e2-4930-656c95e62244"
108
+ },
109
+ {
110
+ "name": "NVIDIA A100-SXM4-80GB",
111
+ "memoryTotal": "85899345920",
112
+ "cudaCores": 6912,
113
+ "architecture": "Ampere",
114
+ "uuid": "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
115
+ }
116
+ ],
117
+ "cudaVersion": "12.4",
118
+ "writerId": "g4ijpwdw40101qp7o8xly8dy5734ui43"
119
+ }
wandb/run-20250820_094544-v4zsb4rt/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":4},"_runtime":4}
wandb/run-20250820_094544-v4zsb4rt/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-20T09:45:45.174640913Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpajeirdwt/port-3720956.txt","pid":3720956,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-08-20T09:45:45.175809008Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3720956-3721259-1781631432/socket","Net":"unix"}}
3
+ {"time":"2025-08-20T09:45:45.175845869Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3720956}
4
+ {"time":"2025-08-20T09:45:45.282055099Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-08-20T09:45:45.297889706Z","level":"INFO","msg":"handleInformInit: received","streamId":"v4zsb4rt","id":"1(@)"}
6
+ {"time":"2025-08-20T09:45:45.599182755Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"v4zsb4rt","id":"1(@)"}
7
+ {"time":"2025-08-20T09:45:50.517475416Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-08-20T09:45:50.517551897Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
9
+ {"time":"2025-08-20T09:45:50.517592208Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-08-20T09:45:50.51768034Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-08-20T09:45:50.517838543Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3720956-3721259-1781631432/socket","Net":"unix"}}
12
+ {"time":"2025-08-20T09:45:51.092657793Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-08-20T09:45:51.092700164Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-08-20T09:45:51.092711254Z","level":"INFO","msg":"server is closed"}
wandb/run-20250820_094544-v4zsb4rt/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-20T09:45:45.302214181Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-08-20T09:45:45.598969981Z","level":"INFO","msg":"stream: created new stream","id":"v4zsb4rt"}
3
+ {"time":"2025-08-20T09:45:45.599115765Z","level":"INFO","msg":"stream: started","id":"v4zsb4rt"}
4
+ {"time":"2025-08-20T09:45:45.599177585Z","level":"INFO","msg":"handler: started","stream_id":"v4zsb4rt"}
5
+ {"time":"2025-08-20T09:45:45.599261048Z","level":"INFO","msg":"sender: started","stream_id":"v4zsb4rt"}
6
+ {"time":"2025-08-20T09:45:45.599154334Z","level":"INFO","msg":"writer: Do: started","stream_id":"v4zsb4rt"}
7
+ {"time":"2025-08-20T09:45:50.517603438Z","level":"INFO","msg":"stream: closing","id":"v4zsb4rt"}
8
+ {"time":"2025-08-20T09:45:51.014498279Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-08-20T09:45:51.086917937Z","level":"INFO","msg":"handler: closed","stream_id":"v4zsb4rt"}
10
+ {"time":"2025-08-20T09:45:51.087014889Z","level":"INFO","msg":"writer: Close: closed","stream_id":"v4zsb4rt"}
11
+ {"time":"2025-08-20T09:45:51.08706522Z","level":"INFO","msg":"sender: closed","stream_id":"v4zsb4rt"}
12
+ {"time":"2025-08-20T09:45:51.089613696Z","level":"INFO","msg":"stream: closed","id":"v4zsb4rt"}
wandb/run-20250820_094544-v4zsb4rt/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-08-20 09:45:45,020 INFO MainThread:3720956 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-08-20 09:45:45,020 INFO MainThread:3720956 [wandb_setup.py:_flush():80] Configure stats pid to 3720956
3
+ 2025-08-20 09:45:45,020 INFO MainThread:3720956 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
4
+ 2025-08-20 09:45:45,023 INFO MainThread:3720956 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
5
+ 2025-08-20 09:45:45,026 INFO MainThread:3720956 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-08-20 09:45:45,028 INFO MainThread:3720956 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_094544-v4zsb4rt/logs/debug.log
7
+ 2025-08-20 09:45:45,030 INFO MainThread:3720956 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_094544-v4zsb4rt/logs/debug-internal.log
8
+ 2025-08-20 09:45:45,032 INFO MainThread:3720956 [wandb_init.py:init():830] calling init triggers
9
+ 2025-08-20 09:45:45,034 INFO MainThread:3720956 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 4, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 10, 'use_wrist_image': True, 'use_depth_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 4}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_history_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'segmentation': False, '_wandb': {}}
11
+ 2025-08-20 09:45:45,036 INFO MainThread:3720956 [wandb_init.py:init():871] starting backend
12
+ 2025-08-20 09:45:45,282 INFO MainThread:3720956 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-08-20 09:45:45,291 INFO MainThread:3720956 [wandb_init.py:init():882] backend started and connected
14
+ 2025-08-20 09:45:45,298 INFO MainThread:3720956 [wandb_init.py:init():953] updated telemetry
15
+ 2025-08-20 09:45:45,334 INFO MainThread:3720956 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-08-20 09:45:45,866 INFO MainThread:3720956 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-08-20 09:45:46,365 INFO MainThread:3720956 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-08-20 09:45:46,365 INFO MainThread:3720956 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-08-20 09:45:46,368 INFO MainThread:3720956 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-08-20 09:45:46,370 INFO MainThread:3720956 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-08-20 09:45:46,379 INFO MainThread:3720956 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-08-20 09:45:50,516 INFO MsgRouterThr:3720956 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
wandb/run-20250820_094544-v4zsb4rt/run-v4zsb4rt.wandb ADDED
Binary file (6.28 kB). View file
 
wandb/run-20250820_095138-in9qu6p9/files/config.yaml ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ ba5qoz48zgwc5ju7pflr3irzrnx350dd:
6
+ args:
7
+ - --pretrained_checkpoint
8
+ - Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
9
+ - --model_type
10
+ - my_vla_qwen
11
+ - --vla.type
12
+ - myvla-qwen-224px+mx-mshab
13
+ - --vla.expected_world_size
14
+ - "4"
15
+ - --vla.global_batch_size
16
+ - "512"
17
+ - --vla.per_device_batch_size
18
+ - "8"
19
+ - --vla.learning_rate
20
+ - "2e-5"
21
+ - --vla.freeze_vision_backbone
22
+ - "True"
23
+ - --vla.freeze_llm_backbone
24
+ - "False"
25
+ - --vla.use_flow_matching
26
+ - "False"
27
+ - --vla.compress_history
28
+ - "False"
29
+ - --vla.epochs
30
+ - "10"
31
+ - --save_interval
32
+ - "1000"
33
+ - --run_id
34
+ - bl_multiview_history_depth_set_table
35
+ codePath: vla-scripts/train.py
36
+ codePathLocal: vla-scripts/train.py
37
+ cpu_count: 240
38
+ cpu_count_logical: 240
39
+ cudaVersion: "12.4"
40
+ disk:
41
+ /:
42
+ total: "20812690710528"
43
+ used: "36591595520"
44
+ email: traysen879@gmail.com
45
+ executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
46
+ git:
47
+ commit: 409e4c9a165115624c271028e9b3ee335991b747
48
+ remote: https://github.com/TRS07170/myvla.git
49
+ gpu: NVIDIA A100-SXM4-80GB
50
+ gpu_count: 8
51
+ gpu_nvidia:
52
+ - architecture: Ampere
53
+ cudaCores: 6912
54
+ memoryTotal: "85899345920"
55
+ name: NVIDIA A100-SXM4-80GB
56
+ uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
57
+ - architecture: Ampere
58
+ cudaCores: 6912
59
+ memoryTotal: "85899345920"
60
+ name: NVIDIA A100-SXM4-80GB
61
+ uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
62
+ - architecture: Ampere
63
+ cudaCores: 6912
64
+ memoryTotal: "85899345920"
65
+ name: NVIDIA A100-SXM4-80GB
66
+ uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
67
+ - architecture: Ampere
68
+ cudaCores: 6912
69
+ memoryTotal: "85899345920"
70
+ name: NVIDIA A100-SXM4-80GB
71
+ uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
72
+ - architecture: Ampere
73
+ cudaCores: 6912
74
+ memoryTotal: "85899345920"
75
+ name: NVIDIA A100-SXM4-80GB
76
+ uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
77
+ - architecture: Ampere
78
+ cudaCores: 6912
79
+ memoryTotal: "85899345920"
80
+ name: NVIDIA A100-SXM4-80GB
81
+ uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
82
+ - architecture: Ampere
83
+ cudaCores: 6912
84
+ memoryTotal: "85899345920"
85
+ name: NVIDIA A100-SXM4-80GB
86
+ uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
87
+ - architecture: Ampere
88
+ cudaCores: 6912
89
+ memoryTotal: "85899345920"
90
+ name: NVIDIA A100-SXM4-80GB
91
+ uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
92
+ host: 164-152-109-69
93
+ memory:
94
+ total: "1902324936704"
95
+ os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
96
+ program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
97
+ python: CPython 3.10.18
98
+ root: myvla_exp/bl_multiview_history_depth_set_table
99
+ startedAt: "2025-08-20T09:51:38.344162Z"
100
+ writerId: ba5qoz48zgwc5ju7pflr3irzrnx350dd
101
+ m: []
102
+ python_version: 3.10.18
103
+ t:
104
+ "1":
105
+ - 1
106
+ - 2
107
+ - 3
108
+ - 11
109
+ - 41
110
+ - 49
111
+ - 63
112
+ - 71
113
+ "2":
114
+ - 1
115
+ - 2
116
+ - 3
117
+ - 11
118
+ - 41
119
+ - 49
120
+ - 63
121
+ - 71
122
+ "3":
123
+ - 13
124
+ - 16
125
+ "4": 3.10.18
126
+ "5": 0.21.0
127
+ "6": 4.40.1
128
+ "12": 0.21.0
129
+ "13": linux-x86_64
130
+ data_root_dir:
131
+ value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
132
+ global_pose:
133
+ value: false
134
+ hf_token:
135
+ value: .hf_token
136
+ image_aug:
137
+ value: false
138
+ is_grasped:
139
+ value: false
140
+ is_resume:
141
+ value: false
142
+ model_type:
143
+ value: my_vla_qwen
144
+ pretrained_checkpoint:
145
+ value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
146
+ qpos:
147
+ value: false
148
+ resume_epoch:
149
+ value: null
150
+ resume_step:
151
+ value: null
152
+ run_id:
153
+ value: bl_multiview_history_depth_set_table
154
+ run_id_note:
155
+ value: null
156
+ run_root_dir:
157
+ value: myvla_exp
158
+ save_interval:
159
+ value: 1000
160
+ seed:
161
+ value: 7
162
+ segmentation:
163
+ value: false
164
+ trackers:
165
+ value:
166
+ - jsonl
167
+ - wandb
168
+ vla:
169
+ value:
170
+ action_chunk_size: 8
171
+ action_tokenizer: extra_action_tokenizer
172
+ base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
173
+ compress_history: false
174
+ data_mix: bridge
175
+ enable_gradient_checkpointing: true
176
+ enable_mixed_precision_training: true
177
+ epochs: 10
178
+ expected_world_size: 4
179
+ freeze_llm_backbone: false
180
+ freeze_vision_backbone: true
181
+ global_batch_size: 512
182
+ image_sequence_len: 10
183
+ image_window_size: 4
184
+ learning_rate: 2e-05
185
+ lr_scheduler_type: constant
186
+ max_grad_norm: 1
187
+ max_steps: null
188
+ per_device_batch_size: 8
189
+ reduce_in_full_precision: true
190
+ save_every_n_steps: 25000
191
+ shuffle_buffer_size: 256000
192
+ train_strategy: fsdp-full-shard
193
+ type: myvla-qwen-224px+mx-mshab
194
+ unfreeze_last_llm_layer: false
195
+ use_depth_image: true
196
+ use_flow_matching: false
197
+ use_wrist_image: true
198
+ vla_id: myvla-qwen-224px+mx-mshab
199
+ warmup_ratio: 0
200
+ weight_decay: 0
201
+ wandb_entity:
202
+ value: traysen879-uc-san-diego
203
+ wandb_project:
204
+ value: mshab_vla
wandb/run-20250820_095138-in9qu6p9/files/output.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 08/20 [09:51:39] INFO  | >> [*] Starting VLA Training Loop ]8;id=686782;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\train.py]8;;\:]8;id=709047;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#340\340]8;;\
2
+ Traceback (most recent call last):
3
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 360, in <module>
4
+ train()
5
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
6
+ response = fn(cfg, *args, **kwargs)
7
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 341, in train
8
+ train_strategy.run_vla_training(
9
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/strategies/base_strategy.py", line 427, in run_vla_training
10
+ metrics.commit(
11
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/metrics.py", line 318, in commit
12
+ self.state[key].append(value.detach())
13
+ AttributeError: 'NoneType' object has no attribute 'detach'
wandb/run-20250820_095138-in9qu6p9/files/requirements.txt ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setuptools==78.1.1
2
+ wheel==0.45.1
3
+ pip==25.1
4
+ sentencepiece==0.1.99
5
+ mpmath==1.3.0
6
+ libclang==18.1.1
7
+ flatbuffers==25.2.10
8
+ zipp==3.23.0
9
+ wrapt==1.14.1
10
+ urllib3==2.5.0
11
+ typing_extensions==4.14.1
12
+ typeguard==2.13.3
13
+ tqdm==4.67.1
14
+ toml==0.10.2
15
+ termcolor==3.1.0
16
+ tensorflow-io-gcs-filesystem==0.37.1
17
+ tensorflow-estimator==2.15.0
18
+ tensorboard-data-server==0.7.2
19
+ sympy==1.14.0
20
+ smmap==5.0.2
21
+ six==1.17.0
22
+ safetensors==0.5.3
23
+ regex==2025.7.34
24
+ PyYAML==6.0.2
25
+ pyparsing==3.2.3
26
+ Pygments==2.19.2
27
+ pyasn1==0.6.1
28
+ psutil==7.0.0
29
+ protobuf==4.21.12
30
+ platformdirs==4.3.8
31
+ pillow==11.3.0
32
+ packaging==25.0
33
+ opt_einsum==3.4.0
34
+ oauthlib==3.3.1
35
+ nvidia-nvtx-cu12==12.1.105
36
+ nvidia-nvjitlink-cu12==12.9.86
37
+ nvidia-nccl-cu12==2.19.3
38
+ nvidia-curand-cu12==10.3.2.106
39
+ nvidia-cufft-cu12==11.0.2.54
40
+ nvidia-cuda-runtime-cu12==12.1.105
41
+ nvidia-cuda-nvrtc-cu12==12.1.105
42
+ nvidia-cuda-cupti-cu12==12.1.105
43
+ nvidia-cublas-cu12==12.1.3.1
44
+ numpy==1.26.4
45
+ networkx==3.4.2
46
+ mypy_extensions==1.1.0
47
+ mergedeep==1.3.4
48
+ mdurl==0.1.2
49
+ MarkupSafe==3.0.2
50
+ Markdown==3.8.2
51
+ kiwisolver==1.4.8
52
+ keras==2.15.0
53
+ importlib_resources==6.5.2
54
+ idna==3.10
55
+ hf-xet==1.1.5
56
+ grpcio==1.74.0
57
+ gast==0.6.0
58
+ fsspec==2025.7.0
59
+ fonttools==4.59.0
60
+ filelock==3.18.0
61
+ etils==1.13.0
62
+ einops==0.8.1
63
+ cycler==0.12.1
64
+ click==8.2.1
65
+ charset-normalizer==3.4.2
66
+ certifi==2025.8.3
67
+ cachetools==5.5.2
68
+ attrs==25.3.0
69
+ annotated-types==0.7.0
70
+ absl-py==2.3.1
71
+ Werkzeug==3.1.3
72
+ typing-inspection==0.4.1
73
+ typing-inspect==0.9.0
74
+ triton==2.2.0
75
+ trimesh==4.7.1
76
+ tensorflow-metadata==1.17.2
77
+ tensorflow-addons==0.23.0
78
+ sentry-sdk==2.34.1
79
+ scipy==1.15.3
80
+ rsa==4.9.1
81
+ requests==2.32.4
82
+ pyyaml-include==1.4.1
83
+ python-dateutil==2.9.0.post0
84
+ pydantic_core==2.33.2
85
+ pyasn1_modules==0.4.2
86
+ promise==2.3
87
+ OpenEXR==3.3.5
88
+ nvidia-cusparse-cu12==12.1.0.106
89
+ nvidia-cudnn-cu12==8.9.2.26
90
+ ml-dtypes==0.2.0
91
+ markdown-it-py==3.0.0
92
+ jsonlines==4.0.0
93
+ json-numpy==2.1.1
94
+ Jinja2==3.1.6
95
+ h5py==3.14.0
96
+ google-pasta==0.2.0
97
+ gitdb==4.0.12
98
+ dm-tree==0.1.9
99
+ contourpy==1.3.2
100
+ astunparse==1.6.3
101
+ rich==14.1.0
102
+ requests-oauthlib==2.0.0
103
+ pydantic==2.11.7
104
+ nvidia-cusolver-cu12==11.4.5.107
105
+ matplotlib==3.10.5
106
+ huggingface-hub==0.34.3
107
+ google-auth==2.40.3
108
+ GitPython==3.1.45
109
+ draccus==0.8.0
110
+ wandb==0.21.0
111
+ torch==2.2.0
112
+ tokenizers==0.19.1
113
+ google-auth-oauthlib==1.2.2
114
+ array_record==0.7.2
115
+ transformers==4.40.1
116
+ torchvision==0.17.0
117
+ torchaudio==2.2.0
118
+ tensorboard==2.15.2
119
+ accelerate==1.9.0
120
+ timm==0.9.10
121
+ tensorflow-datasets==4.9.3
122
+ tensorflow==2.15.0
123
+ peft==0.11.1
124
+ tensorflow-graphics==2021.12.3
125
+ dlimp==0.0.1
126
+ openvla==0.0.3
127
+ ninja==1.11.1.4
128
+ flash-attn==2.5.5
129
+ autocommand==2.2.2
130
+ backports.tarfile==1.2.0
131
+ importlib_metadata==8.0.0
132
+ inflect==7.3.1
133
+ jaraco.collections==5.1.0
134
+ jaraco.context==5.3.0
135
+ jaraco.functools==4.0.1
136
+ jaraco.text==3.12.1
137
+ more-itertools==10.3.0
138
+ packaging==24.2
139
+ platformdirs==4.2.2
140
+ tomli==2.0.1
141
+ typeguard==4.3.0
142
+ typing_extensions==4.12.2
143
+ wheel==0.45.1
144
+ zipp==3.19.2
wandb/run-20250820_095138-in9qu6p9/files/wandb-metadata.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-08-20T09:51:38.344162Z",
5
+ "args": [
6
+ "--pretrained_checkpoint",
7
+ "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
8
+ "--model_type",
9
+ "my_vla_qwen",
10
+ "--vla.type",
11
+ "myvla-qwen-224px+mx-mshab",
12
+ "--vla.expected_world_size",
13
+ "4",
14
+ "--vla.global_batch_size",
15
+ "512",
16
+ "--vla.per_device_batch_size",
17
+ "8",
18
+ "--vla.learning_rate",
19
+ "2e-5",
20
+ "--vla.freeze_vision_backbone",
21
+ "True",
22
+ "--vla.freeze_llm_backbone",
23
+ "False",
24
+ "--vla.use_flow_matching",
25
+ "False",
26
+ "--vla.compress_history",
27
+ "False",
28
+ "--vla.epochs",
29
+ "10",
30
+ "--save_interval",
31
+ "1000",
32
+ "--run_id",
33
+ "bl_multiview_history_depth_set_table"
34
+ ],
35
+ "program": "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
36
+ "codePath": "vla-scripts/train.py",
37
+ "codePathLocal": "vla-scripts/train.py",
38
+ "git": {
39
+ "remote": "https://github.com/TRS07170/myvla.git",
40
+ "commit": "409e4c9a165115624c271028e9b3ee335991b747"
41
+ },
42
+ "email": "traysen879@gmail.com",
43
+ "root": "myvla_exp/bl_multiview_history_depth_set_table",
44
+ "host": "164-152-109-69",
45
+ "executable": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
46
+ "cpu_count": 240,
47
+ "cpu_count_logical": 240,
48
+ "gpu": "NVIDIA A100-SXM4-80GB",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "20812690710528",
53
+ "used": "36591595520"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "1902324936704"
58
+ },
59
+ "gpu_nvidia": [
60
+ {
61
+ "name": "NVIDIA A100-SXM4-80GB",
62
+ "memoryTotal": "85899345920",
63
+ "cudaCores": 6912,
64
+ "architecture": "Ampere",
65
+ "uuid": "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
66
+ },
67
+ {
68
+ "name": "NVIDIA A100-SXM4-80GB",
69
+ "memoryTotal": "85899345920",
70
+ "cudaCores": 6912,
71
+ "architecture": "Ampere",
72
+ "uuid": "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
73
+ },
74
+ {
75
+ "name": "NVIDIA A100-SXM4-80GB",
76
+ "memoryTotal": "85899345920",
77
+ "cudaCores": 6912,
78
+ "architecture": "Ampere",
79
+ "uuid": "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
80
+ },
81
+ {
82
+ "name": "NVIDIA A100-SXM4-80GB",
83
+ "memoryTotal": "85899345920",
84
+ "cudaCores": 6912,
85
+ "architecture": "Ampere",
86
+ "uuid": "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
87
+ },
88
+ {
89
+ "name": "NVIDIA A100-SXM4-80GB",
90
+ "memoryTotal": "85899345920",
91
+ "cudaCores": 6912,
92
+ "architecture": "Ampere",
93
+ "uuid": "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
94
+ },
95
+ {
96
+ "name": "NVIDIA A100-SXM4-80GB",
97
+ "memoryTotal": "85899345920",
98
+ "cudaCores": 6912,
99
+ "architecture": "Ampere",
100
+ "uuid": "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
101
+ },
102
+ {
103
+ "name": "NVIDIA A100-SXM4-80GB",
104
+ "memoryTotal": "85899345920",
105
+ "cudaCores": 6912,
106
+ "architecture": "Ampere",
107
+ "uuid": "GPU-335150e5-634c-68e2-4930-656c95e62244"
108
+ },
109
+ {
110
+ "name": "NVIDIA A100-SXM4-80GB",
111
+ "memoryTotal": "85899345920",
112
+ "cudaCores": 6912,
113
+ "architecture": "Ampere",
114
+ "uuid": "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
115
+ }
116
+ ],
117
+ "cudaVersion": "12.4",
118
+ "writerId": "ba5qoz48zgwc5ju7pflr3irzrnx350dd"
119
+ }
wandb/run-20250820_095138-in9qu6p9/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":39},"_runtime":39}
wandb/run-20250820_095138-in9qu6p9/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-20T09:51:38.536260128Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpa8uiflh4/port-3721925.txt","pid":3721925,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-08-20T09:51:38.537360213Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3721925-3722220-1575105170/socket","Net":"unix"}}
3
+ {"time":"2025-08-20T09:51:38.537456205Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3721925}
4
+ {"time":"2025-08-20T09:51:38.640382543Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-08-20T09:51:38.656047897Z","level":"INFO","msg":"handleInformInit: received","streamId":"in9qu6p9","id":"1(@)"}
6
+ {"time":"2025-08-20T09:51:38.950720371Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"in9qu6p9","id":"1(@)"}
7
+ {"time":"2025-08-20T09:52:18.709461098Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-08-20T09:52:18.709572482Z","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2025-08-20T09:52:18.709545521Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
10
+ {"time":"2025-08-20T09:52:18.709654592Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-08-20T09:52:18.709669844Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3721925-3722220-1575105170/socket","Net":"unix"}}
12
+ {"time":"2025-08-20T09:52:19.206007071Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-08-20T09:52:19.206153915Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-08-20T09:52:19.206165435Z","level":"INFO","msg":"server is closed"}
wandb/run-20250820_095138-in9qu6p9/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-20T09:51:38.660459543Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
2
+ {"time":"2025-08-20T09:51:38.950393813Z","level":"INFO","msg":"stream: created new stream","id":"in9qu6p9"}
3
+ {"time":"2025-08-20T09:51:38.950544507Z","level":"INFO","msg":"stream: started","id":"in9qu6p9"}
4
+ {"time":"2025-08-20T09:51:38.950582828Z","level":"INFO","msg":"writer: Do: started","stream_id":"in9qu6p9"}
5
+ {"time":"2025-08-20T09:51:38.950630479Z","level":"INFO","msg":"sender: started","stream_id":"in9qu6p9"}
6
+ {"time":"2025-08-20T09:51:38.950622759Z","level":"INFO","msg":"handler: started","stream_id":"in9qu6p9"}
7
+ {"time":"2025-08-20T09:52:18.709584351Z","level":"INFO","msg":"stream: closing","id":"in9qu6p9"}
8
+ {"time":"2025-08-20T09:52:19.077055913Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-08-20T09:52:19.198860723Z","level":"INFO","msg":"handler: closed","stream_id":"in9qu6p9"}
10
+ {"time":"2025-08-20T09:52:19.198934456Z","level":"INFO","msg":"writer: Close: closed","stream_id":"in9qu6p9"}
11
+ {"time":"2025-08-20T09:52:19.198966537Z","level":"INFO","msg":"sender: closed","stream_id":"in9qu6p9"}
12
+ {"time":"2025-08-20T09:52:19.203409125Z","level":"INFO","msg":"stream: closed","id":"in9qu6p9"}
wandb/run-20250820_095138-in9qu6p9/logs/debug.log ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-08-20 09:51:38,385 INFO MainThread:3721925 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
2
+ 2025-08-20 09:51:38,385 INFO MainThread:3721925 [wandb_setup.py:_flush():80] Configure stats pid to 3721925
3
+ 2025-08-20 09:51:38,385 INFO MainThread:3721925 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
4
+ 2025-08-20 09:51:38,387 INFO MainThread:3721925 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
5
+ 2025-08-20 09:51:38,388 INFO MainThread:3721925 [wandb_setup.py:_flush():80] Loading settings from environment variables
6
+ 2025-08-20 09:51:38,390 INFO MainThread:3721925 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_095138-in9qu6p9/logs/debug.log
7
+ 2025-08-20 09:51:38,393 INFO MainThread:3721925 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_095138-in9qu6p9/logs/debug-internal.log
8
+ 2025-08-20 09:51:38,395 INFO MainThread:3721925 [wandb_init.py:init():830] calling init triggers
9
+ 2025-08-20 09:51:38,397 INFO MainThread:3721925 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
10
+ config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 4, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 10, 'use_wrist_image': True, 'use_depth_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 4}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_history_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'segmentation': False, '_wandb': {}}
11
+ 2025-08-20 09:51:38,399 INFO MainThread:3721925 [wandb_init.py:init():871] starting backend
12
+ 2025-08-20 09:51:38,641 INFO MainThread:3721925 [wandb_init.py:init():874] sending inform_init request
13
+ 2025-08-20 09:51:38,652 INFO MainThread:3721925 [wandb_init.py:init():882] backend started and connected
14
+ 2025-08-20 09:51:38,656 INFO MainThread:3721925 [wandb_init.py:init():953] updated telemetry
15
+ 2025-08-20 09:51:38,686 INFO MainThread:3721925 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
16
+ 2025-08-20 09:51:39,129 INFO MainThread:3721925 [wandb_init.py:init():1029] starting run threads in backend
17
+ 2025-08-20 09:51:39,596 INFO MainThread:3721925 [wandb_run.py:_console_start():2458] atexit reg
18
+ 2025-08-20 09:51:39,596 INFO MainThread:3721925 [wandb_run.py:_redirect():2306] redirect: wrap_raw
19
+ 2025-08-20 09:51:39,599 INFO MainThread:3721925 [wandb_run.py:_redirect():2375] Wrapping output streams.
20
+ 2025-08-20 09:51:39,601 INFO MainThread:3721925 [wandb_run.py:_redirect():2398] Redirects installed.
21
+ 2025-08-20 09:51:39,609 INFO MainThread:3721925 [wandb_init.py:init():1075] run started, returning control to user process
22
+ 2025-08-20 09:52:18,707 INFO MsgRouterThr:3721925 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.
wandb/run-20250820_095138-in9qu6p9/run-in9qu6p9.wandb ADDED
Binary file (13.4 kB). View file
 
wandb/run-20250820_095524-3yyycq6f/files/config.yaml ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.0
4
+ e:
5
+ 2medvrsam6c7koxygw5woziyxdt2rfka:
6
+ args:
7
+ - --pretrained_checkpoint
8
+ - Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
9
+ - --model_type
10
+ - my_vla_qwen
11
+ - --vla.type
12
+ - myvla-qwen-224px+mx-mshab
13
+ - --vla.expected_world_size
14
+ - "4"
15
+ - --vla.global_batch_size
16
+ - "512"
17
+ - --vla.per_device_batch_size
18
+ - "8"
19
+ - --vla.learning_rate
20
+ - "2e-5"
21
+ - --vla.freeze_vision_backbone
22
+ - "True"
23
+ - --vla.freeze_llm_backbone
24
+ - "False"
25
+ - --vla.use_flow_matching
26
+ - "False"
27
+ - --vla.compress_history
28
+ - "False"
29
+ - --vla.epochs
30
+ - "10"
31
+ - --save_interval
32
+ - "1000"
33
+ - --run_id
34
+ - bl_multiview_history_depth_set_table
35
+ codePath: vla-scripts/train.py
36
+ codePathLocal: vla-scripts/train.py
37
+ cpu_count: 240
38
+ cpu_count_logical: 240
39
+ cudaVersion: "12.4"
40
+ disk:
41
+ /:
42
+ total: "20812690710528"
43
+ used: "36591730688"
44
+ email: traysen879@gmail.com
45
+ executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
46
+ git:
47
+ commit: 409e4c9a165115624c271028e9b3ee335991b747
48
+ remote: https://github.com/TRS07170/myvla.git
49
+ gpu: NVIDIA A100-SXM4-80GB
50
+ gpu_count: 8
51
+ gpu_nvidia:
52
+ - architecture: Ampere
53
+ cudaCores: 6912
54
+ memoryTotal: "85899345920"
55
+ name: NVIDIA A100-SXM4-80GB
56
+ uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
57
+ - architecture: Ampere
58
+ cudaCores: 6912
59
+ memoryTotal: "85899345920"
60
+ name: NVIDIA A100-SXM4-80GB
61
+ uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
62
+ - architecture: Ampere
63
+ cudaCores: 6912
64
+ memoryTotal: "85899345920"
65
+ name: NVIDIA A100-SXM4-80GB
66
+ uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
67
+ - architecture: Ampere
68
+ cudaCores: 6912
69
+ memoryTotal: "85899345920"
70
+ name: NVIDIA A100-SXM4-80GB
71
+ uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
72
+ - architecture: Ampere
73
+ cudaCores: 6912
74
+ memoryTotal: "85899345920"
75
+ name: NVIDIA A100-SXM4-80GB
76
+ uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
77
+ - architecture: Ampere
78
+ cudaCores: 6912
79
+ memoryTotal: "85899345920"
80
+ name: NVIDIA A100-SXM4-80GB
81
+ uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
82
+ - architecture: Ampere
83
+ cudaCores: 6912
84
+ memoryTotal: "85899345920"
85
+ name: NVIDIA A100-SXM4-80GB
86
+ uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
87
+ - architecture: Ampere
88
+ cudaCores: 6912
89
+ memoryTotal: "85899345920"
90
+ name: NVIDIA A100-SXM4-80GB
91
+ uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
92
+ host: 164-152-109-69
93
+ memory:
94
+ total: "1902324936704"
95
+ os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
96
+ program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
97
+ python: CPython 3.10.18
98
+ root: myvla_exp/bl_multiview_history_depth_set_table
99
+ startedAt: "2025-08-20T09:55:24.723538Z"
100
+ writerId: 2medvrsam6c7koxygw5woziyxdt2rfka
101
+ m: []
102
+ python_version: 3.10.18
103
+ t:
104
+ "1":
105
+ - 1
106
+ - 2
107
+ - 3
108
+ - 11
109
+ - 41
110
+ - 49
111
+ - 63
112
+ - 71
113
+ "2":
114
+ - 1
115
+ - 2
116
+ - 3
117
+ - 11
118
+ - 41
119
+ - 49
120
+ - 63
121
+ - 71
122
+ "3":
123
+ - 13
124
+ - 16
125
+ "4": 3.10.18
126
+ "5": 0.21.0
127
+ "6": 4.40.1
128
+ "12": 0.21.0
129
+ "13": linux-x86_64
130
+ data_root_dir:
131
+ value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
132
+ global_pose:
133
+ value: false
134
+ hf_token:
135
+ value: .hf_token
136
+ image_aug:
137
+ value: false
138
+ is_grasped:
139
+ value: false
140
+ is_resume:
141
+ value: false
142
+ model_type:
143
+ value: my_vla_qwen
144
+ pretrained_checkpoint:
145
+ value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
146
+ qpos:
147
+ value: false
148
+ resume_epoch:
149
+ value: null
150
+ resume_step:
151
+ value: null
152
+ run_id:
153
+ value: bl_multiview_history_depth_set_table
154
+ run_id_note:
155
+ value: null
156
+ run_root_dir:
157
+ value: myvla_exp
158
+ save_interval:
159
+ value: 1000
160
+ seed:
161
+ value: 7
162
+ segmentation:
163
+ value: false
164
+ trackers:
165
+ value:
166
+ - jsonl
167
+ - wandb
168
+ vla:
169
+ value:
170
+ action_chunk_size: 8
171
+ action_tokenizer: extra_action_tokenizer
172
+ base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
173
+ compress_history: false
174
+ data_mix: bridge
175
+ enable_gradient_checkpointing: true
176
+ enable_mixed_precision_training: true
177
+ epochs: 10
178
+ expected_world_size: 4
179
+ freeze_llm_backbone: false
180
+ freeze_vision_backbone: true
181
+ global_batch_size: 512
182
+ image_sequence_len: 10
183
+ image_window_size: 4
184
+ learning_rate: 2e-05
185
+ lr_scheduler_type: constant
186
+ max_grad_norm: 1
187
+ max_steps: null
188
+ per_device_batch_size: 8
189
+ reduce_in_full_precision: true
190
+ save_every_n_steps: 25000
191
+ shuffle_buffer_size: 256000
192
+ train_strategy: fsdp-full-shard
193
+ type: myvla-qwen-224px+mx-mshab
194
+ unfreeze_last_llm_layer: false
195
+ use_depth_image: true
196
+ use_flow_matching: false
197
+ use_wrist_image: true
198
+ vla_id: myvla-qwen-224px+mx-mshab
199
+ warmup_ratio: 0
200
+ weight_decay: 0
201
+ wandb_entity:
202
+ value: traysen879-uc-san-diego
203
+ wandb_project:
204
+ value: mshab_vla
wandb/run-20250820_095524-3yyycq6f/files/output.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 08/20 [09:55:25] INFO  | >> [*] Starting VLA Training Loop ]8;id=686782;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\train.py]8;;\:]8;id=709047;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#340\340]8;;\
2
+ Traceback (most recent call last):
3
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 360, in <module>
4
+ train()
5
+ File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
6
+ response = fn(cfg, *args, **kwargs)
7
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 341, in train
8
+ train_strategy.run_vla_training(
9
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/strategies/base_strategy.py", line 427, in run_vla_training
10
+ metrics.commit(
11
+ File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/metrics.py", line 318, in commit
12
+ self.state[key].append(value.detach())
13
+ AttributeError: 'int' object has no attribute 'detach'
wandb/run-20250820_095524-3yyycq6f/files/requirements.txt ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ setuptools==78.1.1
2
+ wheel==0.45.1
3
+ pip==25.1
4
+ sentencepiece==0.1.99
5
+ mpmath==1.3.0
6
+ libclang==18.1.1
7
+ flatbuffers==25.2.10
8
+ zipp==3.23.0
9
+ wrapt==1.14.1
10
+ urllib3==2.5.0
11
+ typing_extensions==4.14.1
12
+ typeguard==2.13.3
13
+ tqdm==4.67.1
14
+ toml==0.10.2
15
+ termcolor==3.1.0
16
+ tensorflow-io-gcs-filesystem==0.37.1
17
+ tensorflow-estimator==2.15.0
18
+ tensorboard-data-server==0.7.2
19
+ sympy==1.14.0
20
+ smmap==5.0.2
21
+ six==1.17.0
22
+ safetensors==0.5.3
23
+ regex==2025.7.34
24
+ PyYAML==6.0.2
25
+ pyparsing==3.2.3
26
+ Pygments==2.19.2
27
+ pyasn1==0.6.1
28
+ psutil==7.0.0
29
+ protobuf==4.21.12
30
+ platformdirs==4.3.8
31
+ pillow==11.3.0
32
+ packaging==25.0
33
+ opt_einsum==3.4.0
34
+ oauthlib==3.3.1
35
+ nvidia-nvtx-cu12==12.1.105
36
+ nvidia-nvjitlink-cu12==12.9.86
37
+ nvidia-nccl-cu12==2.19.3
38
+ nvidia-curand-cu12==10.3.2.106
39
+ nvidia-cufft-cu12==11.0.2.54
40
+ nvidia-cuda-runtime-cu12==12.1.105
41
+ nvidia-cuda-nvrtc-cu12==12.1.105
42
+ nvidia-cuda-cupti-cu12==12.1.105
43
+ nvidia-cublas-cu12==12.1.3.1
44
+ numpy==1.26.4
45
+ networkx==3.4.2
46
+ mypy_extensions==1.1.0
47
+ mergedeep==1.3.4
48
+ mdurl==0.1.2
49
+ MarkupSafe==3.0.2
50
+ Markdown==3.8.2
51
+ kiwisolver==1.4.8
52
+ keras==2.15.0
53
+ importlib_resources==6.5.2
54
+ idna==3.10
55
+ hf-xet==1.1.5
56
+ grpcio==1.74.0
57
+ gast==0.6.0
58
+ fsspec==2025.7.0
59
+ fonttools==4.59.0
60
+ filelock==3.18.0
61
+ etils==1.13.0
62
+ einops==0.8.1
63
+ cycler==0.12.1
64
+ click==8.2.1
65
+ charset-normalizer==3.4.2
66
+ certifi==2025.8.3
67
+ cachetools==5.5.2
68
+ attrs==25.3.0
69
+ annotated-types==0.7.0
70
+ absl-py==2.3.1
71
+ Werkzeug==3.1.3
72
+ typing-inspection==0.4.1
73
+ typing-inspect==0.9.0
74
+ triton==2.2.0
75
+ trimesh==4.7.1
76
+ tensorflow-metadata==1.17.2
77
+ tensorflow-addons==0.23.0
78
+ sentry-sdk==2.34.1
79
+ scipy==1.15.3
80
+ rsa==4.9.1
81
+ requests==2.32.4
82
+ pyyaml-include==1.4.1
83
+ python-dateutil==2.9.0.post0
84
+ pydantic_core==2.33.2
85
+ pyasn1_modules==0.4.2
86
+ promise==2.3
87
+ OpenEXR==3.3.5
88
+ nvidia-cusparse-cu12==12.1.0.106
89
+ nvidia-cudnn-cu12==8.9.2.26
90
+ ml-dtypes==0.2.0
91
+ markdown-it-py==3.0.0
92
+ jsonlines==4.0.0
93
+ json-numpy==2.1.1
94
+ Jinja2==3.1.6
95
+ h5py==3.14.0
96
+ google-pasta==0.2.0
97
+ gitdb==4.0.12
98
+ dm-tree==0.1.9
99
+ contourpy==1.3.2
100
+ astunparse==1.6.3
101
+ rich==14.1.0
102
+ requests-oauthlib==2.0.0
103
+ pydantic==2.11.7
104
+ nvidia-cusolver-cu12==11.4.5.107
105
+ matplotlib==3.10.5
106
+ huggingface-hub==0.34.3
107
+ google-auth==2.40.3
108
+ GitPython==3.1.45
109
+ draccus==0.8.0
110
+ wandb==0.21.0
111
+ torch==2.2.0
112
+ tokenizers==0.19.1
113
+ google-auth-oauthlib==1.2.2
114
+ array_record==0.7.2
115
+ transformers==4.40.1
116
+ torchvision==0.17.0
117
+ torchaudio==2.2.0
118
+ tensorboard==2.15.2
119
+ accelerate==1.9.0
120
+ timm==0.9.10
121
+ tensorflow-datasets==4.9.3
122
+ tensorflow==2.15.0
123
+ peft==0.11.1
124
+ tensorflow-graphics==2021.12.3
125
+ dlimp==0.0.1
126
+ openvla==0.0.3
127
+ ninja==1.11.1.4
128
+ flash-attn==2.5.5
129
+ autocommand==2.2.2
130
+ backports.tarfile==1.2.0
131
+ importlib_metadata==8.0.0
132
+ inflect==7.3.1
133
+ jaraco.collections==5.1.0
134
+ jaraco.context==5.3.0
135
+ jaraco.functools==4.0.1
136
+ jaraco.text==3.12.1
137
+ more-itertools==10.3.0
138
+ packaging==24.2
139
+ platformdirs==4.2.2
140
+ tomli==2.0.1
141
+ typeguard==4.3.0
142
+ typing_extensions==4.12.2
143
+ wheel==0.45.1
144
+ zipp==3.19.2
wandb/run-20250820_095524-3yyycq6f/files/wandb-metadata.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-08-20T09:55:24.723538Z",
5
+ "args": [
6
+ "--pretrained_checkpoint",
7
+ "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
8
+ "--model_type",
9
+ "my_vla_qwen",
10
+ "--vla.type",
11
+ "myvla-qwen-224px+mx-mshab",
12
+ "--vla.expected_world_size",
13
+ "4",
14
+ "--vla.global_batch_size",
15
+ "512",
16
+ "--vla.per_device_batch_size",
17
+ "8",
18
+ "--vla.learning_rate",
19
+ "2e-5",
20
+ "--vla.freeze_vision_backbone",
21
+ "True",
22
+ "--vla.freeze_llm_backbone",
23
+ "False",
24
+ "--vla.use_flow_matching",
25
+ "False",
26
+ "--vla.compress_history",
27
+ "False",
28
+ "--vla.epochs",
29
+ "10",
30
+ "--save_interval",
31
+ "1000",
32
+ "--run_id",
33
+ "bl_multiview_history_depth_set_table"
34
+ ],
35
+ "program": "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
36
+ "codePath": "vla-scripts/train.py",
37
+ "codePathLocal": "vla-scripts/train.py",
38
+ "git": {
39
+ "remote": "https://github.com/TRS07170/myvla.git",
40
+ "commit": "409e4c9a165115624c271028e9b3ee335991b747"
41
+ },
42
+ "email": "traysen879@gmail.com",
43
+ "root": "myvla_exp/bl_multiview_history_depth_set_table",
44
+ "host": "164-152-109-69",
45
+ "executable": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
46
+ "cpu_count": 240,
47
+ "cpu_count_logical": 240,
48
+ "gpu": "NVIDIA A100-SXM4-80GB",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "20812690710528",
53
+ "used": "36591730688"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "1902324936704"
58
+ },
59
+ "gpu_nvidia": [
60
+ {
61
+ "name": "NVIDIA A100-SXM4-80GB",
62
+ "memoryTotal": "85899345920",
63
+ "cudaCores": 6912,
64
+ "architecture": "Ampere",
65
+ "uuid": "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
66
+ },
67
+ {
68
+ "name": "NVIDIA A100-SXM4-80GB",
69
+ "memoryTotal": "85899345920",
70
+ "cudaCores": 6912,
71
+ "architecture": "Ampere",
72
+ "uuid": "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
73
+ },
74
+ {
75
+ "name": "NVIDIA A100-SXM4-80GB",
76
+ "memoryTotal": "85899345920",
77
+ "cudaCores": 6912,
78
+ "architecture": "Ampere",
79
+ "uuid": "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
80
+ },
81
+ {
82
+ "name": "NVIDIA A100-SXM4-80GB",
83
+ "memoryTotal": "85899345920",
84
+ "cudaCores": 6912,
85
+ "architecture": "Ampere",
86
+ "uuid": "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
87
+ },
88
+ {
89
+ "name": "NVIDIA A100-SXM4-80GB",
90
+ "memoryTotal": "85899345920",
91
+ "cudaCores": 6912,
92
+ "architecture": "Ampere",
93
+ "uuid": "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
94
+ },
95
+ {
96
+ "name": "NVIDIA A100-SXM4-80GB",
97
+ "memoryTotal": "85899345920",
98
+ "cudaCores": 6912,
99
+ "architecture": "Ampere",
100
+ "uuid": "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
101
+ },
102
+ {
103
+ "name": "NVIDIA A100-SXM4-80GB",
104
+ "memoryTotal": "85899345920",
105
+ "cudaCores": 6912,
106
+ "architecture": "Ampere",
107
+ "uuid": "GPU-335150e5-634c-68e2-4930-656c95e62244"
108
+ },
109
+ {
110
+ "name": "NVIDIA A100-SXM4-80GB",
111
+ "memoryTotal": "85899345920",
112
+ "cudaCores": 6912,
113
+ "architecture": "Ampere",
114
+ "uuid": "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
115
+ }
116
+ ],
117
+ "cudaVersion": "12.4",
118
+ "writerId": "2medvrsam6c7koxygw5woziyxdt2rfka"
119
+ }
wandb/run-20250820_095524-3yyycq6f/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":38},"_runtime":38}
wandb/run-20250820_095524-3yyycq6f/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-08-20T09:55:24.937710806Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmwa54urq/port-3722794.txt","pid":3722794,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-08-20T09:55:24.93971427Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3722794-3723166-688719886/socket","Net":"unix"}}
3
+ {"time":"2025-08-20T09:55:24.939880473Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3722794}
4
+ {"time":"2025-08-20T09:55:25.036546584Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-08-20T09:55:25.054333304Z","level":"INFO","msg":"handleInformInit: received","streamId":"3yyycq6f","id":"1(@)"}
6
+ {"time":"2025-08-20T09:55:25.352508795Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"3yyycq6f","id":"1(@)"}
7
+ {"time":"2025-08-20T09:56:04.105686979Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-08-20T09:56:04.105963355Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
9
+ {"time":"2025-08-20T09:56:04.106114428Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
10
+ {"time":"2025-08-20T09:56:04.106014046Z","level":"INFO","msg":"server is shutting down"}
11
+ {"time":"2025-08-20T09:56:04.10623037Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3722794-3723166-688719886/socket","Net":"unix"}}
12
+ {"time":"2025-08-20T09:56:04.556001154Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-08-20T09:56:04.556135438Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-08-20T09:56:04.556149367Z","level":"INFO","msg":"server is closed"}