trs07170 committed on Aug 22, 2025

Commit

b67468e

verified ·

1 Parent(s): 9c6d98e

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +1 -0
bl_multiview_history_depth_set_table.jsonl +0 -0
checkpoints/step-007160-epoch-10-loss=0.0736.pt +3 -0
config.json +58 -0
config.yaml +54 -0
run-metrics.jsonl +1 -0
wandb/debug-internal.log +22 -0
wandb/debug.log +28 -0
wandb/run-20250820_090050-3cqyp9vg/files/config.yaml +204 -0
wandb/run-20250820_090050-3cqyp9vg/files/output.log +45 -0
wandb/run-20250820_090050-3cqyp9vg/files/requirements.txt +144 -0
wandb/run-20250820_090050-3cqyp9vg/files/wandb-metadata.json +119 -0
wandb/run-20250820_090050-3cqyp9vg/files/wandb-summary.json +1 -0
wandb/run-20250820_090050-3cqyp9vg/logs/debug-core.log +14 -0
wandb/run-20250820_090050-3cqyp9vg/logs/debug-internal.log +12 -0
wandb/run-20250820_090050-3cqyp9vg/logs/debug.log +22 -0
wandb/run-20250820_090050-3cqyp9vg/run-3cqyp9vg.wandb +0 -0
wandb/run-20250820_091147-qt1b7wpr/files/config.yaml +204 -0
wandb/run-20250820_091147-qt1b7wpr/files/output.log +11 -0
wandb/run-20250820_091147-qt1b7wpr/files/requirements.txt +144 -0
wandb/run-20250820_091147-qt1b7wpr/files/wandb-metadata.json +119 -0
wandb/run-20250820_091147-qt1b7wpr/files/wandb-summary.json +1 -0
wandb/run-20250820_091147-qt1b7wpr/logs/debug-core.log +14 -0
wandb/run-20250820_091147-qt1b7wpr/logs/debug-internal.log +12 -0
wandb/run-20250820_091147-qt1b7wpr/logs/debug.log +22 -0
wandb/run-20250820_091147-qt1b7wpr/run-qt1b7wpr.wandb +0 -0
wandb/run-20250820_094544-v4zsb4rt/files/config.yaml +204 -0
wandb/run-20250820_094544-v4zsb4rt/files/output.log +11 -0
wandb/run-20250820_094544-v4zsb4rt/files/requirements.txt +144 -0
wandb/run-20250820_094544-v4zsb4rt/files/wandb-metadata.json +119 -0
wandb/run-20250820_094544-v4zsb4rt/files/wandb-summary.json +1 -0
wandb/run-20250820_094544-v4zsb4rt/logs/debug-core.log +14 -0
wandb/run-20250820_094544-v4zsb4rt/logs/debug-internal.log +12 -0
wandb/run-20250820_094544-v4zsb4rt/logs/debug.log +22 -0
wandb/run-20250820_094544-v4zsb4rt/run-v4zsb4rt.wandb +0 -0
wandb/run-20250820_095138-in9qu6p9/files/config.yaml +204 -0
wandb/run-20250820_095138-in9qu6p9/files/output.log +13 -0
wandb/run-20250820_095138-in9qu6p9/files/requirements.txt +144 -0
wandb/run-20250820_095138-in9qu6p9/files/wandb-metadata.json +119 -0
wandb/run-20250820_095138-in9qu6p9/files/wandb-summary.json +1 -0
wandb/run-20250820_095138-in9qu6p9/logs/debug-core.log +14 -0
wandb/run-20250820_095138-in9qu6p9/logs/debug-internal.log +12 -0
wandb/run-20250820_095138-in9qu6p9/logs/debug.log +22 -0
wandb/run-20250820_095138-in9qu6p9/run-in9qu6p9.wandb +0 -0
wandb/run-20250820_095524-3yyycq6f/files/config.yaml +204 -0
wandb/run-20250820_095524-3yyycq6f/files/output.log +13 -0
wandb/run-20250820_095524-3yyycq6f/files/requirements.txt +144 -0
wandb/run-20250820_095524-3yyycq6f/files/wandb-metadata.json +119 -0
wandb/run-20250820_095524-3yyycq6f/files/wandb-summary.json +1 -0
wandb/run-20250820_095524-3yyycq6f/logs/debug-core.log +14 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+wandb/run-20250820_102642-5v6c0rms/run-5v6c0rms.wandb filter=lfs diff=lfs merge=lfs -text

bl_multiview_history_depth_set_table.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoints/step-007160-epoch-10-loss=0.0736.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:76b99149472d744b5a28f29807227ad854ae854a6933d0abd3e4b83e0b6ce9cb
+size 4093057552

config.json ADDED Viewed

	@@ -0,0 +1,58 @@

+{
+  "data_root_dir": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data",
+  "global_pose": false,
+  "hf_token": ".hf_token",
+  "image_aug": false,
+  "is_grasped": false,
+  "is_resume": false,
+  "model_type": "my_vla_qwen",
+  "pretrained_checkpoint": "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
+  "qpos": false,
+  "resume_epoch": null,
+  "resume_step": null,
+  "run_id": "bl_multiview_history_depth_set_table",
+  "run_id_note": null,
+  "run_root_dir": "myvla_exp",
+  "save_interval": 1000,
+  "seed": 7,
+  "segmentation": false,
+  "trackers": [
+    "jsonl",
+    "wandb"
+  ],
+  "vla": {
+    "action_chunk_size": 8,
+    "action_tokenizer": "extra_action_tokenizer",
+    "base_vlm": "prism-qwen25-extra-dinosiglip-224px+0_5b",
+    "compress_history": false,
+    "data_mix": "bridge",
+    "enable_gradient_checkpointing": true,
+    "enable_mixed_precision_training": true,
+    "epochs": 10,
+    "expected_world_size": 4,
+    "freeze_llm_backbone": false,
+    "freeze_vision_backbone": true,
+    "global_batch_size": 512,
+    "image_sequence_len": 10,
+    "image_window_size": 4,
+    "learning_rate": 2e-05,
+    "lr_scheduler_type": "constant",
+    "max_grad_norm": 1.0,
+    "max_steps": null,
+    "per_device_batch_size": 8,
+    "reduce_in_full_precision": true,
+    "save_every_n_steps": 25000,
+    "shuffle_buffer_size": 256000,
+    "train_strategy": "fsdp-full-shard",
+    "type": "myvla-qwen-224px+mx-mshab",
+    "unfreeze_last_llm_layer": false,
+    "use_depth_image": true,
+    "use_flow_matching": false,
+    "use_wrist_image": true,
+    "vla_id": "myvla-qwen-224px+mx-mshab",
+    "warmup_ratio": 0.0,
+    "weight_decay": 0.0
+  },
+  "wandb_entity": "traysen879-uc-san-diego",
+  "wandb_project": "mshab_vla"
+}

config.yaml ADDED Viewed

	@@ -0,0 +1,54 @@

+data_root_dir: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
+global_pose: false
+hf_token: .hf_token
+image_aug: false
+is_grasped: false
+is_resume: false
+model_type: my_vla_qwen
+pretrained_checkpoint: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
+qpos: false
+resume_epoch: null
+resume_step: null
+run_id: bl_multiview_history_depth_set_table
+run_id_note: null
+run_root_dir: myvla_exp
+save_interval: 1000
+seed: 7
+segmentation: false
+trackers:
+- jsonl
+- wandb
+vla:
+  action_chunk_size: 8
+  action_tokenizer: extra_action_tokenizer
+  base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
+  compress_history: false
+  data_mix: bridge
+  enable_gradient_checkpointing: true
+  enable_mixed_precision_training: true
+  epochs: 10
+  expected_world_size: 4
+  freeze_llm_backbone: false
+  freeze_vision_backbone: true
+  global_batch_size: 512
+  image_sequence_len: 10
+  image_window_size: 4
+  learning_rate: 2.0e-05
+  lr_scheduler_type: constant
+  max_grad_norm: 1.0
+  max_steps: null
+  per_device_batch_size: 8
+  reduce_in_full_precision: true
+  save_every_n_steps: 25000
+  shuffle_buffer_size: 256000
+  train_strategy: fsdp-full-shard
+  type: myvla-qwen-224px+mx-mshab
+  unfreeze_last_llm_layer: false
+  use_depth_image: true
+  use_flow_matching: false
+  use_wrist_image: true
+  vla_id: myvla-qwen-224px+mx-mshab
+  warmup_ratio: 0.0
+  weight_decay: 0.0
+wandb_entity: traysen879-uc-san-diego
+wandb_project: mshab_vla

run-metrics.jsonl ADDED Viewed

	@@ -0,0 +1 @@

+ {"hparams": {"data_root_dir": "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data", "global_pose": false, "hf_token": ".hf_token", "image_aug": false, "is_grasped": false, "is_resume": false, "model_type": "my_vla_qwen", "pretrained_checkpoint": "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b", "qpos": false, "resume_epoch": null, "resume_step": null, "run_id": "bl_multiview_history_depth_set_table", "run_id_note": null, "run_root_dir": "myvla_exp", "save_interval": 1000, "seed": 7, "segmentation": false, "trackers": ["jsonl", "wandb"], "vla": {"action_chunk_size": 8, "action_tokenizer": "extra_action_tokenizer", "base_vlm": "prism-qwen25-extra-dinosiglip-224px+0_5b", "compress_history": false, "data_mix": "bridge", "enable_gradient_checkpointing": true, "enable_mixed_precision_training": true, "epochs": 10, "expected_world_size": 4, "freeze_llm_backbone": false, "freeze_vision_backbone": true, "global_batch_size": 512, "image_sequence_len": 10, "image_window_size": 4, "learning_rate": 2e-05, "lr_scheduler_type": "constant", "max_grad_norm": 1.0, "max_steps": null, "per_device_batch_size": 8, "reduce_in_full_precision": true, "save_every_n_steps": 25000, "shuffle_buffer_size": 256000, "train_strategy": "fsdp-full-shard", "type": "myvla-qwen-224px+mx-mshab", "unfreeze_last_llm_layer": false, "use_depth_image": true, "use_flow_matching": false, "use_wrist_image": true, "vla_id": "myvla-qwen-224px+mx-mshab", "warmup_ratio": 0.0, "weight_decay": 0.0}, "wandb_entity": "traysen879-uc-san-diego", "wandb_project": "mshab_vla"}, "run_id": "bl_multiview_history_depth_set_table"}

wandb/debug-internal.log ADDED Viewed

	@@ -0,0 +1,22 @@

+{"time":"2025-08-20T10:26:43.204675859Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-08-20T10:26:43.516851918Z","level":"INFO","msg":"stream: created new stream","id":"5v6c0rms"}
+{"time":"2025-08-20T10:26:43.516991671Z","level":"INFO","msg":"stream: started","id":"5v6c0rms"}
+{"time":"2025-08-20T10:26:43.517178725Z","level":"INFO","msg":"writer: Do: started","stream_id":"5v6c0rms"}
+{"time":"2025-08-20T10:26:43.517063263Z","level":"INFO","msg":"handler: started","stream_id":"5v6c0rms"}
+{"time":"2025-08-20T10:26:43.517088984Z","level":"INFO","msg":"sender: started","stream_id":"5v6c0rms"}
+{"time":"2025-08-20T16:19:00.197037424Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-08-20T18:26:46.626255579Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-08-20T20:34:33.882438305Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/traysen879-uc-san-diego/mshab_vla/5v6c0rms/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
+{"time":"2025-08-21T07:33:39.302593268Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded (Client.Timeout exceeded while awaiting headers)"}
+{"time":"2025-08-21T12:32:35.830938746Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/files/traysen879-uc-san-diego/mshab_vla/5v6c0rms/file_stream\": http2: server sent GOAWAY and closed the connection; LastStreamID=1, ErrCode=NO_ERROR, debug=\"server_shutting_down\""}
+{"time":"2025-08-21T12:53:56.107366065Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
+{"time":"2025-08-21T19:51:10.717177409Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/traysen879-uc-san-diego/mshab_vla/5v6c0rms/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
+{"time":"2025-08-21T19:55:28.726667762Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": context deadline exceeded"}
+{"time":"2025-08-21T21:35:15.927046924Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/traysen879-uc-san-diego/mshab_vla/5v6c0rms/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
+{"time":"2025-08-22T08:17:32.392853531Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-08-22T08:17:32.457515475Z","level":"INFO","msg":"handler: operation stats","stats":{}}
+{"time":"2025-08-22T08:17:32.467508897Z","level":"INFO","msg":"stream: closing","id":"5v6c0rms"}
+{"time":"2025-08-22T08:17:32.468012198Z","level":"INFO","msg":"handler: closed","stream_id":"5v6c0rms"}
+{"time":"2025-08-22T08:17:32.468066299Z","level":"INFO","msg":"writer: Close: closed","stream_id":"5v6c0rms"}
+{"time":"2025-08-22T08:17:32.4681065Z","level":"INFO","msg":"sender: closed","stream_id":"5v6c0rms"}
+{"time":"2025-08-22T08:17:32.47079997Z","level":"INFO","msg":"stream: closed","id":"5v6c0rms"}

wandb/debug.log ADDED Viewed

	@@ -0,0 +1,28 @@

+2025-08-20 10:26:42,933 INFO    MainThread:3727471 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-08-20 10:26:42,933 INFO    MainThread:3727471 [wandb_setup.py:_flush():80] Configure stats pid to 3727471
+2025-08-20 10:26:42,933 INFO    MainThread:3727471 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
+2025-08-20 10:26:42,933 INFO    MainThread:3727471 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
+2025-08-20 10:26:42,934 INFO    MainThread:3727471 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-08-20 10:26:42,934 INFO    MainThread:3727471 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_102642-5v6c0rms/logs/debug.log
+2025-08-20 10:26:42,934 INFO    MainThread:3727471 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_102642-5v6c0rms/logs/debug-internal.log
+2025-08-20 10:26:42,934 INFO    MainThread:3727471 [wandb_init.py:init():830] calling init triggers
+2025-08-20 10:26:42,934 INFO    MainThread:3727471 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 4, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 10, 'use_wrist_image': True, 'use_depth_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 4}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_history_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'segmentation': False, '_wandb': {}}
+2025-08-20 10:26:42,934 INFO    MainThread:3727471 [wandb_init.py:init():871] starting backend
+2025-08-20 10:26:43,181 INFO    MainThread:3727471 [wandb_init.py:init():874] sending inform_init request
+2025-08-20 10:26:43,190 INFO    MainThread:3727471 [wandb_init.py:init():882] backend started and connected
+2025-08-20 10:26:43,201 INFO    MainThread:3727471 [wandb_init.py:init():953] updated telemetry
+2025-08-20 10:26:43,238 INFO    MainThread:3727471 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-08-20 10:26:43,789 INFO    MainThread:3727471 [wandb_init.py:init():1029] starting run threads in backend
+2025-08-20 10:26:44,280 INFO    MainThread:3727471 [wandb_run.py:_console_start():2458] atexit reg
+2025-08-20 10:26:44,281 INFO    MainThread:3727471 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-08-20 10:26:44,281 INFO    MainThread:3727471 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-08-20 10:26:44,282 INFO    MainThread:3727471 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-08-20 10:26:44,290 INFO    MainThread:3727471 [wandb_init.py:init():1075] run started, returning control to user process
+2025-08-22 08:17:32,042 INFO    MainThread:3727471 [wandb_run.py:_finish():2224] finishing run traysen879-uc-san-diego/mshab_vla/5v6c0rms
+2025-08-22 08:17:32,043 INFO    MainThread:3727471 [wandb_run.py:_atexit_cleanup():2423] got exitcode: 0
+2025-08-22 08:17:32,044 INFO    MainThread:3727471 [wandb_run.py:_restore():2405] restore
+2025-08-22 08:17:32,044 INFO    MainThread:3727471 [wandb_run.py:_restore():2411] restore done
+2025-08-22 08:17:32,460 INFO    MainThread:3727471 [wandb_run.py:_footer_history_summary_info():3903] rendering history
+2025-08-22 08:17:32,462 INFO    MainThread:3727471 [wandb_run.py:_footer_history_summary_info():3935] rendering summary
+2025-08-22 08:17:32,463 INFO    MainThread:3727471 [wandb_run.py:_footer_sync_info():3864] logging synced files

wandb/run-20250820_090050-3cqyp9vg/files/config.yaml ADDED Viewed

	@@ -0,0 +1,204 @@

+_wandb:
+    value:
+        cli_version: 0.21.0
+        e:
+            beqmkna8ab9p0pvi5lhmmalckxlanj5v:
+                args:
+                    - --pretrained_checkpoint
+                    - Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
+                    - --model_type
+                    - my_vla_qwen
+                    - --vla.type
+                    - myvla-qwen-224px+mx-mshab
+                    - --vla.expected_world_size
+                    - "4"
+                    - --vla.global_batch_size
+                    - "512"
+                    - --vla.per_device_batch_size
+                    - "16"
+                    - --vla.learning_rate
+                    - "2e-5"
+                    - --vla.freeze_vision_backbone
+                    - "True"
+                    - --vla.freeze_llm_backbone
+                    - "False"
+                    - --vla.use_flow_matching
+                    - "False"
+                    - --vla.compress_history
+                    - "False"
+                    - --vla.epochs
+                    - "10"
+                    - --save_interval
+                    - "1000"
+                    - --run_id
+                    - bl_multiview_history_depth_set_table
+                codePath: vla-scripts/train.py
+                codePathLocal: vla-scripts/train.py
+                cpu_count: 240
+                cpu_count_logical: 240
+                cudaVersion: "12.4"
+                disk:
+                    /:
+                        total: "20812690710528"
+                        used: "36590596096"
+                email: traysen879@gmail.com
+                executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
+                git:
+                    commit: 409e4c9a165115624c271028e9b3ee335991b747
+                    remote: https://github.com/TRS07170/myvla.git
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 8
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
+                host: 164-152-109-69
+                memory:
+                    total: "1902324936704"
+                os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
+                program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
+                python: CPython 3.10.18
+                root: myvla_exp/bl_multiview_history_depth_set_table
+                startedAt: "2025-08-20T09:00:50.038649Z"
+                writerId: beqmkna8ab9p0pvi5lhmmalckxlanj5v
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 11
+                - 41
+                - 49
+                - 63
+                - 71
+            "2":
+                - 1
+                - 2
+                - 3
+                - 11
+                - 41
+                - 49
+                - 63
+                - 71
+            "3":
+                - 13
+                - 16
+            "4": 3.10.18
+            "5": 0.21.0
+            "6": 4.40.1
+            "12": 0.21.0
+            "13": linux-x86_64
+data_root_dir:
+    value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
+global_pose:
+    value: false
+hf_token:
+    value: .hf_token
+image_aug:
+    value: false
+is_grasped:
+    value: false
+is_resume:
+    value: false
+model_type:
+    value: my_vla_qwen
+pretrained_checkpoint:
+    value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
+qpos:
+    value: false
+resume_epoch:
+    value: null
+resume_step:
+    value: null
+run_id:
+    value: bl_multiview_history_depth_set_table
+run_id_note:
+    value: null
+run_root_dir:
+    value: myvla_exp
+save_interval:
+    value: 1000
+seed:
+    value: 7
+segmentation:
+    value: false
+trackers:
+    value:
+        - jsonl
+        - wandb
+vla:
+    value:
+        action_chunk_size: 8
+        action_tokenizer: extra_action_tokenizer
+        base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
+        compress_history: false
+        data_mix: bridge
+        enable_gradient_checkpointing: true
+        enable_mixed_precision_training: true
+        epochs: 10
+        expected_world_size: 4
+        freeze_llm_backbone: false
+        freeze_vision_backbone: true
+        global_batch_size: 512
+        image_sequence_len: 10
+        image_window_size: 4
+        learning_rate: 2e-05
+        lr_scheduler_type: constant
+        max_grad_norm: 1
+        max_steps: null
+        per_device_batch_size: 16
+        reduce_in_full_precision: true
+        save_every_n_steps: 25000
+        shuffle_buffer_size: 256000
+        train_strategy: fsdp-full-shard
+        type: myvla-qwen-224px+mx-mshab
+        unfreeze_last_llm_layer: false
+        use_depth_image: true
+        use_flow_matching: false
+        use_wrist_image: true
+        vla_id: myvla-qwen-224px+mx-mshab
+        warmup_ratio: 0
+        weight_decay: 0
+wandb_entity:
+    value: traysen879-uc-san-diego
+wandb_project:
+    value: mshab_vla

wandb/run-20250820_090050-3cqyp9vg/files/output.log ADDED Viewed

	@@ -0,0 +1,45 @@

+[2;36m08/20 [09:00:51][0m[2;36m [0m[34mINFO    [0m | >> [1m[[0m*[1m][0m Starting VLA Training Loop                                                        ]8;id=686782;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\[2mtrain.py[0m]8;;\[2m:[0m]8;id=709047;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#340\[2m340[0m]8;;\
+Traceback (most recent call last):
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 360, in <module>
+    train()
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
+    response = fn(cfg, *args, **kwargs)
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 341, in train
+    train_strategy.run_vla_training(
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/strategies/base_strategy.py", line 342, in run_vla_training
+    output, aux_loss = self.vlm(
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
+    return self._call_impl(*args, **kwargs)
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
+    return forward_call(*args, **kwargs)
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 849, in forward
+    output = self._fsdp_wrapped_module(*args, **kwargs)
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
+    return self._call_impl(*args, **kwargs)
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
+    return forward_call(*args, **kwargs)
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/vlas/myvla.py", line 262, in forward
+    output = super().forward(
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/vlms/prismatic.py", line 497, in forward
+    output = self.llm_backbone(
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
+    return self._call_impl(*args, **kwargs)
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
+    return forward_call(*args, **kwargs)
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/models/backbones/llm/base_llm.py", line 221, in forward
+    output: CausalLMOutputWithPast = self.llm(
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
+    return self._call_impl(*args, **kwargs)
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
+    return forward_call(*args, **kwargs)
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/transformers/models/qwen2/modeling_qwen2.py", line 1196, in forward
+    loss = loss_fct(shift_logits, shift_labels)
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1511, in _wrapped_call_impl
+    return self._call_impl(*args, **kwargs)
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1520, in _call_impl
+    return forward_call(*args, **kwargs)
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/modules/loss.py", line 1179, in forward
+    return F.cross_entropy(input, target, weight=self.weight,
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/torch/nn/functional.py", line 3059, in cross_entropy
+    return torch._C._nn.cross_entropy_loss(input, target, weight, _Reduction.get_enum(reduction), ignore_index, label_smoothing)
+torch.cuda.OutOfMemoryError: CUDA out of memory. Tried to allocate 23.96 GiB. GPU 0 has a total capacity of 79.14 GiB of which 21.94 GiB is free. Including non-PyTorch memory, this process has 57.19 GiB memory in use. Of the allocated memory 53.35 GiB is allocated by PyTorch, and 1.49 GiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

wandb/run-20250820_090050-3cqyp9vg/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,144 @@

+setuptools==78.1.1
+wheel==0.45.1
+pip==25.1
+sentencepiece==0.1.99
+mpmath==1.3.0
+libclang==18.1.1
+flatbuffers==25.2.10
+zipp==3.23.0
+wrapt==1.14.1
+urllib3==2.5.0
+typing_extensions==4.14.1
+typeguard==2.13.3
+tqdm==4.67.1
+toml==0.10.2
+termcolor==3.1.0
+tensorflow-io-gcs-filesystem==0.37.1
+tensorflow-estimator==2.15.0
+tensorboard-data-server==0.7.2
+sympy==1.14.0
+smmap==5.0.2
+six==1.17.0
+safetensors==0.5.3
+regex==2025.7.34
+PyYAML==6.0.2
+pyparsing==3.2.3
+Pygments==2.19.2
+pyasn1==0.6.1
+psutil==7.0.0
+protobuf==4.21.12
+platformdirs==4.3.8
+pillow==11.3.0
+packaging==25.0
+opt_einsum==3.4.0
+oauthlib==3.3.1
+nvidia-nvtx-cu12==12.1.105
+nvidia-nvjitlink-cu12==12.9.86
+nvidia-nccl-cu12==2.19.3
+nvidia-curand-cu12==10.3.2.106
+nvidia-cufft-cu12==11.0.2.54
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cublas-cu12==12.1.3.1
+numpy==1.26.4
+networkx==3.4.2
+mypy_extensions==1.1.0
+mergedeep==1.3.4
+mdurl==0.1.2
+MarkupSafe==3.0.2
+Markdown==3.8.2
+kiwisolver==1.4.8
+keras==2.15.0
+importlib_resources==6.5.2
+idna==3.10
+hf-xet==1.1.5
+grpcio==1.74.0
+gast==0.6.0
+fsspec==2025.7.0
+fonttools==4.59.0
+filelock==3.18.0
+etils==1.13.0
+einops==0.8.1
+cycler==0.12.1
+click==8.2.1
+charset-normalizer==3.4.2
+certifi==2025.8.3
+cachetools==5.5.2
+attrs==25.3.0
+annotated-types==0.7.0
+absl-py==2.3.1
+Werkzeug==3.1.3
+typing-inspection==0.4.1
+typing-inspect==0.9.0
+triton==2.2.0
+trimesh==4.7.1
+tensorflow-metadata==1.17.2
+tensorflow-addons==0.23.0
+sentry-sdk==2.34.1
+scipy==1.15.3
+rsa==4.9.1
+requests==2.32.4
+pyyaml-include==1.4.1
+python-dateutil==2.9.0.post0
+pydantic_core==2.33.2
+pyasn1_modules==0.4.2
+promise==2.3
+OpenEXR==3.3.5
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-cudnn-cu12==8.9.2.26
+ml-dtypes==0.2.0
+markdown-it-py==3.0.0
+jsonlines==4.0.0
+json-numpy==2.1.1
+Jinja2==3.1.6
+h5py==3.14.0
+google-pasta==0.2.0
+gitdb==4.0.12
+dm-tree==0.1.9
+contourpy==1.3.2
+astunparse==1.6.3
+rich==14.1.0
+requests-oauthlib==2.0.0
+pydantic==2.11.7
+nvidia-cusolver-cu12==11.4.5.107
+matplotlib==3.10.5
+huggingface-hub==0.34.3
+google-auth==2.40.3
+GitPython==3.1.45
+draccus==0.8.0
+wandb==0.21.0
+torch==2.2.0
+tokenizers==0.19.1
+google-auth-oauthlib==1.2.2
+array_record==0.7.2
+transformers==4.40.1
+torchvision==0.17.0
+torchaudio==2.2.0
+tensorboard==2.15.2
+accelerate==1.9.0
+timm==0.9.10
+tensorflow-datasets==4.9.3
+tensorflow==2.15.0
+peft==0.11.1
+tensorflow-graphics==2021.12.3
+dlimp==0.0.1
+openvla==0.0.3
+ninja==1.11.1.4
+flash-attn==2.5.5
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wandb/run-20250820_090050-3cqyp9vg/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,119 @@

+{
+  "os":  "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-08-20T09:00:50.038649Z",
+  "args":  [
+    "--pretrained_checkpoint",
+    "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
+    "--model_type",
+    "my_vla_qwen",
+    "--vla.type",
+    "myvla-qwen-224px+mx-mshab",
+    "--vla.expected_world_size",
+    "4",
+    "--vla.global_batch_size",
+    "512",
+    "--vla.per_device_batch_size",
+    "16",
+    "--vla.learning_rate",
+    "2e-5",
+    "--vla.freeze_vision_backbone",
+    "True",
+    "--vla.freeze_llm_backbone",
+    "False",
+    "--vla.use_flow_matching",
+    "False",
+    "--vla.compress_history",
+    "False",
+    "--vla.epochs",
+    "10",
+    "--save_interval",
+    "1000",
+    "--run_id",
+    "bl_multiview_history_depth_set_table"
+  ],
+  "program":  "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
+  "codePath":  "vla-scripts/train.py",
+  "codePathLocal":  "vla-scripts/train.py",
+  "git":  {
+    "remote":  "https://github.com/TRS07170/myvla.git",
+    "commit":  "409e4c9a165115624c271028e9b3ee335991b747"
+  },
+  "email":  "traysen879@gmail.com",
+  "root":  "myvla_exp/bl_multiview_history_depth_set_table",
+  "host":  "164-152-109-69",
+  "executable":  "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
+  "cpu_count":  240,
+  "cpu_count_logical":  240,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "20812690710528",
+      "used":  "36590596096"
+    }
+  },
+  "memory":  {
+    "total":  "1902324936704"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-335150e5-634c-68e2-4930-656c95e62244"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
+    }
+  ],
+  "cudaVersion":  "12.4",
+  "writerId":  "beqmkna8ab9p0pvi5lhmmalckxlanj5v"
+}

wandb/run-20250820_090050-3cqyp9vg/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_wandb":{"runtime":5},"_runtime":5}

wandb/run-20250820_090050-3cqyp9vg/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-08-20T09:00:50.318936347Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp_xbsp_gd/port-3716149.txt","pid":3716149,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-08-20T09:00:50.320251545Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3716149-3717420-2439034435/socket","Net":"unix"}}
+{"time":"2025-08-20T09:00:50.320360967Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3716149}
+{"time":"2025-08-20T09:00:50.339976746Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-08-20T09:00:50.355069096Z","level":"INFO","msg":"handleInformInit: received","streamId":"3cqyp9vg","id":"1(@)"}
+{"time":"2025-08-20T09:00:50.652012124Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"3cqyp9vg","id":"1(@)"}
+{"time":"2025-08-20T09:00:56.549737027Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-08-20T09:00:56.549990573Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-08-20T09:00:56.549964273Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-08-20T09:00:56.550106895Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3716149-3717420-2439034435/socket","Net":"unix"}}
+{"time":"2025-08-20T09:00:56.550226089Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-08-20T09:00:57.066086131Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-08-20T09:00:57.066135043Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-08-20T09:00:57.066147462Z","level":"INFO","msg":"server is closed"}

wandb/run-20250820_090050-3cqyp9vg/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,12 @@

+{"time":"2025-08-20T09:00:50.35935463Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-08-20T09:00:50.651655947Z","level":"INFO","msg":"stream: created new stream","id":"3cqyp9vg"}
+{"time":"2025-08-20T09:00:50.651899322Z","level":"INFO","msg":"stream: started","id":"3cqyp9vg"}
+{"time":"2025-08-20T09:00:50.651968385Z","level":"INFO","msg":"handler: started","stream_id":"3cqyp9vg"}
+{"time":"2025-08-20T09:00:50.651942224Z","level":"INFO","msg":"writer: Do: started","stream_id":"3cqyp9vg"}
+{"time":"2025-08-20T09:00:50.652025325Z","level":"INFO","msg":"sender: started","stream_id":"3cqyp9vg"}
+{"time":"2025-08-20T09:00:56.550049694Z","level":"INFO","msg":"stream: closing","id":"3cqyp9vg"}
+{"time":"2025-08-20T09:00:56.863554095Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-08-20T09:00:57.058314771Z","level":"INFO","msg":"handler: closed","stream_id":"3cqyp9vg"}
+{"time":"2025-08-20T09:00:57.058449844Z","level":"INFO","msg":"writer: Close: closed","stream_id":"3cqyp9vg"}
+{"time":"2025-08-20T09:00:57.058507535Z","level":"INFO","msg":"sender: closed","stream_id":"3cqyp9vg"}
+{"time":"2025-08-20T09:00:57.063852083Z","level":"INFO","msg":"stream: closed","id":"3cqyp9vg"}

wandb/run-20250820_090050-3cqyp9vg/logs/debug.log ADDED Viewed

	@@ -0,0 +1,22 @@

+2025-08-20 09:00:50,078 INFO    MainThread:3716149 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-08-20 09:00:50,079 INFO    MainThread:3716149 [wandb_setup.py:_flush():80] Configure stats pid to 3716149
+2025-08-20 09:00:50,079 INFO    MainThread:3716149 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
+2025-08-20 09:00:50,081 INFO    MainThread:3716149 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
+2025-08-20 09:00:50,081 INFO    MainThread:3716149 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-08-20 09:00:50,084 INFO    MainThread:3716149 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_090050-3cqyp9vg/logs/debug.log
+2025-08-20 09:00:50,086 INFO    MainThread:3716149 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_090050-3cqyp9vg/logs/debug-internal.log
+2025-08-20 09:00:50,088 INFO    MainThread:3716149 [wandb_init.py:init():830] calling init triggers
+2025-08-20 09:00:50,090 INFO    MainThread:3716149 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 4, 'global_batch_size': 512, 'per_device_batch_size': 16, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 10, 'use_wrist_image': True, 'use_depth_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 4}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_history_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'segmentation': False, '_wandb': {}}
+2025-08-20 09:00:50,092 INFO    MainThread:3716149 [wandb_init.py:init():871] starting backend
+2025-08-20 09:00:50,340 INFO    MainThread:3716149 [wandb_init.py:init():874] sending inform_init request
+2025-08-20 09:00:50,349 INFO    MainThread:3716149 [wandb_init.py:init():882] backend started and connected
+2025-08-20 09:00:50,355 INFO    MainThread:3716149 [wandb_init.py:init():953] updated telemetry
+2025-08-20 09:00:50,391 INFO    MainThread:3716149 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-08-20 09:00:50,848 INFO    MainThread:3716149 [wandb_init.py:init():1029] starting run threads in backend
+2025-08-20 09:00:51,348 INFO    MainThread:3716149 [wandb_run.py:_console_start():2458] atexit reg
+2025-08-20 09:00:51,354 INFO    MainThread:3716149 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-08-20 09:00:51,355 INFO    MainThread:3716149 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-08-20 09:00:51,355 INFO    MainThread:3716149 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-08-20 09:00:51,365 INFO    MainThread:3716149 [wandb_init.py:init():1075] run started, returning control to user process
+2025-08-20 09:00:56,545 INFO    MsgRouterThr:3716149 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.

wandb/run-20250820_090050-3cqyp9vg/run-3cqyp9vg.wandb ADDED Viewed

Binary file (12 kB). View file

wandb/run-20250820_091147-qt1b7wpr/files/config.yaml ADDED Viewed

	@@ -0,0 +1,204 @@

+_wandb:
+    value:
+        cli_version: 0.21.0
+        e:
+            ns7xudltxb04d0a18uyfc7iv6bgtzle9:
+                args:
+                    - --pretrained_checkpoint
+                    - Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
+                    - --model_type
+                    - my_vla_qwen
+                    - --vla.type
+                    - myvla-qwen-224px+mx-mshab
+                    - --vla.expected_world_size
+                    - "4"
+                    - --vla.global_batch_size
+                    - "512"
+                    - --vla.per_device_batch_size
+                    - "8"
+                    - --vla.learning_rate
+                    - "2e-5"
+                    - --vla.freeze_vision_backbone
+                    - "True"
+                    - --vla.freeze_llm_backbone
+                    - "False"
+                    - --vla.use_flow_matching
+                    - "False"
+                    - --vla.compress_history
+                    - "False"
+                    - --vla.epochs
+                    - "10"
+                    - --save_interval
+                    - "1000"
+                    - --run_id
+                    - bl_multiview_history_depth_set_table
+                codePath: vla-scripts/train.py
+                codePathLocal: vla-scripts/train.py
+                cpu_count: 240
+                cpu_count_logical: 240
+                cudaVersion: "12.4"
+                disk:
+                    /:
+                        total: "20812690710528"
+                        used: "36590649344"
+                email: traysen879@gmail.com
+                executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
+                git:
+                    commit: 409e4c9a165115624c271028e9b3ee335991b747
+                    remote: https://github.com/TRS07170/myvla.git
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 8
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
+                host: 164-152-109-69
+                memory:
+                    total: "1902324936704"
+                os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
+                program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
+                python: CPython 3.10.18
+                root: myvla_exp/bl_multiview_history_depth_set_table
+                startedAt: "2025-08-20T09:11:47.878963Z"
+                writerId: ns7xudltxb04d0a18uyfc7iv6bgtzle9
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 11
+                - 41
+                - 49
+                - 63
+                - 71
+            "2":
+                - 1
+                - 2
+                - 3
+                - 11
+                - 41
+                - 49
+                - 63
+                - 71
+            "3":
+                - 13
+                - 16
+            "4": 3.10.18
+            "5": 0.21.0
+            "6": 4.40.1
+            "12": 0.21.0
+            "13": linux-x86_64
+data_root_dir:
+    value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
+global_pose:
+    value: false
+hf_token:
+    value: .hf_token
+image_aug:
+    value: false
+is_grasped:
+    value: false
+is_resume:
+    value: false
+model_type:
+    value: my_vla_qwen
+pretrained_checkpoint:
+    value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
+qpos:
+    value: false
+resume_epoch:
+    value: null
+resume_step:
+    value: null
+run_id:
+    value: bl_multiview_history_depth_set_table
+run_id_note:
+    value: null
+run_root_dir:
+    value: myvla_exp
+save_interval:
+    value: 1000
+seed:
+    value: 7
+segmentation:
+    value: false
+trackers:
+    value:
+        - jsonl
+        - wandb
+vla:
+    value:
+        action_chunk_size: 8
+        action_tokenizer: extra_action_tokenizer
+        base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
+        compress_history: false
+        data_mix: bridge
+        enable_gradient_checkpointing: true
+        enable_mixed_precision_training: true
+        epochs: 10
+        expected_world_size: 4
+        freeze_llm_backbone: false
+        freeze_vision_backbone: true
+        global_batch_size: 512
+        image_sequence_len: 10
+        image_window_size: 4
+        learning_rate: 2e-05
+        lr_scheduler_type: constant
+        max_grad_norm: 1
+        max_steps: null
+        per_device_batch_size: 8
+        reduce_in_full_precision: true
+        save_every_n_steps: 25000
+        shuffle_buffer_size: 256000
+        train_strategy: fsdp-full-shard
+        type: myvla-qwen-224px+mx-mshab
+        unfreeze_last_llm_layer: false
+        use_depth_image: true
+        use_flow_matching: false
+        use_wrist_image: true
+        vla_id: myvla-qwen-224px+mx-mshab
+        warmup_ratio: 0
+        weight_decay: 0
+wandb_entity:
+    value: traysen879-uc-san-diego
+wandb_project:
+    value: mshab_vla

wandb/run-20250820_091147-qt1b7wpr/files/output.log ADDED Viewed

	@@ -0,0 +1,11 @@

+[2;36m08/20 [09:11:49][0m[2;36m [0m[34mINFO    [0m | >> [1m[[0m*[1m][0m Starting VLA Training Loop                                                        ]8;id=686782;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\[2mtrain.py[0m]8;;\[2m:[0m]8;id=709047;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#340\[2m340[0m]8;;\
+Traceback (most recent call last):
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 360, in <module>
+    train()
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
+    response = fn(cfg, *args, **kwargs)
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 341, in train
+    train_strategy.run_vla_training(
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/strategies/base_strategy.py", line 367, in run_vla_training
+    normalized_loss = (loss + aux_loss) / self.grad_accumulation_steps
+TypeError: unsupported operand type(s) for +: 'Tensor' and 'NoneType'

wandb/run-20250820_091147-qt1b7wpr/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,144 @@

+setuptools==78.1.1
+wheel==0.45.1
+pip==25.1
+sentencepiece==0.1.99
+mpmath==1.3.0
+libclang==18.1.1
+flatbuffers==25.2.10
+zipp==3.23.0
+wrapt==1.14.1
+urllib3==2.5.0
+typing_extensions==4.14.1
+typeguard==2.13.3
+tqdm==4.67.1
+toml==0.10.2
+termcolor==3.1.0
+tensorflow-io-gcs-filesystem==0.37.1
+tensorflow-estimator==2.15.0
+tensorboard-data-server==0.7.2
+sympy==1.14.0
+smmap==5.0.2
+six==1.17.0
+safetensors==0.5.3
+regex==2025.7.34
+PyYAML==6.0.2
+pyparsing==3.2.3
+Pygments==2.19.2
+pyasn1==0.6.1
+psutil==7.0.0
+protobuf==4.21.12
+platformdirs==4.3.8
+pillow==11.3.0
+packaging==25.0
+opt_einsum==3.4.0
+oauthlib==3.3.1
+nvidia-nvtx-cu12==12.1.105
+nvidia-nvjitlink-cu12==12.9.86
+nvidia-nccl-cu12==2.19.3
+nvidia-curand-cu12==10.3.2.106
+nvidia-cufft-cu12==11.0.2.54
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cublas-cu12==12.1.3.1
+numpy==1.26.4
+networkx==3.4.2
+mypy_extensions==1.1.0
+mergedeep==1.3.4
+mdurl==0.1.2
+MarkupSafe==3.0.2
+Markdown==3.8.2
+kiwisolver==1.4.8
+keras==2.15.0
+importlib_resources==6.5.2
+idna==3.10
+hf-xet==1.1.5
+grpcio==1.74.0
+gast==0.6.0
+fsspec==2025.7.0
+fonttools==4.59.0
+filelock==3.18.0
+etils==1.13.0
+einops==0.8.1
+cycler==0.12.1
+click==8.2.1
+charset-normalizer==3.4.2
+certifi==2025.8.3
+cachetools==5.5.2
+attrs==25.3.0
+annotated-types==0.7.0
+absl-py==2.3.1
+Werkzeug==3.1.3
+typing-inspection==0.4.1
+typing-inspect==0.9.0
+triton==2.2.0
+trimesh==4.7.1
+tensorflow-metadata==1.17.2
+tensorflow-addons==0.23.0
+sentry-sdk==2.34.1
+scipy==1.15.3
+rsa==4.9.1
+requests==2.32.4
+pyyaml-include==1.4.1
+python-dateutil==2.9.0.post0
+pydantic_core==2.33.2
+pyasn1_modules==0.4.2
+promise==2.3
+OpenEXR==3.3.5
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-cudnn-cu12==8.9.2.26
+ml-dtypes==0.2.0
+markdown-it-py==3.0.0
+jsonlines==4.0.0
+json-numpy==2.1.1
+Jinja2==3.1.6
+h5py==3.14.0
+google-pasta==0.2.0
+gitdb==4.0.12
+dm-tree==0.1.9
+contourpy==1.3.2
+astunparse==1.6.3
+rich==14.1.0
+requests-oauthlib==2.0.0
+pydantic==2.11.7
+nvidia-cusolver-cu12==11.4.5.107
+matplotlib==3.10.5
+huggingface-hub==0.34.3
+google-auth==2.40.3
+GitPython==3.1.45
+draccus==0.8.0
+wandb==0.21.0
+torch==2.2.0
+tokenizers==0.19.1
+google-auth-oauthlib==1.2.2
+array_record==0.7.2
+transformers==4.40.1
+torchvision==0.17.0
+torchaudio==2.2.0
+tensorboard==2.15.2
+accelerate==1.9.0
+timm==0.9.10
+tensorflow-datasets==4.9.3
+tensorflow==2.15.0
+peft==0.11.1
+tensorflow-graphics==2021.12.3
+dlimp==0.0.1
+openvla==0.0.3
+ninja==1.11.1.4
+flash-attn==2.5.5
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wandb/run-20250820_091147-qt1b7wpr/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,119 @@

+{
+  "os":  "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-08-20T09:11:47.878963Z",
+  "args":  [
+    "--pretrained_checkpoint",
+    "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
+    "--model_type",
+    "my_vla_qwen",
+    "--vla.type",
+    "myvla-qwen-224px+mx-mshab",
+    "--vla.expected_world_size",
+    "4",
+    "--vla.global_batch_size",
+    "512",
+    "--vla.per_device_batch_size",
+    "8",
+    "--vla.learning_rate",
+    "2e-5",
+    "--vla.freeze_vision_backbone",
+    "True",
+    "--vla.freeze_llm_backbone",
+    "False",
+    "--vla.use_flow_matching",
+    "False",
+    "--vla.compress_history",
+    "False",
+    "--vla.epochs",
+    "10",
+    "--save_interval",
+    "1000",
+    "--run_id",
+    "bl_multiview_history_depth_set_table"
+  ],
+  "program":  "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
+  "codePath":  "vla-scripts/train.py",
+  "codePathLocal":  "vla-scripts/train.py",
+  "git":  {
+    "remote":  "https://github.com/TRS07170/myvla.git",
+    "commit":  "409e4c9a165115624c271028e9b3ee335991b747"
+  },
+  "email":  "traysen879@gmail.com",
+  "root":  "myvla_exp/bl_multiview_history_depth_set_table",
+  "host":  "164-152-109-69",
+  "executable":  "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
+  "cpu_count":  240,
+  "cpu_count_logical":  240,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "20812690710528",
+      "used":  "36590649344"
+    }
+  },
+  "memory":  {
+    "total":  "1902324936704"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-335150e5-634c-68e2-4930-656c95e62244"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
+    }
+  ],
+  "cudaVersion":  "12.4",
+  "writerId":  "ns7xudltxb04d0a18uyfc7iv6bgtzle9"
+}

wandb/run-20250820_091147-qt1b7wpr/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_wandb":{"runtime":17},"_runtime":17}

wandb/run-20250820_091147-qt1b7wpr/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-08-20T09:11:48.070858415Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpkvexv_g_/port-3718336.txt","pid":3718336,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-08-20T09:11:48.072184654Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3718336}
+{"time":"2025-08-20T09:11:48.072124353Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3718336-3718637-3969663796/socket","Net":"unix"}}
+{"time":"2025-08-20T09:11:48.174906593Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-08-20T09:11:48.19028357Z","level":"INFO","msg":"handleInformInit: received","streamId":"qt1b7wpr","id":"1(@)"}
+{"time":"2025-08-20T09:11:48.483840285Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"qt1b7wpr","id":"1(@)"}
+{"time":"2025-08-20T09:12:06.693009732Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-08-20T09:12:06.69337768Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-08-20T09:12:06.693489192Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3718336-3718637-3969663796/socket","Net":"unix"}}
+{"time":"2025-08-20T09:12:06.693662267Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-08-20T09:12:06.693698588Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-08-20T09:12:07.063511858Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-08-20T09:12:07.063548929Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-08-20T09:12:07.063559649Z","level":"INFO","msg":"server is closed"}

wandb/run-20250820_091147-qt1b7wpr/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,12 @@

+{"time":"2025-08-20T09:11:48.19393641Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-08-20T09:11:48.483544178Z","level":"INFO","msg":"stream: created new stream","id":"qt1b7wpr"}
+{"time":"2025-08-20T09:11:48.483667061Z","level":"INFO","msg":"stream: started","id":"qt1b7wpr"}
+{"time":"2025-08-20T09:11:48.483708542Z","level":"INFO","msg":"writer: Do: started","stream_id":"qt1b7wpr"}
+{"time":"2025-08-20T09:11:48.483745883Z","level":"INFO","msg":"sender: started","stream_id":"qt1b7wpr"}
+{"time":"2025-08-20T09:11:48.483736392Z","level":"INFO","msg":"handler: started","stream_id":"qt1b7wpr"}
+{"time":"2025-08-20T09:12:06.693274498Z","level":"INFO","msg":"stream: closing","id":"qt1b7wpr"}
+{"time":"2025-08-20T09:12:06.954966462Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-08-20T09:12:07.056140267Z","level":"INFO","msg":"handler: closed","stream_id":"qt1b7wpr"}
+{"time":"2025-08-20T09:12:07.056225088Z","level":"INFO","msg":"writer: Close: closed","stream_id":"qt1b7wpr"}
+{"time":"2025-08-20T09:12:07.056243458Z","level":"INFO","msg":"sender: closed","stream_id":"qt1b7wpr"}
+{"time":"2025-08-20T09:12:07.061147966Z","level":"INFO","msg":"stream: closed","id":"qt1b7wpr"}

wandb/run-20250820_091147-qt1b7wpr/logs/debug.log ADDED Viewed

	@@ -0,0 +1,22 @@

+2025-08-20 09:11:47,921 INFO    MainThread:3718336 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-08-20 09:11:47,921 INFO    MainThread:3718336 [wandb_setup.py:_flush():80] Configure stats pid to 3718336
+2025-08-20 09:11:47,923 INFO    MainThread:3718336 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
+2025-08-20 09:11:47,923 INFO    MainThread:3718336 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
+2025-08-20 09:11:47,926 INFO    MainThread:3718336 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-08-20 09:11:47,926 INFO    MainThread:3718336 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_091147-qt1b7wpr/logs/debug.log
+2025-08-20 09:11:47,928 INFO    MainThread:3718336 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_091147-qt1b7wpr/logs/debug-internal.log
+2025-08-20 09:11:47,930 INFO    MainThread:3718336 [wandb_init.py:init():830] calling init triggers
+2025-08-20 09:11:47,932 INFO    MainThread:3718336 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 4, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 10, 'use_wrist_image': True, 'use_depth_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 4}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_history_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'segmentation': False, '_wandb': {}}
+2025-08-20 09:11:47,934 INFO    MainThread:3718336 [wandb_init.py:init():871] starting backend
+2025-08-20 09:11:48,175 INFO    MainThread:3718336 [wandb_init.py:init():874] sending inform_init request
+2025-08-20 09:11:48,183 INFO    MainThread:3718336 [wandb_init.py:init():882] backend started and connected
+2025-08-20 09:11:48,187 INFO    MainThread:3718336 [wandb_init.py:init():953] updated telemetry
+2025-08-20 09:11:48,216 INFO    MainThread:3718336 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-08-20 09:11:48,783 INFO    MainThread:3718336 [wandb_init.py:init():1029] starting run threads in backend
+2025-08-20 09:11:49,363 INFO    MainThread:3718336 [wandb_run.py:_console_start():2458] atexit reg
+2025-08-20 09:11:49,364 INFO    MainThread:3718336 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-08-20 09:11:49,366 INFO    MainThread:3718336 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-08-20 09:11:49,366 INFO    MainThread:3718336 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-08-20 09:11:49,377 INFO    MainThread:3718336 [wandb_init.py:init():1075] run started, returning control to user process
+2025-08-20 09:12:06,691 INFO    MsgRouterThr:3718336 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.

wandb/run-20250820_091147-qt1b7wpr/run-qt1b7wpr.wandb ADDED Viewed

Binary file (9.75 kB). View file

wandb/run-20250820_094544-v4zsb4rt/files/config.yaml ADDED Viewed

	@@ -0,0 +1,204 @@

+_wandb:
+    value:
+        cli_version: 0.21.0
+        e:
+            g4ijpwdw40101qp7o8xly8dy5734ui43:
+                args:
+                    - --pretrained_checkpoint
+                    - Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
+                    - --model_type
+                    - my_vla_qwen
+                    - --vla.type
+                    - myvla-qwen-224px+mx-mshab
+                    - --vla.expected_world_size
+                    - "4"
+                    - --vla.global_batch_size
+                    - "512"
+                    - --vla.per_device_batch_size
+                    - "8"
+                    - --vla.learning_rate
+                    - "2e-5"
+                    - --vla.freeze_vision_backbone
+                    - "True"
+                    - --vla.freeze_llm_backbone
+                    - "False"
+                    - --vla.use_flow_matching
+                    - "False"
+                    - --vla.compress_history
+                    - "False"
+                    - --vla.epochs
+                    - "10"
+                    - --save_interval
+                    - "1000"
+                    - --run_id
+                    - bl_multiview_history_depth_set_table
+                codePath: vla-scripts/train.py
+                codePathLocal: vla-scripts/train.py
+                cpu_count: 240
+                cpu_count_logical: 240
+                cudaVersion: "12.4"
+                disk:
+                    /:
+                        total: "20812690710528"
+                        used: "36591529984"
+                email: traysen879@gmail.com
+                executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
+                git:
+                    commit: 409e4c9a165115624c271028e9b3ee335991b747
+                    remote: https://github.com/TRS07170/myvla.git
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 8
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
+                host: 164-152-109-69
+                memory:
+                    total: "1902324936704"
+                os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
+                program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
+                python: CPython 3.10.18
+                root: myvla_exp/bl_multiview_history_depth_set_table
+                startedAt: "2025-08-20T09:45:44.966050Z"
+                writerId: g4ijpwdw40101qp7o8xly8dy5734ui43
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 11
+                - 41
+                - 49
+                - 63
+                - 71
+            "2":
+                - 1
+                - 2
+                - 3
+                - 11
+                - 41
+                - 49
+                - 63
+                - 71
+            "3":
+                - 13
+                - 16
+            "4": 3.10.18
+            "5": 0.21.0
+            "6": 4.40.1
+            "12": 0.21.0
+            "13": linux-x86_64
+data_root_dir:
+    value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
+global_pose:
+    value: false
+hf_token:
+    value: .hf_token
+image_aug:
+    value: false
+is_grasped:
+    value: false
+is_resume:
+    value: false
+model_type:
+    value: my_vla_qwen
+pretrained_checkpoint:
+    value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
+qpos:
+    value: false
+resume_epoch:
+    value: null
+resume_step:
+    value: null
+run_id:
+    value: bl_multiview_history_depth_set_table
+run_id_note:
+    value: null
+run_root_dir:
+    value: myvla_exp
+save_interval:
+    value: 1000
+seed:
+    value: 7
+segmentation:
+    value: false
+trackers:
+    value:
+        - jsonl
+        - wandb
+vla:
+    value:
+        action_chunk_size: 8
+        action_tokenizer: extra_action_tokenizer
+        base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
+        compress_history: false
+        data_mix: bridge
+        enable_gradient_checkpointing: true
+        enable_mixed_precision_training: true
+        epochs: 10
+        expected_world_size: 4
+        freeze_llm_backbone: false
+        freeze_vision_backbone: true
+        global_batch_size: 512
+        image_sequence_len: 10
+        image_window_size: 4
+        learning_rate: 2e-05
+        lr_scheduler_type: constant
+        max_grad_norm: 1
+        max_steps: null
+        per_device_batch_size: 8
+        reduce_in_full_precision: true
+        save_every_n_steps: 25000
+        shuffle_buffer_size: 256000
+        train_strategy: fsdp-full-shard
+        type: myvla-qwen-224px+mx-mshab
+        unfreeze_last_llm_layer: false
+        use_depth_image: true
+        use_flow_matching: false
+        use_wrist_image: true
+        vla_id: myvla-qwen-224px+mx-mshab
+        warmup_ratio: 0
+        weight_decay: 0
+wandb_entity:
+    value: traysen879-uc-san-diego
+wandb_project:
+    value: mshab_vla

wandb/run-20250820_094544-v4zsb4rt/files/output.log ADDED Viewed

	@@ -0,0 +1,11 @@

+[2;36m08/20 [09:45:46][0m[2;36m [0m[34mINFO    [0m | >> [1m[[0m*[1m][0m Starting VLA Training Loop                                                        ]8;id=686782;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\[2mtrain.py[0m]8;;\[2m:[0m]8;id=709047;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#340\[2m340[0m]8;;\
+Traceback (most recent call last):
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 360, in <module>
+    train()
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
+    response = fn(cfg, *args, **kwargs)
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 341, in train
+    train_strategy.run_vla_training(
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/strategies/base_strategy.py", line 370, in run_vla_training
+    normalized_loss = loss + aux_loss
+TypeError: unsupported operand type(s) for +: 'Tensor' and 'NoneType'

wandb/run-20250820_094544-v4zsb4rt/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,144 @@

+setuptools==78.1.1
+wheel==0.45.1
+pip==25.1
+sentencepiece==0.1.99
+mpmath==1.3.0
+libclang==18.1.1
+flatbuffers==25.2.10
+zipp==3.23.0
+wrapt==1.14.1
+urllib3==2.5.0
+typing_extensions==4.14.1
+typeguard==2.13.3
+tqdm==4.67.1
+toml==0.10.2
+termcolor==3.1.0
+tensorflow-io-gcs-filesystem==0.37.1
+tensorflow-estimator==2.15.0
+tensorboard-data-server==0.7.2
+sympy==1.14.0
+smmap==5.0.2
+six==1.17.0
+safetensors==0.5.3
+regex==2025.7.34
+PyYAML==6.0.2
+pyparsing==3.2.3
+Pygments==2.19.2
+pyasn1==0.6.1
+psutil==7.0.0
+protobuf==4.21.12
+platformdirs==4.3.8
+pillow==11.3.0
+packaging==25.0
+opt_einsum==3.4.0
+oauthlib==3.3.1
+nvidia-nvtx-cu12==12.1.105
+nvidia-nvjitlink-cu12==12.9.86
+nvidia-nccl-cu12==2.19.3
+nvidia-curand-cu12==10.3.2.106
+nvidia-cufft-cu12==11.0.2.54
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cublas-cu12==12.1.3.1
+numpy==1.26.4
+networkx==3.4.2
+mypy_extensions==1.1.0
+mergedeep==1.3.4
+mdurl==0.1.2
+MarkupSafe==3.0.2
+Markdown==3.8.2
+kiwisolver==1.4.8
+keras==2.15.0
+importlib_resources==6.5.2
+idna==3.10
+hf-xet==1.1.5
+grpcio==1.74.0
+gast==0.6.0
+fsspec==2025.7.0
+fonttools==4.59.0
+filelock==3.18.0
+etils==1.13.0
+einops==0.8.1
+cycler==0.12.1
+click==8.2.1
+charset-normalizer==3.4.2
+certifi==2025.8.3
+cachetools==5.5.2
+attrs==25.3.0
+annotated-types==0.7.0
+absl-py==2.3.1
+Werkzeug==3.1.3
+typing-inspection==0.4.1
+typing-inspect==0.9.0
+triton==2.2.0
+trimesh==4.7.1
+tensorflow-metadata==1.17.2
+tensorflow-addons==0.23.0
+sentry-sdk==2.34.1
+scipy==1.15.3
+rsa==4.9.1
+requests==2.32.4
+pyyaml-include==1.4.1
+python-dateutil==2.9.0.post0
+pydantic_core==2.33.2
+pyasn1_modules==0.4.2
+promise==2.3
+OpenEXR==3.3.5
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-cudnn-cu12==8.9.2.26
+ml-dtypes==0.2.0
+markdown-it-py==3.0.0
+jsonlines==4.0.0
+json-numpy==2.1.1
+Jinja2==3.1.6
+h5py==3.14.0
+google-pasta==0.2.0
+gitdb==4.0.12
+dm-tree==0.1.9
+contourpy==1.3.2
+astunparse==1.6.3
+rich==14.1.0
+requests-oauthlib==2.0.0
+pydantic==2.11.7
+nvidia-cusolver-cu12==11.4.5.107
+matplotlib==3.10.5
+huggingface-hub==0.34.3
+google-auth==2.40.3
+GitPython==3.1.45
+draccus==0.8.0
+wandb==0.21.0
+torch==2.2.0
+tokenizers==0.19.1
+google-auth-oauthlib==1.2.2
+array_record==0.7.2
+transformers==4.40.1
+torchvision==0.17.0
+torchaudio==2.2.0
+tensorboard==2.15.2
+accelerate==1.9.0
+timm==0.9.10
+tensorflow-datasets==4.9.3
+tensorflow==2.15.0
+peft==0.11.1
+tensorflow-graphics==2021.12.3
+dlimp==0.0.1
+openvla==0.0.3
+ninja==1.11.1.4
+flash-attn==2.5.5
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wandb/run-20250820_094544-v4zsb4rt/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,119 @@

+{
+  "os":  "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-08-20T09:45:44.966050Z",
+  "args":  [
+    "--pretrained_checkpoint",
+    "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
+    "--model_type",
+    "my_vla_qwen",
+    "--vla.type",
+    "myvla-qwen-224px+mx-mshab",
+    "--vla.expected_world_size",
+    "4",
+    "--vla.global_batch_size",
+    "512",
+    "--vla.per_device_batch_size",
+    "8",
+    "--vla.learning_rate",
+    "2e-5",
+    "--vla.freeze_vision_backbone",
+    "True",
+    "--vla.freeze_llm_backbone",
+    "False",
+    "--vla.use_flow_matching",
+    "False",
+    "--vla.compress_history",
+    "False",
+    "--vla.epochs",
+    "10",
+    "--save_interval",
+    "1000",
+    "--run_id",
+    "bl_multiview_history_depth_set_table"
+  ],
+  "program":  "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
+  "codePath":  "vla-scripts/train.py",
+  "codePathLocal":  "vla-scripts/train.py",
+  "git":  {
+    "remote":  "https://github.com/TRS07170/myvla.git",
+    "commit":  "409e4c9a165115624c271028e9b3ee335991b747"
+  },
+  "email":  "traysen879@gmail.com",
+  "root":  "myvla_exp/bl_multiview_history_depth_set_table",
+  "host":  "164-152-109-69",
+  "executable":  "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
+  "cpu_count":  240,
+  "cpu_count_logical":  240,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "20812690710528",
+      "used":  "36591529984"
+    }
+  },
+  "memory":  {
+    "total":  "1902324936704"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-335150e5-634c-68e2-4930-656c95e62244"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
+    }
+  ],
+  "cudaVersion":  "12.4",
+  "writerId":  "g4ijpwdw40101qp7o8xly8dy5734ui43"
+}

wandb/run-20250820_094544-v4zsb4rt/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_wandb":{"runtime":4},"_runtime":4}

wandb/run-20250820_094544-v4zsb4rt/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-08-20T09:45:45.174640913Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpajeirdwt/port-3720956.txt","pid":3720956,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-08-20T09:45:45.175809008Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3720956-3721259-1781631432/socket","Net":"unix"}}
+{"time":"2025-08-20T09:45:45.175845869Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3720956}
+{"time":"2025-08-20T09:45:45.282055099Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-08-20T09:45:45.297889706Z","level":"INFO","msg":"handleInformInit: received","streamId":"v4zsb4rt","id":"1(@)"}
+{"time":"2025-08-20T09:45:45.599182755Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"v4zsb4rt","id":"1(@)"}
+{"time":"2025-08-20T09:45:50.517475416Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-08-20T09:45:50.517551897Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-08-20T09:45:50.517592208Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-08-20T09:45:50.51768034Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-08-20T09:45:50.517838543Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3720956-3721259-1781631432/socket","Net":"unix"}}
+{"time":"2025-08-20T09:45:51.092657793Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-08-20T09:45:51.092700164Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-08-20T09:45:51.092711254Z","level":"INFO","msg":"server is closed"}

wandb/run-20250820_094544-v4zsb4rt/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,12 @@

+{"time":"2025-08-20T09:45:45.302214181Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-08-20T09:45:45.598969981Z","level":"INFO","msg":"stream: created new stream","id":"v4zsb4rt"}
+{"time":"2025-08-20T09:45:45.599115765Z","level":"INFO","msg":"stream: started","id":"v4zsb4rt"}
+{"time":"2025-08-20T09:45:45.599177585Z","level":"INFO","msg":"handler: started","stream_id":"v4zsb4rt"}
+{"time":"2025-08-20T09:45:45.599261048Z","level":"INFO","msg":"sender: started","stream_id":"v4zsb4rt"}
+{"time":"2025-08-20T09:45:45.599154334Z","level":"INFO","msg":"writer: Do: started","stream_id":"v4zsb4rt"}
+{"time":"2025-08-20T09:45:50.517603438Z","level":"INFO","msg":"stream: closing","id":"v4zsb4rt"}
+{"time":"2025-08-20T09:45:51.014498279Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-08-20T09:45:51.086917937Z","level":"INFO","msg":"handler: closed","stream_id":"v4zsb4rt"}
+{"time":"2025-08-20T09:45:51.087014889Z","level":"INFO","msg":"writer: Close: closed","stream_id":"v4zsb4rt"}
+{"time":"2025-08-20T09:45:51.08706522Z","level":"INFO","msg":"sender: closed","stream_id":"v4zsb4rt"}
+{"time":"2025-08-20T09:45:51.089613696Z","level":"INFO","msg":"stream: closed","id":"v4zsb4rt"}

wandb/run-20250820_094544-v4zsb4rt/logs/debug.log ADDED Viewed

	@@ -0,0 +1,22 @@

+2025-08-20 09:45:45,020 INFO    MainThread:3720956 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-08-20 09:45:45,020 INFO    MainThread:3720956 [wandb_setup.py:_flush():80] Configure stats pid to 3720956
+2025-08-20 09:45:45,020 INFO    MainThread:3720956 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
+2025-08-20 09:45:45,023 INFO    MainThread:3720956 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
+2025-08-20 09:45:45,026 INFO    MainThread:3720956 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-08-20 09:45:45,028 INFO    MainThread:3720956 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_094544-v4zsb4rt/logs/debug.log
+2025-08-20 09:45:45,030 INFO    MainThread:3720956 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_094544-v4zsb4rt/logs/debug-internal.log
+2025-08-20 09:45:45,032 INFO    MainThread:3720956 [wandb_init.py:init():830] calling init triggers
+2025-08-20 09:45:45,034 INFO    MainThread:3720956 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 4, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 10, 'use_wrist_image': True, 'use_depth_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 4}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_history_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'segmentation': False, '_wandb': {}}
+2025-08-20 09:45:45,036 INFO    MainThread:3720956 [wandb_init.py:init():871] starting backend
+2025-08-20 09:45:45,282 INFO    MainThread:3720956 [wandb_init.py:init():874] sending inform_init request
+2025-08-20 09:45:45,291 INFO    MainThread:3720956 [wandb_init.py:init():882] backend started and connected
+2025-08-20 09:45:45,298 INFO    MainThread:3720956 [wandb_init.py:init():953] updated telemetry
+2025-08-20 09:45:45,334 INFO    MainThread:3720956 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-08-20 09:45:45,866 INFO    MainThread:3720956 [wandb_init.py:init():1029] starting run threads in backend
+2025-08-20 09:45:46,365 INFO    MainThread:3720956 [wandb_run.py:_console_start():2458] atexit reg
+2025-08-20 09:45:46,365 INFO    MainThread:3720956 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-08-20 09:45:46,368 INFO    MainThread:3720956 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-08-20 09:45:46,370 INFO    MainThread:3720956 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-08-20 09:45:46,379 INFO    MainThread:3720956 [wandb_init.py:init():1075] run started, returning control to user process
+2025-08-20 09:45:50,516 INFO    MsgRouterThr:3720956 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.

wandb/run-20250820_094544-v4zsb4rt/run-v4zsb4rt.wandb ADDED Viewed

Binary file (6.28 kB). View file

wandb/run-20250820_095138-in9qu6p9/files/config.yaml ADDED Viewed

	@@ -0,0 +1,204 @@

+_wandb:
+    value:
+        cli_version: 0.21.0
+        e:
+            ba5qoz48zgwc5ju7pflr3irzrnx350dd:
+                args:
+                    - --pretrained_checkpoint
+                    - Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
+                    - --model_type
+                    - my_vla_qwen
+                    - --vla.type
+                    - myvla-qwen-224px+mx-mshab
+                    - --vla.expected_world_size
+                    - "4"
+                    - --vla.global_batch_size
+                    - "512"
+                    - --vla.per_device_batch_size
+                    - "8"
+                    - --vla.learning_rate
+                    - "2e-5"
+                    - --vla.freeze_vision_backbone
+                    - "True"
+                    - --vla.freeze_llm_backbone
+                    - "False"
+                    - --vla.use_flow_matching
+                    - "False"
+                    - --vla.compress_history
+                    - "False"
+                    - --vla.epochs
+                    - "10"
+                    - --save_interval
+                    - "1000"
+                    - --run_id
+                    - bl_multiview_history_depth_set_table
+                codePath: vla-scripts/train.py
+                codePathLocal: vla-scripts/train.py
+                cpu_count: 240
+                cpu_count_logical: 240
+                cudaVersion: "12.4"
+                disk:
+                    /:
+                        total: "20812690710528"
+                        used: "36591595520"
+                email: traysen879@gmail.com
+                executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
+                git:
+                    commit: 409e4c9a165115624c271028e9b3ee335991b747
+                    remote: https://github.com/TRS07170/myvla.git
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 8
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
+                host: 164-152-109-69
+                memory:
+                    total: "1902324936704"
+                os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
+                program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
+                python: CPython 3.10.18
+                root: myvla_exp/bl_multiview_history_depth_set_table
+                startedAt: "2025-08-20T09:51:38.344162Z"
+                writerId: ba5qoz48zgwc5ju7pflr3irzrnx350dd
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 11
+                - 41
+                - 49
+                - 63
+                - 71
+            "2":
+                - 1
+                - 2
+                - 3
+                - 11
+                - 41
+                - 49
+                - 63
+                - 71
+            "3":
+                - 13
+                - 16
+            "4": 3.10.18
+            "5": 0.21.0
+            "6": 4.40.1
+            "12": 0.21.0
+            "13": linux-x86_64
+data_root_dir:
+    value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
+global_pose:
+    value: false
+hf_token:
+    value: .hf_token
+image_aug:
+    value: false
+is_grasped:
+    value: false
+is_resume:
+    value: false
+model_type:
+    value: my_vla_qwen
+pretrained_checkpoint:
+    value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
+qpos:
+    value: false
+resume_epoch:
+    value: null
+resume_step:
+    value: null
+run_id:
+    value: bl_multiview_history_depth_set_table
+run_id_note:
+    value: null
+run_root_dir:
+    value: myvla_exp
+save_interval:
+    value: 1000
+seed:
+    value: 7
+segmentation:
+    value: false
+trackers:
+    value:
+        - jsonl
+        - wandb
+vla:
+    value:
+        action_chunk_size: 8
+        action_tokenizer: extra_action_tokenizer
+        base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
+        compress_history: false
+        data_mix: bridge
+        enable_gradient_checkpointing: true
+        enable_mixed_precision_training: true
+        epochs: 10
+        expected_world_size: 4
+        freeze_llm_backbone: false
+        freeze_vision_backbone: true
+        global_batch_size: 512
+        image_sequence_len: 10
+        image_window_size: 4
+        learning_rate: 2e-05
+        lr_scheduler_type: constant
+        max_grad_norm: 1
+        max_steps: null
+        per_device_batch_size: 8
+        reduce_in_full_precision: true
+        save_every_n_steps: 25000
+        shuffle_buffer_size: 256000
+        train_strategy: fsdp-full-shard
+        type: myvla-qwen-224px+mx-mshab
+        unfreeze_last_llm_layer: false
+        use_depth_image: true
+        use_flow_matching: false
+        use_wrist_image: true
+        vla_id: myvla-qwen-224px+mx-mshab
+        warmup_ratio: 0
+        weight_decay: 0
+wandb_entity:
+    value: traysen879-uc-san-diego
+wandb_project:
+    value: mshab_vla

wandb/run-20250820_095138-in9qu6p9/files/output.log ADDED Viewed

	@@ -0,0 +1,13 @@

+[2;36m08/20 [09:51:39][0m[2;36m [0m[34mINFO    [0m | >> [1m[[0m*[1m][0m Starting VLA Training Loop                                                        ]8;id=686782;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\[2mtrain.py[0m]8;;\[2m:[0m]8;id=709047;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#340\[2m340[0m]8;;\
+Traceback (most recent call last):
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 360, in <module>
+    train()
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
+    response = fn(cfg, *args, **kwargs)
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 341, in train
+    train_strategy.run_vla_training(
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/strategies/base_strategy.py", line 427, in run_vla_training
+    metrics.commit(
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/metrics.py", line 318, in commit
+    self.state[key].append(value.detach())
+AttributeError: 'NoneType' object has no attribute 'detach'

wandb/run-20250820_095138-in9qu6p9/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,144 @@

+setuptools==78.1.1
+wheel==0.45.1
+pip==25.1
+sentencepiece==0.1.99
+mpmath==1.3.0
+libclang==18.1.1
+flatbuffers==25.2.10
+zipp==3.23.0
+wrapt==1.14.1
+urllib3==2.5.0
+typing_extensions==4.14.1
+typeguard==2.13.3
+tqdm==4.67.1
+toml==0.10.2
+termcolor==3.1.0
+tensorflow-io-gcs-filesystem==0.37.1
+tensorflow-estimator==2.15.0
+tensorboard-data-server==0.7.2
+sympy==1.14.0
+smmap==5.0.2
+six==1.17.0
+safetensors==0.5.3
+regex==2025.7.34
+PyYAML==6.0.2
+pyparsing==3.2.3
+Pygments==2.19.2
+pyasn1==0.6.1
+psutil==7.0.0
+protobuf==4.21.12
+platformdirs==4.3.8
+pillow==11.3.0
+packaging==25.0
+opt_einsum==3.4.0
+oauthlib==3.3.1
+nvidia-nvtx-cu12==12.1.105
+nvidia-nvjitlink-cu12==12.9.86
+nvidia-nccl-cu12==2.19.3
+nvidia-curand-cu12==10.3.2.106
+nvidia-cufft-cu12==11.0.2.54
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cublas-cu12==12.1.3.1
+numpy==1.26.4
+networkx==3.4.2
+mypy_extensions==1.1.0
+mergedeep==1.3.4
+mdurl==0.1.2
+MarkupSafe==3.0.2
+Markdown==3.8.2
+kiwisolver==1.4.8
+keras==2.15.0
+importlib_resources==6.5.2
+idna==3.10
+hf-xet==1.1.5
+grpcio==1.74.0
+gast==0.6.0
+fsspec==2025.7.0
+fonttools==4.59.0
+filelock==3.18.0
+etils==1.13.0
+einops==0.8.1
+cycler==0.12.1
+click==8.2.1
+charset-normalizer==3.4.2
+certifi==2025.8.3
+cachetools==5.5.2
+attrs==25.3.0
+annotated-types==0.7.0
+absl-py==2.3.1
+Werkzeug==3.1.3
+typing-inspection==0.4.1
+typing-inspect==0.9.0
+triton==2.2.0
+trimesh==4.7.1
+tensorflow-metadata==1.17.2
+tensorflow-addons==0.23.0
+sentry-sdk==2.34.1
+scipy==1.15.3
+rsa==4.9.1
+requests==2.32.4
+pyyaml-include==1.4.1
+python-dateutil==2.9.0.post0
+pydantic_core==2.33.2
+pyasn1_modules==0.4.2
+promise==2.3
+OpenEXR==3.3.5
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-cudnn-cu12==8.9.2.26
+ml-dtypes==0.2.0
+markdown-it-py==3.0.0
+jsonlines==4.0.0
+json-numpy==2.1.1
+Jinja2==3.1.6
+h5py==3.14.0
+google-pasta==0.2.0
+gitdb==4.0.12
+dm-tree==0.1.9
+contourpy==1.3.2
+astunparse==1.6.3
+rich==14.1.0
+requests-oauthlib==2.0.0
+pydantic==2.11.7
+nvidia-cusolver-cu12==11.4.5.107
+matplotlib==3.10.5
+huggingface-hub==0.34.3
+google-auth==2.40.3
+GitPython==3.1.45
+draccus==0.8.0
+wandb==0.21.0
+torch==2.2.0
+tokenizers==0.19.1
+google-auth-oauthlib==1.2.2
+array_record==0.7.2
+transformers==4.40.1
+torchvision==0.17.0
+torchaudio==2.2.0
+tensorboard==2.15.2
+accelerate==1.9.0
+timm==0.9.10
+tensorflow-datasets==4.9.3
+tensorflow==2.15.0
+peft==0.11.1
+tensorflow-graphics==2021.12.3
+dlimp==0.0.1
+openvla==0.0.3
+ninja==1.11.1.4
+flash-attn==2.5.5
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wandb/run-20250820_095138-in9qu6p9/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,119 @@

+{
+  "os":  "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-08-20T09:51:38.344162Z",
+  "args":  [
+    "--pretrained_checkpoint",
+    "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
+    "--model_type",
+    "my_vla_qwen",
+    "--vla.type",
+    "myvla-qwen-224px+mx-mshab",
+    "--vla.expected_world_size",
+    "4",
+    "--vla.global_batch_size",
+    "512",
+    "--vla.per_device_batch_size",
+    "8",
+    "--vla.learning_rate",
+    "2e-5",
+    "--vla.freeze_vision_backbone",
+    "True",
+    "--vla.freeze_llm_backbone",
+    "False",
+    "--vla.use_flow_matching",
+    "False",
+    "--vla.compress_history",
+    "False",
+    "--vla.epochs",
+    "10",
+    "--save_interval",
+    "1000",
+    "--run_id",
+    "bl_multiview_history_depth_set_table"
+  ],
+  "program":  "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
+  "codePath":  "vla-scripts/train.py",
+  "codePathLocal":  "vla-scripts/train.py",
+  "git":  {
+    "remote":  "https://github.com/TRS07170/myvla.git",
+    "commit":  "409e4c9a165115624c271028e9b3ee335991b747"
+  },
+  "email":  "traysen879@gmail.com",
+  "root":  "myvla_exp/bl_multiview_history_depth_set_table",
+  "host":  "164-152-109-69",
+  "executable":  "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
+  "cpu_count":  240,
+  "cpu_count_logical":  240,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "20812690710528",
+      "used":  "36591595520"
+    }
+  },
+  "memory":  {
+    "total":  "1902324936704"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-335150e5-634c-68e2-4930-656c95e62244"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
+    }
+  ],
+  "cudaVersion":  "12.4",
+  "writerId":  "ba5qoz48zgwc5ju7pflr3irzrnx350dd"
+}

wandb/run-20250820_095138-in9qu6p9/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_wandb":{"runtime":39},"_runtime":39}

wandb/run-20250820_095138-in9qu6p9/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-08-20T09:51:38.536260128Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpa8uiflh4/port-3721925.txt","pid":3721925,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-08-20T09:51:38.537360213Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3721925-3722220-1575105170/socket","Net":"unix"}}
+{"time":"2025-08-20T09:51:38.537456205Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3721925}
+{"time":"2025-08-20T09:51:38.640382543Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-08-20T09:51:38.656047897Z","level":"INFO","msg":"handleInformInit: received","streamId":"in9qu6p9","id":"1(@)"}
+{"time":"2025-08-20T09:51:38.950720371Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"in9qu6p9","id":"1(@)"}
+{"time":"2025-08-20T09:52:18.709461098Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-08-20T09:52:18.709572482Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-08-20T09:52:18.709545521Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-08-20T09:52:18.709654592Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-08-20T09:52:18.709669844Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3721925-3722220-1575105170/socket","Net":"unix"}}
+{"time":"2025-08-20T09:52:19.206007071Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-08-20T09:52:19.206153915Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-08-20T09:52:19.206165435Z","level":"INFO","msg":"server is closed"}

wandb/run-20250820_095138-in9qu6p9/logs/debug-internal.log ADDED Viewed

	@@ -0,0 +1,12 @@

+{"time":"2025-08-20T09:51:38.660459543Z","level":"INFO","msg":"stream: starting","core version":"0.21.0"}
+{"time":"2025-08-20T09:51:38.950393813Z","level":"INFO","msg":"stream: created new stream","id":"in9qu6p9"}
+{"time":"2025-08-20T09:51:38.950544507Z","level":"INFO","msg":"stream: started","id":"in9qu6p9"}
+{"time":"2025-08-20T09:51:38.950582828Z","level":"INFO","msg":"writer: Do: started","stream_id":"in9qu6p9"}
+{"time":"2025-08-20T09:51:38.950630479Z","level":"INFO","msg":"sender: started","stream_id":"in9qu6p9"}
+{"time":"2025-08-20T09:51:38.950622759Z","level":"INFO","msg":"handler: started","stream_id":"in9qu6p9"}
+{"time":"2025-08-20T09:52:18.709584351Z","level":"INFO","msg":"stream: closing","id":"in9qu6p9"}
+{"time":"2025-08-20T09:52:19.077055913Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
+{"time":"2025-08-20T09:52:19.198860723Z","level":"INFO","msg":"handler: closed","stream_id":"in9qu6p9"}
+{"time":"2025-08-20T09:52:19.198934456Z","level":"INFO","msg":"writer: Close: closed","stream_id":"in9qu6p9"}
+{"time":"2025-08-20T09:52:19.198966537Z","level":"INFO","msg":"sender: closed","stream_id":"in9qu6p9"}
+{"time":"2025-08-20T09:52:19.203409125Z","level":"INFO","msg":"stream: closed","id":"in9qu6p9"}

wandb/run-20250820_095138-in9qu6p9/logs/debug.log ADDED Viewed

	@@ -0,0 +1,22 @@

+2025-08-20 09:51:38,385 INFO    MainThread:3721925 [wandb_setup.py:_flush():80] Current SDK version is 0.21.0
+2025-08-20 09:51:38,385 INFO    MainThread:3721925 [wandb_setup.py:_flush():80] Configure stats pid to 3721925
+2025-08-20 09:51:38,385 INFO    MainThread:3721925 [wandb_setup.py:_flush():80] Loading settings from /home/ubuntu/.config/wandb/settings
+2025-08-20 09:51:38,387 INFO    MainThread:3721925 [wandb_setup.py:_flush():80] Loading settings from /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/wandb/settings
+2025-08-20 09:51:38,388 INFO    MainThread:3721925 [wandb_setup.py:_flush():80] Loading settings from environment variables
+2025-08-20 09:51:38,390 INFO    MainThread:3721925 [wandb_init.py:setup_run_log_directory():703] Logging user logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_095138-in9qu6p9/logs/debug.log
+2025-08-20 09:51:38,393 INFO    MainThread:3721925 [wandb_init.py:setup_run_log_directory():704] Logging internal logs to myvla_exp/bl_multiview_history_depth_set_table/wandb/run-20250820_095138-in9qu6p9/logs/debug-internal.log
+2025-08-20 09:51:38,395 INFO    MainThread:3721925 [wandb_init.py:init():830] calling init triggers
+2025-08-20 09:51:38,397 INFO    MainThread:3721925 [wandb_init.py:init():835] wandb.init called with sweep_config: {}
+config: {'vla': {'type': 'myvla-qwen-224px+mx-mshab', 'vla_id': 'myvla-qwen-224px+mx-mshab', 'base_vlm': 'prism-qwen25-extra-dinosiglip-224px+0_5b', 'freeze_vision_backbone': True, 'freeze_llm_backbone': False, 'unfreeze_last_llm_layer': False, 'data_mix': 'bridge', 'shuffle_buffer_size': 256000, 'epochs': 10, 'max_steps': None, 'save_every_n_steps': 25000, 'expected_world_size': 4, 'global_batch_size': 512, 'per_device_batch_size': 8, 'learning_rate': 2e-05, 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'lr_scheduler_type': 'constant', 'warmup_ratio': 0.0, 'train_strategy': 'fsdp-full-shard', 'action_tokenizer': 'extra_action_tokenizer', 'image_sequence_len': 10, 'use_wrist_image': True, 'use_depth_image': True, 'compress_history': False, 'use_flow_matching': False, 'action_chunk_size': 8, 'enable_gradient_checkpointing': True, 'enable_mixed_precision_training': True, 'reduce_in_full_precision': True, 'image_window_size': 4}, 'model_type': 'my_vla_qwen', 'data_root_dir': '/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data', 'run_root_dir': 'myvla_exp', 'pretrained_checkpoint': 'Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b', 'is_resume': False, 'resume_step': None, 'resume_epoch': None, 'run_id': 'bl_multiview_history_depth_set_table', 'run_id_note': None, 'save_interval': 1000, 'image_aug': False, 'seed': 7, 'hf_token': '.hf_token', 'trackers': ['jsonl', 'wandb'], 'wandb_project': 'mshab_vla', 'wandb_entity': 'traysen879-uc-san-diego', 'global_pose': False, 'is_grasped': False, 'qpos': False, 'segmentation': False, '_wandb': {}}
+2025-08-20 09:51:38,399 INFO    MainThread:3721925 [wandb_init.py:init():871] starting backend
+2025-08-20 09:51:38,641 INFO    MainThread:3721925 [wandb_init.py:init():874] sending inform_init request
+2025-08-20 09:51:38,652 INFO    MainThread:3721925 [wandb_init.py:init():882] backend started and connected
+2025-08-20 09:51:38,656 INFO    MainThread:3721925 [wandb_init.py:init():953] updated telemetry
+2025-08-20 09:51:38,686 INFO    MainThread:3721925 [wandb_init.py:init():977] communicating run to backend with 90.0 second timeout
+2025-08-20 09:51:39,129 INFO    MainThread:3721925 [wandb_init.py:init():1029] starting run threads in backend
+2025-08-20 09:51:39,596 INFO    MainThread:3721925 [wandb_run.py:_console_start():2458] atexit reg
+2025-08-20 09:51:39,596 INFO    MainThread:3721925 [wandb_run.py:_redirect():2306] redirect: wrap_raw
+2025-08-20 09:51:39,599 INFO    MainThread:3721925 [wandb_run.py:_redirect():2375] Wrapping output streams.
+2025-08-20 09:51:39,601 INFO    MainThread:3721925 [wandb_run.py:_redirect():2398] Redirects installed.
+2025-08-20 09:51:39,609 INFO    MainThread:3721925 [wandb_init.py:init():1075] run started, returning control to user process
+2025-08-20 09:52:18,707 INFO    MsgRouterThr:3721925 [mailbox.py:close():129] [no run ID] Closing mailbox, abandoning 1 handles.

wandb/run-20250820_095138-in9qu6p9/run-in9qu6p9.wandb ADDED Viewed

Binary file (13.4 kB). View file

wandb/run-20250820_095524-3yyycq6f/files/config.yaml ADDED Viewed

	@@ -0,0 +1,204 @@

+_wandb:
+    value:
+        cli_version: 0.21.0
+        e:
+            2medvrsam6c7koxygw5woziyxdt2rfka:
+                args:
+                    - --pretrained_checkpoint
+                    - Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
+                    - --model_type
+                    - my_vla_qwen
+                    - --vla.type
+                    - myvla-qwen-224px+mx-mshab
+                    - --vla.expected_world_size
+                    - "4"
+                    - --vla.global_batch_size
+                    - "512"
+                    - --vla.per_device_batch_size
+                    - "8"
+                    - --vla.learning_rate
+                    - "2e-5"
+                    - --vla.freeze_vision_backbone
+                    - "True"
+                    - --vla.freeze_llm_backbone
+                    - "False"
+                    - --vla.use_flow_matching
+                    - "False"
+                    - --vla.compress_history
+                    - "False"
+                    - --vla.epochs
+                    - "10"
+                    - --save_interval
+                    - "1000"
+                    - --run_id
+                    - bl_multiview_history_depth_set_table
+                codePath: vla-scripts/train.py
+                codePathLocal: vla-scripts/train.py
+                cpu_count: 240
+                cpu_count_logical: 240
+                cudaVersion: "12.4"
+                disk:
+                    /:
+                        total: "20812690710528"
+                        used: "36591730688"
+                email: traysen879@gmail.com
+                executable: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10
+                git:
+                    commit: 409e4c9a165115624c271028e9b3ee335991b747
+                    remote: https://github.com/TRS07170/myvla.git
+                gpu: NVIDIA A100-SXM4-80GB
+                gpu_count: 8
+                gpu_nvidia:
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-813530b2-64f0-5fa3-3568-3811977d3b92
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-335150e5-634c-68e2-4930-656c95e62244
+                    - architecture: Ampere
+                      cudaCores: 6912
+                      memoryTotal: "85899345920"
+                      name: NVIDIA A100-SXM4-80GB
+                      uuid: GPU-b3ee08d0-187c-8f80-06d5-c46759764c41
+                host: 164-152-109-69
+                memory:
+                    total: "1902324936704"
+                os: Linux-6.8.0-60-generic-x86_64-with-glibc2.35
+                program: /lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py
+                python: CPython 3.10.18
+                root: myvla_exp/bl_multiview_history_depth_set_table
+                startedAt: "2025-08-20T09:55:24.723538Z"
+                writerId: 2medvrsam6c7koxygw5woziyxdt2rfka
+        m: []
+        python_version: 3.10.18
+        t:
+            "1":
+                - 1
+                - 2
+                - 3
+                - 11
+                - 41
+                - 49
+                - 63
+                - 71
+            "2":
+                - 1
+                - 2
+                - 3
+                - 11
+                - 41
+                - 49
+                - 63
+                - 71
+            "3":
+                - 13
+                - 16
+            "4": 3.10.18
+            "5": 0.21.0
+            "6": 4.40.1
+            "12": 0.21.0
+            "13": linux-x86_64
+data_root_dir:
+    value: /home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/.new_maniskill_data
+global_pose:
+    value: false
+hf_token:
+    value: .hf_token
+image_aug:
+    value: false
+is_grasped:
+    value: false
+is_resume:
+    value: false
+model_type:
+    value: my_vla_qwen
+pretrained_checkpoint:
+    value: Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b
+qpos:
+    value: false
+resume_epoch:
+    value: null
+resume_step:
+    value: null
+run_id:
+    value: bl_multiview_history_depth_set_table
+run_id_note:
+    value: null
+run_root_dir:
+    value: myvla_exp
+save_interval:
+    value: 1000
+seed:
+    value: 7
+segmentation:
+    value: false
+trackers:
+    value:
+        - jsonl
+        - wandb
+vla:
+    value:
+        action_chunk_size: 8
+        action_tokenizer: extra_action_tokenizer
+        base_vlm: prism-qwen25-extra-dinosiglip-224px+0_5b
+        compress_history: false
+        data_mix: bridge
+        enable_gradient_checkpointing: true
+        enable_mixed_precision_training: true
+        epochs: 10
+        expected_world_size: 4
+        freeze_llm_backbone: false
+        freeze_vision_backbone: true
+        global_batch_size: 512
+        image_sequence_len: 10
+        image_window_size: 4
+        learning_rate: 2e-05
+        lr_scheduler_type: constant
+        max_grad_norm: 1
+        max_steps: null
+        per_device_batch_size: 8
+        reduce_in_full_precision: true
+        save_every_n_steps: 25000
+        shuffle_buffer_size: 256000
+        train_strategy: fsdp-full-shard
+        type: myvla-qwen-224px+mx-mshab
+        unfreeze_last_llm_layer: false
+        use_depth_image: true
+        use_flow_matching: false
+        use_wrist_image: true
+        vla_id: myvla-qwen-224px+mx-mshab
+        warmup_ratio: 0
+        weight_decay: 0
+wandb_entity:
+    value: traysen879-uc-san-diego
+wandb_project:
+    value: mshab_vla

wandb/run-20250820_095524-3yyycq6f/files/output.log ADDED Viewed

	@@ -0,0 +1,13 @@

+[2;36m08/20 [09:55:25][0m[2;36m [0m[34mINFO    [0m | >> [1m[[0m*[1m][0m Starting VLA Training Loop                                                        ]8;id=686782;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py\[2mtrain.py[0m]8;;\[2m:[0m]8;id=709047;file:///lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py#340\[2m340[0m]8;;\
+Traceback (most recent call last):
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 360, in <module>
+    train()
+  File "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/lib/python3.10/site-packages/draccus/argparsing.py", line 203, in wrapper_inner
+    response = fn(cfg, *args, **kwargs)
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py", line 341, in train
+    train_strategy.run_vla_training(
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/strategies/base_strategy.py", line 427, in run_vla_training
+    metrics.commit(
+  File "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/prismatic/training/metrics.py", line 318, in commit
+    self.state[key].append(value.detach())
+AttributeError: 'int' object has no attribute 'detach'

wandb/run-20250820_095524-3yyycq6f/files/requirements.txt ADDED Viewed

	@@ -0,0 +1,144 @@

+setuptools==78.1.1
+wheel==0.45.1
+pip==25.1
+sentencepiece==0.1.99
+mpmath==1.3.0
+libclang==18.1.1
+flatbuffers==25.2.10
+zipp==3.23.0
+wrapt==1.14.1
+urllib3==2.5.0
+typing_extensions==4.14.1
+typeguard==2.13.3
+tqdm==4.67.1
+toml==0.10.2
+termcolor==3.1.0
+tensorflow-io-gcs-filesystem==0.37.1
+tensorflow-estimator==2.15.0
+tensorboard-data-server==0.7.2
+sympy==1.14.0
+smmap==5.0.2
+six==1.17.0
+safetensors==0.5.3
+regex==2025.7.34
+PyYAML==6.0.2
+pyparsing==3.2.3
+Pygments==2.19.2
+pyasn1==0.6.1
+psutil==7.0.0
+protobuf==4.21.12
+platformdirs==4.3.8
+pillow==11.3.0
+packaging==25.0
+opt_einsum==3.4.0
+oauthlib==3.3.1
+nvidia-nvtx-cu12==12.1.105
+nvidia-nvjitlink-cu12==12.9.86
+nvidia-nccl-cu12==2.19.3
+nvidia-curand-cu12==10.3.2.106
+nvidia-cufft-cu12==11.0.2.54
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cublas-cu12==12.1.3.1
+numpy==1.26.4
+networkx==3.4.2
+mypy_extensions==1.1.0
+mergedeep==1.3.4
+mdurl==0.1.2
+MarkupSafe==3.0.2
+Markdown==3.8.2
+kiwisolver==1.4.8
+keras==2.15.0
+importlib_resources==6.5.2
+idna==3.10
+hf-xet==1.1.5
+grpcio==1.74.0
+gast==0.6.0
+fsspec==2025.7.0
+fonttools==4.59.0
+filelock==3.18.0
+etils==1.13.0
+einops==0.8.1
+cycler==0.12.1
+click==8.2.1
+charset-normalizer==3.4.2
+certifi==2025.8.3
+cachetools==5.5.2
+attrs==25.3.0
+annotated-types==0.7.0
+absl-py==2.3.1
+Werkzeug==3.1.3
+typing-inspection==0.4.1
+typing-inspect==0.9.0
+triton==2.2.0
+trimesh==4.7.1
+tensorflow-metadata==1.17.2
+tensorflow-addons==0.23.0
+sentry-sdk==2.34.1
+scipy==1.15.3
+rsa==4.9.1
+requests==2.32.4
+pyyaml-include==1.4.1
+python-dateutil==2.9.0.post0
+pydantic_core==2.33.2
+pyasn1_modules==0.4.2
+promise==2.3
+OpenEXR==3.3.5
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-cudnn-cu12==8.9.2.26
+ml-dtypes==0.2.0
+markdown-it-py==3.0.0
+jsonlines==4.0.0
+json-numpy==2.1.1
+Jinja2==3.1.6
+h5py==3.14.0
+google-pasta==0.2.0
+gitdb==4.0.12
+dm-tree==0.1.9
+contourpy==1.3.2
+astunparse==1.6.3
+rich==14.1.0
+requests-oauthlib==2.0.0
+pydantic==2.11.7
+nvidia-cusolver-cu12==11.4.5.107
+matplotlib==3.10.5
+huggingface-hub==0.34.3
+google-auth==2.40.3
+GitPython==3.1.45
+draccus==0.8.0
+wandb==0.21.0
+torch==2.2.0
+tokenizers==0.19.1
+google-auth-oauthlib==1.2.2
+array_record==0.7.2
+transformers==4.40.1
+torchvision==0.17.0
+torchaudio==2.2.0
+tensorboard==2.15.2
+accelerate==1.9.0
+timm==0.9.10
+tensorflow-datasets==4.9.3
+tensorflow==2.15.0
+peft==0.11.1
+tensorflow-graphics==2021.12.3
+dlimp==0.0.1
+openvla==0.0.3
+ninja==1.11.1.4
+flash-attn==2.5.5
+autocommand==2.2.2
+backports.tarfile==1.2.0
+importlib_metadata==8.0.0
+inflect==7.3.1
+jaraco.collections==5.1.0
+jaraco.context==5.3.0
+jaraco.functools==4.0.1
+jaraco.text==3.12.1
+more-itertools==10.3.0
+packaging==24.2
+platformdirs==4.2.2
+tomli==2.0.1
+typeguard==4.3.0
+typing_extensions==4.12.2
+wheel==0.45.1
+zipp==3.19.2

wandb/run-20250820_095524-3yyycq6f/files/wandb-metadata.json ADDED Viewed

	@@ -0,0 +1,119 @@

+{
+  "os":  "Linux-6.8.0-60-generic-x86_64-with-glibc2.35",
+  "python":  "CPython 3.10.18",
+  "startedAt":  "2025-08-20T09:55:24.723538Z",
+  "args":  [
+    "--pretrained_checkpoint",
+    "Stanford-ILIAD/prism-qwen25-extra-dinosiglip-224px-0_5b",
+    "--model_type",
+    "my_vla_qwen",
+    "--vla.type",
+    "myvla-qwen-224px+mx-mshab",
+    "--vla.expected_world_size",
+    "4",
+    "--vla.global_batch_size",
+    "512",
+    "--vla.per_device_batch_size",
+    "8",
+    "--vla.learning_rate",
+    "2e-5",
+    "--vla.freeze_vision_backbone",
+    "True",
+    "--vla.freeze_llm_backbone",
+    "False",
+    "--vla.use_flow_matching",
+    "False",
+    "--vla.compress_history",
+    "False",
+    "--vla.epochs",
+    "10",
+    "--save_interval",
+    "1000",
+    "--run_id",
+    "bl_multiview_history_depth_set_table"
+  ],
+  "program":  "/lambda/nfs/jianwen-us-midwest-1/tulab/ruisen/myvla/vla-scripts/train.py",
+  "codePath":  "vla-scripts/train.py",
+  "codePathLocal":  "vla-scripts/train.py",
+  "git":  {
+    "remote":  "https://github.com/TRS07170/myvla.git",
+    "commit":  "409e4c9a165115624c271028e9b3ee335991b747"
+  },
+  "email":  "traysen879@gmail.com",
+  "root":  "myvla_exp/bl_multiview_history_depth_set_table",
+  "host":  "164-152-109-69",
+  "executable":  "/home/ubuntu/jianwen-us-midwest-1/tulab/ruisen/miniconda3/envs/myvla/bin/python3.10",
+  "cpu_count":  240,
+  "cpu_count_logical":  240,
+  "gpu":  "NVIDIA A100-SXM4-80GB",
+  "gpu_count":  8,
+  "disk":  {
+    "/":  {
+      "total":  "20812690710528",
+      "used":  "36591730688"
+    }
+  },
+  "memory":  {
+    "total":  "1902324936704"
+  },
+  "gpu_nvidia":  [
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-47bfdc91-9dec-cf54-0e0a-aa57ab6fb106"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-55a7184b-b6dc-a8b3-67d5-a65679215c83"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-1de758e0-e4a9-e2e9-027c-17f65db8a69e"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-d7f94efd-7e10-156f-fe37-e505ae7b62b1"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-813530b2-64f0-5fa3-3568-3811977d3b92"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-7eac47dc-0da1-f6b2-d261-8ab3a5d4ed03"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-335150e5-634c-68e2-4930-656c95e62244"
+    },
+    {
+      "name":  "NVIDIA A100-SXM4-80GB",
+      "memoryTotal":  "85899345920",
+      "cudaCores":  6912,
+      "architecture":  "Ampere",
+      "uuid":  "GPU-b3ee08d0-187c-8f80-06d5-c46759764c41"
+    }
+  ],
+  "cudaVersion":  "12.4",
+  "writerId":  "2medvrsam6c7koxygw5woziyxdt2rfka"
+}

wandb/run-20250820_095524-3yyycq6f/files/wandb-summary.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"_wandb":{"runtime":38},"_runtime":38}

wandb/run-20250820_095524-3yyycq6f/logs/debug-core.log ADDED Viewed

	@@ -0,0 +1,14 @@

+{"time":"2025-08-20T09:55:24.937710806Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpmwa54urq/port-3722794.txt","pid":3722794,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
+{"time":"2025-08-20T09:55:24.93971427Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-3722794-3723166-688719886/socket","Net":"unix"}}
+{"time":"2025-08-20T09:55:24.939880473Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":3722794}
+{"time":"2025-08-20T09:55:25.036546584Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
+{"time":"2025-08-20T09:55:25.054333304Z","level":"INFO","msg":"handleInformInit: received","streamId":"3yyycq6f","id":"1(@)"}
+{"time":"2025-08-20T09:55:25.352508795Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"3yyycq6f","id":"1(@)"}
+{"time":"2025-08-20T09:56:04.105686979Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
+{"time":"2025-08-20T09:56:04.105963355Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
+{"time":"2025-08-20T09:56:04.106114428Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
+{"time":"2025-08-20T09:56:04.106014046Z","level":"INFO","msg":"server is shutting down"}
+{"time":"2025-08-20T09:56:04.10623037Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-3722794-3723166-688719886/socket","Net":"unix"}}
+{"time":"2025-08-20T09:56:04.556001154Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
+{"time":"2025-08-20T09:56:04.556135438Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
+{"time":"2025-08-20T09:56:04.556149367Z","level":"INFO","msg":"server is closed"}