Henryeahhh commited on
Commit
b00e6cc
·
verified ·
1 Parent(s): e5ec998

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +3 -0
  2. all_flow_matching/glue_best/wandb/wandb/run-20250924_061930-dnrnwv30/files/output.log +0 -0
  3. all_flow_matching/glue_best/wandb/wandb/run-20250924_061930-dnrnwv30/files/wandb-summary.json +1 -0
  4. all_flow_matching/glue_best/wandb/wandb/run-20250924_061930-dnrnwv30/logs/debug-core.log +14 -0
  5. all_flow_matching/glue_best/wandb/wandb/run-20250924_061930-dnrnwv30/logs/debug-internal.log +16 -0
  6. all_flow_matching/glue_best/wandb/wandb/run-20250924_061930-dnrnwv30/logs/debug.log +0 -0
  7. all_flow_matching/glue_best/wandb/wandb/run-20250924_062357-hmmpns57/files/output.log +0 -0
  8. all_flow_matching/glue_best/wandb/wandb/run-20250924_062357-hmmpns57/files/wandb-summary.json +1 -0
  9. all_flow_matching/glue_best/wandb/wandb/run-20250924_062357-hmmpns57/logs/debug-core.log +14 -0
  10. all_flow_matching/glue_best/wandb/wandb/run-20250924_062357-hmmpns57/logs/debug-internal.log +16 -0
  11. all_flow_matching/glue_best/wandb/wandb/run-20250924_062357-hmmpns57/logs/debug.log +0 -0
  12. all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/files/requirements.txt +283 -0
  13. all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/files/wandb-metadata.json +204 -0
  14. all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/files/wandb-summary.json +1 -0
  15. all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/logs/debug-core.log +14 -0
  16. all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/logs/debug-internal.log +11 -0
  17. all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/logs/debug.log +1 -0
  18. all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/logs/debug-core.log +14 -0
  19. all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/logs/debug.log +1 -0
  20. all_l1/step8000-action-head/metadata.pt +3 -0
  21. all_l1/step8500-action-head/metadata.pt +3 -0
  22. all_l1/step8500-unsharded/config.yaml +322 -0
  23. all_l1/step8500-unsharded/lora.pt +3 -0
  24. all_l1/step8500-unsharded/train.pt +3 -0
  25. all_l1/wandb/wandb/debug-internal.log +9 -0
  26. all_l1/wandb/wandb/debug.log +0 -0
  27. all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/files/config.yaml +621 -0
  28. all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/files/output.log +122 -0
  29. all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/files/requirements.txt +284 -0
  30. all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/files/wandb-metadata.json +202 -0
  31. all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/files/wandb-summary.json +1 -0
  32. all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/logs/debug-core.log +16 -0
  33. all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/logs/debug-internal.log +12 -0
  34. all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/logs/debug.log +0 -0
  35. all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/run-2lq20p1f.wandb +0 -0
  36. all_l1/wandb/wandb/run-20250930_085206-50kj35c0/files/config.yaml +617 -0
  37. all_l1/wandb/wandb/run-20250930_085206-50kj35c0/files/output.log +86 -0
  38. all_l1/wandb/wandb/run-20250930_085206-50kj35c0/files/requirements.txt +284 -0
  39. all_l1/wandb/wandb/run-20250930_085206-50kj35c0/files/wandb-metadata.json +202 -0
  40. all_l1/wandb/wandb/run-20250930_085206-50kj35c0/files/wandb-summary.json +1 -0
  41. all_l1/wandb/wandb/run-20250930_085206-50kj35c0/logs/debug-core.log +14 -0
  42. all_l1/wandb/wandb/run-20250930_085206-50kj35c0/logs/debug-internal.log +11 -0
  43. all_l1/wandb/wandb/run-20250930_085206-50kj35c0/logs/debug.log +1 -0
  44. all_l1/wandb/wandb/run-20250930_085206-50kj35c0/run-50kj35c0.wandb +0 -0
  45. all_l1/wandb/wandb/run-20250930_090155-w2yi62pb/files/config.yaml +617 -0
  46. all_l1/wandb/wandb/run-20250930_090155-w2yi62pb/files/output.log +85 -0
  47. all_l1/wandb/wandb/run-20250930_090155-w2yi62pb/files/requirements.txt +284 -0
  48. all_l1/wandb/wandb/run-20250930_090155-w2yi62pb/files/wandb-metadata.json +202 -0
  49. all_l1/wandb/wandb/run-20250930_090155-w2yi62pb/files/wandb-summary.json +1 -0
  50. all_l1/wandb/wandb/run-20250930_090155-w2yi62pb/logs/debug-core.log +14 -0
.gitattributes CHANGED
@@ -39,3 +39,6 @@ wandb/wandb/run-20251002_155441-70dhy5dq/run-70dhy5dq.wandb filter=lfs diff=lfs
39
  wandb/wandb/run-20251002_150921-kqbx0cjv/run-kqbx0cjv.wandb filter=lfs diff=lfs merge=lfs -text
40
  cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/run-76mxu43t.wandb filter=lfs diff=lfs merge=lfs -text
41
  wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/run-a1znetn8.wandb filter=lfs diff=lfs merge=lfs -text
 
 
 
 
39
  wandb/wandb/run-20251002_150921-kqbx0cjv/run-kqbx0cjv.wandb filter=lfs diff=lfs merge=lfs -text
40
  cleandesk_l1_regression/wandb/wandb/run-20251008_163831-76mxu43t/run-76mxu43t.wandb filter=lfs diff=lfs merge=lfs -text
41
  wipe_l1_regression/wandb/wandb/run-20251005_163743-a1znetn8/run-a1znetn8.wandb filter=lfs diff=lfs merge=lfs -text
42
+ cleandesk50_flow_matching/wandb/wandb/run-20251008_163834-quokv8gn/run-quokv8gn.wandb filter=lfs diff=lfs merge=lfs -text
43
+ cleandesk50_l1_regression/wandb/wandb/run-20251008_163831-fqdwkc8m/run-fqdwkc8m.wandb filter=lfs diff=lfs merge=lfs -text
44
+ glue_flow_matching/wandb/wandb/run-20251002_163728-tmwli25x/run-tmwli25x.wandb filter=lfs diff=lfs merge=lfs -text
all_flow_matching/glue_best/wandb/wandb/run-20250924_061930-dnrnwv30/files/output.log ADDED
File without changes
all_flow_matching/glue_best/wandb/wandb/run-20250924_061930-dnrnwv30/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":0},"_runtime":0}
all_flow_matching/glue_best/wandb/wandb/run-20250924_061930-dnrnwv30/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-24T06:19:30.419454454Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmph06ly_es/port-2188820.txt","pid":2188820,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-24T06:19:30.420467727Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2188820}
3
+ {"time":"2025-09-24T06:19:30.42153553Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2188820-2188995-771080915/socket","Net":"unix"}}
4
+ {"time":"2025-09-24T06:19:30.607757412Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-24T06:19:30.624290644Z","level":"INFO","msg":"handleInformInit: received","streamId":"dnrnwv30","id":"1(@)"}
6
+ {"time":"2025-09-24T06:19:31.778152452Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"dnrnwv30","id":"1(@)"}
7
+ {"time":"2025-09-24T06:19:32.042400183Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-09-24T06:19:32.042449263Z","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2025-09-24T06:19:32.042441673Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
10
+ {"time":"2025-09-24T06:19:32.042538224Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2188820-2188995-771080915/socket","Net":"unix"}}
11
+ {"time":"2025-09-24T06:19:32.042560085Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
12
+ {"time":"2025-09-24T06:19:32.340314533Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-09-24T06:19:32.340328313Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-09-24T06:19:32.340337613Z","level":"INFO","msg":"server is closed"}
all_flow_matching/glue_best/wandb/wandb/run-20250924_061930-dnrnwv30/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-24T06:19:30.626115817Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-09-24T06:19:31.778111872Z","level":"INFO","msg":"stream: created new stream","id":"dnrnwv30"}
3
+ {"time":"2025-09-24T06:19:31.778147652Z","level":"INFO","msg":"stream: started","id":"dnrnwv30"}
4
+ {"time":"2025-09-24T06:19:31.778168212Z","level":"INFO","msg":"writer: started","stream_id":"dnrnwv30"}
5
+ {"time":"2025-09-24T06:19:31.778194323Z","level":"INFO","msg":"sender: started","stream_id":"dnrnwv30"}
6
+ {"time":"2025-09-24T06:19:31.778192543Z","level":"INFO","msg":"handler: started","stream_id":"dnrnwv30"}
7
+ {"time":"2025-09-24T06:19:32.023079005Z","level":"ERROR","msg":"HTTP error","status":403,"method":"POST","url":"https://api.wandb.ai/graphql"}
8
+ {"time":"2025-09-24T06:19:32.023158306Z","level":"ERROR","msg":"runupserter: failed to init run","error":"returned error 403: {\"data\":{\"upsertBucket\":null},\"errors\":[{\"message\":\"permission denied\",\"path\":[\"upsertBucket\"],\"extensions\":{\"code\":\"PERMISSION_ERROR\"}}]}"}
9
+ {"time":"2025-09-24T06:19:32.042439853Z","level":"INFO","msg":"stream: closing","id":"dnrnwv30"}
10
+ {"time":"2025-09-24T06:19:32.048537861Z","level":"ERROR","msg":"sender: uploadConfigFile: stream: no run"}
11
+ {"time":"2025-09-24T06:19:32.332555964Z","level":"ERROR","msg":"HTTP error","status":404,"method":"POST","url":"https://api.wandb.ai/graphql"}
12
+ {"time":"2025-09-24T06:19:32.332627655Z","level":"ERROR","msg":"runfiles: CreateRunFiles returned error: returned error 404: {\"data\":{\"createRunFiles\":null},\"errors\":[{\"message\":\"run a1-vla-xiaodan/dnrnwv30 not found during createRunFiles\",\"path\":[\"createRunFiles\"]}]}"}
13
+ {"time":"2025-09-24T06:19:32.336908289Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
14
+ {"time":"2025-09-24T06:19:32.33694275Z","level":"INFO","msg":"handler: closed","stream_id":"dnrnwv30"}
15
+ {"time":"2025-09-24T06:19:32.338990606Z","level":"INFO","msg":"sender: closed","stream_id":"dnrnwv30"}
16
+ {"time":"2025-09-24T06:19:32.338998046Z","level":"INFO","msg":"stream: closed","id":"dnrnwv30"}
all_flow_matching/glue_best/wandb/wandb/run-20250924_061930-dnrnwv30/logs/debug.log ADDED
File without changes
all_flow_matching/glue_best/wandb/wandb/run-20250924_062357-hmmpns57/files/output.log ADDED
File without changes
all_flow_matching/glue_best/wandb/wandb/run-20250924_062357-hmmpns57/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":0},"_runtime":0}
all_flow_matching/glue_best/wandb/wandb/run-20250924_062357-hmmpns57/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-24T06:23:57.33371267Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp7mdgm6nn/port-2190130.txt","pid":2190130,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-24T06:23:57.334085295Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2190130}
3
+ {"time":"2025-09-24T06:23:57.334062305Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2190130-2190294-2704513851/socket","Net":"unix"}}
4
+ {"time":"2025-09-24T06:23:57.511386371Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-24T06:23:57.517858166Z","level":"INFO","msg":"handleInformInit: received","streamId":"hmmpns57","id":"1(@)"}
6
+ {"time":"2025-09-24T06:23:58.547393843Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"hmmpns57","id":"1(@)"}
7
+ {"time":"2025-09-24T06:23:58.805591645Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-09-24T06:23:58.805637046Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
9
+ {"time":"2025-09-24T06:23:58.805655116Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-09-24T06:23:58.805681387Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-09-24T06:23:58.805837029Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2190130-2190294-2704513851/socket","Net":"unix"}}
12
+ {"time":"2025-09-24T06:23:59.098817455Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-09-24T06:23:59.098836546Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-09-24T06:23:59.098846216Z","level":"INFO","msg":"server is closed"}
all_flow_matching/glue_best/wandb/wandb/run-20250924_062357-hmmpns57/logs/debug-internal.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-24T06:23:57.51970738Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-09-24T06:23:58.547344712Z","level":"INFO","msg":"stream: created new stream","id":"hmmpns57"}
3
+ {"time":"2025-09-24T06:23:58.547388123Z","level":"INFO","msg":"stream: started","id":"hmmpns57"}
4
+ {"time":"2025-09-24T06:23:58.547412053Z","level":"INFO","msg":"writer: started","stream_id":"hmmpns57"}
5
+ {"time":"2025-09-24T06:23:58.547423083Z","level":"INFO","msg":"handler: started","stream_id":"hmmpns57"}
6
+ {"time":"2025-09-24T06:23:58.547448154Z","level":"INFO","msg":"sender: started","stream_id":"hmmpns57"}
7
+ {"time":"2025-09-24T06:23:58.797771793Z","level":"ERROR","msg":"HTTP error","status":403,"method":"POST","url":"https://api.wandb.ai/graphql"}
8
+ {"time":"2025-09-24T06:23:58.797893015Z","level":"ERROR","msg":"runupserter: failed to init run","error":"returned error 403: {\"data\":{\"upsertBucket\":null},\"errors\":[{\"message\":\"permission denied\",\"path\":[\"upsertBucket\"],\"extensions\":{\"code\":\"PERMISSION_ERROR\"}}]}"}
9
+ {"time":"2025-09-24T06:23:58.805654996Z","level":"INFO","msg":"stream: closing","id":"hmmpns57"}
10
+ {"time":"2025-09-24T06:23:58.809349995Z","level":"ERROR","msg":"sender: uploadConfigFile: stream: no run"}
11
+ {"time":"2025-09-24T06:23:59.095800216Z","level":"ERROR","msg":"HTTP error","status":404,"method":"POST","url":"https://api.wandb.ai/graphql"}
12
+ {"time":"2025-09-24T06:23:59.095886847Z","level":"ERROR","msg":"runfiles: CreateRunFiles returned error: returned error 404: {\"data\":{\"createRunFiles\":null},\"errors\":[{\"message\":\"run a1-vla-xiaodan/hmmpns57 not found during createRunFiles\",\"path\":[\"createRunFiles\"]}]}"}
13
+ {"time":"2025-09-24T06:23:59.096209701Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
14
+ {"time":"2025-09-24T06:23:59.096244302Z","level":"INFO","msg":"handler: closed","stream_id":"hmmpns57"}
15
+ {"time":"2025-09-24T06:23:59.097755731Z","level":"INFO","msg":"sender: closed","stream_id":"hmmpns57"}
16
+ {"time":"2025-09-24T06:23:59.097762801Z","level":"INFO","msg":"stream: closed","id":"hmmpns57"}
all_flow_matching/glue_best/wandb/wandb/run-20250924_062357-hmmpns57/logs/debug.log ADDED
File without changes
all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/files/requirements.txt ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ tensorflow-rocm==2.16.2
15
+ termcolor==3.1.0
16
+ Werkzeug==3.1.3
17
+ Brotli==1.1.0
18
+ Farama-Notifications==0.0.4
19
+ MarkupSafe==2.1.5
20
+ PyYAML==6.0.2
21
+ absl-py==2.3.1
22
+ accelerate==1.10.1
23
+ ai2-molmo==0.0.0
24
+ aiofiles==24.1.0
25
+ aiohappyeyeballs==2.6.1
26
+ aiohttp==3.12.15
27
+ aiosignal==1.4.0
28
+ annotated-types==0.7.0
29
+ antlr4-python3-runtime==4.9.3
30
+ anyio==4.10.0
31
+ array_record==0.8.1
32
+ async-timeout==5.0.1
33
+ attrs==25.3.0
34
+ av==15.1.0
35
+ backports.tarfile==1.2.0
36
+ beaker-gantry==3.2.0
37
+ beaker-py==2.5.0
38
+ black==23.12.1
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ etils==1.13.0
72
+ evdev==1.9.2
73
+ exceptiongroup==1.3.0
74
+ face==24.0.0
75
+ fastapi==0.116.2
76
+ ffmpy==0.6.1
77
+ fiddle==0.3.0
78
+ filelock==3.13.1
79
+ fonttools==4.60.0
80
+ frozenlist==1.7.0
81
+ fsspec==2023.9.2
82
+ ftfy==6.3.1
83
+ gcsfs==2023.9.2
84
+ gitdb==4.0.12
85
+ GitPython==3.1.45
86
+ glom==24.11.0
87
+ google-api-core==2.25.1
88
+ google-auth==2.40.3
89
+ google-auth-oauthlib==1.2.2
90
+ google-cloud-core==2.4.3
91
+ google-cloud-storage==2.19.0
92
+ google-crc32c==1.7.1
93
+ google-resumable-media==2.7.2
94
+ googleapis-common-protos==1.70.0
95
+ gradio==5.46.0
96
+ gradio_client==1.13.0
97
+ graphviz==0.21
98
+ groovy==0.1.2
99
+ grpcio==1.75.0
100
+ gymnasium==0.29.1
101
+ h11==0.16.0
102
+ hf_transfer==0.1.9
103
+ hf-xet==1.1.10
104
+ httpcore==1.0.9
105
+ httpx==0.28.1
106
+ huggingface-hub==0.35.0
107
+ id==1.5.0
108
+ idna==3.10
109
+ imageio==2.37.0
110
+ imageio-ffmpeg==0.6.0
111
+ importlib_metadata==8.7.0
112
+ importlib_resources==6.5.2
113
+ iniconfig==2.1.0
114
+ inquirerpy==0.3.4
115
+ isort==5.12.0
116
+ jaraco.classes==3.4.0
117
+ jaraco.context==6.0.1
118
+ jaraco.functools==4.3.0
119
+ jeepney==0.9.0
120
+ Jinja2==3.1.4
121
+ jiter==0.11.0
122
+ jmespath==1.0.1
123
+ joblib==1.5.2
124
+ jsonlines==4.0.0
125
+ keras==2.15.0
126
+ keyring==25.6.0
127
+ kiwisolver==1.4.9
128
+ latex2sympy2_extended==1.10.2
129
+ lerobot==0.3.4
130
+ Levenshtein==0.27.1
131
+ libcst==1.8.4
132
+ lightning-utilities==0.15.2
133
+ markdown-it-py==4.0.0
134
+ math-verify==0.8.0
135
+ matplotlib==3.10.6
136
+ mdurl==0.1.2
137
+ mergedeep==1.3.4
138
+ ml-dtypes==0.2.0
139
+ ml_dtypes==0.5.3
140
+ more-itertools==10.8.0
141
+ mpmath==1.3.0
142
+ msgspec==0.19.0
143
+ multidict==6.6.4
144
+ multiprocess==0.70.16
145
+ mypy==1.3.0
146
+ mypy_extensions==1.1.0
147
+ necessary==0.4.3
148
+ networkx==3.3
149
+ nh3==0.3.0
150
+ nltk==3.9.1
151
+ numpy==1.26.4
152
+ numpy==2.2.6
153
+ oauthlib==3.3.1
154
+ omegaconf==2.3.0
155
+ openai==1.108.0
156
+ opencv-python-headless==4.12.0.88
157
+ OpenEXR==3.4.0
158
+ orderly-set==5.5.0
159
+ orjson==3.11.3
160
+ packaging==25.0
161
+ pandas==2.3.2
162
+ pathspec==0.12.1
163
+ petname==2.6
164
+ pfzy==0.3.4
165
+ pillow==11.0.0
166
+ pip==25.2
167
+ platformdirs==4.4.0
168
+ pluggy==1.6.0
169
+ promise==2.3
170
+ prompt_toolkit==3.0.52
171
+ propcache==0.3.2
172
+ proto-plus==1.26.1
173
+ protobuf==4.21.12
174
+ protobuf==6.32.1
175
+ psutil==7.1.0
176
+ pyarrow==21.0.0
177
+ pyasn1==0.6.1
178
+ pyasn1_modules==0.4.2
179
+ pycparser==2.23
180
+ pydantic==2.11.9
181
+ pydantic_core==2.33.2
182
+ pydub==0.25.1
183
+ Pygments==2.19.2
184
+ pynput==1.8.1
185
+ pyparsing==3.2.4
186
+ pyproject_hooks==1.2.0
187
+ pyserial==3.5
188
+ pytest==8.4.2
189
+ pytest-sphinx==0.6.3
190
+ python-dateutil==2.9.0.post0
191
+ python-Levenshtein==0.27.1
192
+ python-multipart==0.0.20
193
+ python-xlib==0.33
194
+ pytorch-triton-rocm==3.4.0
195
+ pytz==2025.2
196
+ pyyaml-include==1.4.1
197
+ RapidFuzz==3.14.1
198
+ readme_renderer==44.0
199
+ regex==2025.9.1
200
+ requests==2.32.5
201
+ requests-oauthlib==2.0.0
202
+ requests-toolbelt==1.0.0
203
+ requirements-parser==0.13.0
204
+ rerun-sdk==0.22.1
205
+ rfc3986==2.0.0
206
+ rich==13.9.4
207
+ rsa==4.9.1
208
+ ruff==0.13.0
209
+ s3transfer==0.14.0
210
+ safehttpx==0.1.6
211
+ safetensors==0.6.2
212
+ scikit-learn==1.7.2
213
+ scipy==1.15.3
214
+ SecretStorage==3.4.0
215
+ semantic-version==2.10.0
216
+ sentencepiece==0.2.1
217
+ sentry-sdk==2.38.0
218
+ setuptools==78.1.1
219
+ shellingham==1.5.4
220
+ six==1.17.0
221
+ smart_open==7.3.1
222
+ smashed==0.21.5
223
+ smmap==5.0.2
224
+ sniffio==1.3.1
225
+ starlette==0.48.0
226
+ sympy==1.13.3
227
+ tensorboard==2.15.2
228
+ tensorboard==2.19.0
229
+ tensorflow==2.15.0
230
+ tensorflow-addons==0.23.0
231
+ tensorflow-datasets==4.9.3
232
+ tensorflow-estimator==2.15.0
233
+ tensorflow-graphics==2021.12.3
234
+ tensorflow-metadata==1.17.2
235
+ threadpoolctl==3.6.0
236
+ timm==1.0.19
237
+ tokenizers==0.22.0
238
+ toml==0.10.2
239
+ tomli==2.2.1
240
+ tomlkit==0.13.3
241
+ torch==2.8.0+rocm6.4
242
+ torchcodec==0.5
243
+ torchmetrics==1.8.2
244
+ torchvision==0.23.0+rocm6.4
245
+ tqdm==4.67.1
246
+ transformers==4.56.1
247
+ trimesh==4.8.2
248
+ trouting==0.3.3
249
+ twine==6.2.0
250
+ typeguard==2.13.3
251
+ typer==0.17.4
252
+ typing_extensions==4.15.0
253
+ typing-inspect==0.9.0
254
+ typing-inspection==0.4.1
255
+ tzdata==2025.2
256
+ urllib3==2.5.0
257
+ uvicorn==0.35.0
258
+ wandb==0.21.4
259
+ wcwidth==0.2.13
260
+ websockets==15.0.1
261
+ wheel==0.45.1
262
+ wrapt==1.14.2
263
+ xxhash==3.5.0
264
+ yarl==1.20.1
265
+ zipp==3.23.0
266
+ lerobot==0.3.4
267
+ minLoRA==0.1.0
268
+ autocommand==2.2.2
269
+ backports.tarfile==1.2.0
270
+ importlib_metadata==8.0.0
271
+ inflect==7.3.1
272
+ jaraco.collections==5.1.0
273
+ jaraco.context==5.3.0
274
+ jaraco.functools==4.0.1
275
+ jaraco.text==3.12.1
276
+ more-itertools==10.3.0
277
+ packaging==24.2
278
+ platformdirs==4.2.2
279
+ tomli==2.0.1
280
+ typeguard==4.3.0
281
+ typing_extensions==4.12.2
282
+ wheel==0.45.1
283
+ zipp==3.19.2
all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/files/wandb-metadata.json ADDED
@@ -0,0 +1,204 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-09-24T06:31:28.005264Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "flow_matching",
12
+ "--seq_len",
13
+ "768",
14
+ "--lora_rank",
15
+ "32",
16
+ "--lora_llm",
17
+ "--checkpoint",
18
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
19
+ "--device_train_microbatch_size",
20
+ "22",
21
+ "--global_batch_size",
22
+ "176",
23
+ "--dataset",
24
+ "vla_dataset_realworld",
25
+ "--llm_learning_rate",
26
+ "5e-5",
27
+ "--wandb_entity",
28
+ "henryeap",
29
+ "--wandb_project",
30
+ "a1-realworld",
31
+ "--wandb_run_name",
32
+ "glue",
33
+ "--save_overwrite"
34
+ ],
35
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
36
+ "codePath": "launch_scripts/train_vla.py",
37
+ "codePathLocal": "launch_scripts/train_vla.py",
38
+ "git": {
39
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
40
+ "commit": "c13f2763af61e0d729a8b5ab4bdefc512205bcc5"
41
+ },
42
+ "email": "ihenrykwok@outlook.com",
43
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/wandb",
44
+ "host": "auh7-1b-gpu-188",
45
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
46
+ "cpu_count": 64,
47
+ "cpu_count_logical": 128,
48
+ "gpu": "Instinct MI210",
49
+ "gpu_count": 8,
50
+ "disk": {
51
+ "/": {
52
+ "total": "470343073792",
53
+ "used": "51147874304"
54
+ }
55
+ },
56
+ "memory": {
57
+ "total": "2434606952448"
58
+ },
59
+ "gpu_amd": [
60
+ {
61
+ "id": "5",
62
+ "uniqueId": "0x137c9ede1bb1518e",
63
+ "vbiosVersion": "113-D67301V-073",
64
+ "performanceLevel": "auto",
65
+ "maxPower": "300.0",
66
+ "series": "Instinct MI210",
67
+ "model": "0x740f",
68
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
69
+ "sku": "D67301V",
70
+ "sclkRange": "500Mhz - 1700Mhz",
71
+ "mclkRange": "400Mhz - 1600Mhz"
72
+ },
73
+ {
74
+ "id": "7",
75
+ "uniqueId": "0x21a2e88d06c419dc",
76
+ "vbiosVersion": "113-D67301V-073",
77
+ "performanceLevel": "auto",
78
+ "maxPower": "300.0",
79
+ "series": "Instinct MI210",
80
+ "model": "0x740f",
81
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
82
+ "sku": "D67301V",
83
+ "sclkRange": "500Mhz - 1700Mhz",
84
+ "mclkRange": "400Mhz - 1600Mhz"
85
+ },
86
+ {
87
+ "id": "2",
88
+ "uniqueId": "0x399226d2b2bfa544",
89
+ "vbiosVersion": "113-D67301V-073",
90
+ "performanceLevel": "auto",
91
+ "maxPower": "300.0",
92
+ "series": "Instinct MI210",
93
+ "model": "0x740f",
94
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
95
+ "sku": "D67301V",
96
+ "sclkRange": "500Mhz - 1700Mhz",
97
+ "mclkRange": "400Mhz - 1600Mhz"
98
+ },
99
+ {
100
+ "id": "0",
101
+ "uniqueId": "0x3558c3014c813fdb",
102
+ "vbiosVersion": "113-D67301V-073",
103
+ "performanceLevel": "auto",
104
+ "maxPower": "300.0",
105
+ "series": "Instinct MI210",
106
+ "model": "0x740f",
107
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
108
+ "sku": "D67301V",
109
+ "sclkRange": "500Mhz - 1700Mhz",
110
+ "mclkRange": "400Mhz - 1600Mhz"
111
+ },
112
+ {
113
+ "id": "3",
114
+ "uniqueId": "0xf61ec17df11883bd",
115
+ "vbiosVersion": "113-D67301V-073",
116
+ "performanceLevel": "auto",
117
+ "maxPower": "300.0",
118
+ "series": "Instinct MI210",
119
+ "model": "0x740f",
120
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
121
+ "sku": "D67301V",
122
+ "sclkRange": "500Mhz - 1700Mhz",
123
+ "mclkRange": "400Mhz - 1600Mhz"
124
+ },
125
+ {
126
+ "id": "1",
127
+ "uniqueId": "0x9b5c1c302c8129f8",
128
+ "vbiosVersion": "113-D67301V-073",
129
+ "performanceLevel": "auto",
130
+ "maxPower": "300.0",
131
+ "series": "Instinct MI210",
132
+ "model": "0x740f",
133
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
134
+ "sku": "D67301V",
135
+ "sclkRange": "500Mhz - 1700Mhz",
136
+ "mclkRange": "400Mhz - 1600Mhz"
137
+ },
138
+ {
139
+ "id": "6",
140
+ "uniqueId": "0xfa8b85a4625b04f",
141
+ "vbiosVersion": "113-D67301V-073",
142
+ "performanceLevel": "auto",
143
+ "maxPower": "300.0",
144
+ "series": "Instinct MI210",
145
+ "model": "0x740f",
146
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
147
+ "sku": "D67301V",
148
+ "sclkRange": "500Mhz - 1700Mhz",
149
+ "mclkRange": "400Mhz - 1600Mhz"
150
+ },
151
+ {
152
+ "id": "4",
153
+ "uniqueId": "0xa515afd8ced1d39d",
154
+ "vbiosVersion": "113-D67301V-073",
155
+ "performanceLevel": "auto",
156
+ "maxPower": "300.0",
157
+ "series": "Instinct MI210",
158
+ "model": "0x740f",
159
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
160
+ "sku": "D67301V",
161
+ "sclkRange": "500Mhz - 1700Mhz",
162
+ "mclkRange": "400Mhz - 1600Mhz"
163
+ }
164
+ ],
165
+ "slurm": {
166
+ "cluster_name": "ai-04r",
167
+ "conf": "/etc/slurm/slurm.conf",
168
+ "cpus_on_node": "128",
169
+ "gpus_on_node": "8",
170
+ "gtids": "0",
171
+ "job_account": "faculty-acc",
172
+ "job_cpus_per_node": "128",
173
+ "job_end_time": "1758954648",
174
+ "job_gid": "2000",
175
+ "job_gpus": "0,1,2,3,4,5,6,7",
176
+ "job_id": "1605",
177
+ "job_name": "realworld_mh",
178
+ "job_nodelist": "auh7-1b-gpu-188",
179
+ "job_num_nodes": "1",
180
+ "job_partition": "faculty",
181
+ "job_qos": "xdqos",
182
+ "job_start_time": "1758695448",
183
+ "job_uid": "2013",
184
+ "job_user": "xiaodan",
185
+ "jobid": "1605",
186
+ "localid": "0",
187
+ "nnodes": "1",
188
+ "nodeid": "0",
189
+ "nodelist": "auh7-1b-gpu-188",
190
+ "nprocs": "1",
191
+ "ntasks": "1",
192
+ "ntasks_per_node": "1",
193
+ "oom_kill_step": "0",
194
+ "prio_process": "0",
195
+ "procid": "0",
196
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
197
+ "submit_host": "auh-1b-cpu-login-001",
198
+ "task_pid": "2191329",
199
+ "tasks_per_node": "1",
200
+ "topology_addr": "auh7-1b-gpu-188",
201
+ "topology_addr_pattern": "node"
202
+ },
203
+ "writerId": "o421nvn5u6ub6ruog26gg83x0g2lmgbt"
204
+ }
all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_runtime":2,"_wandb":{"runtime":2}}
all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-24T06:31:28.064738272Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpt5x3_6pq/port-2191415.txt","pid":2191415,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-24T06:31:28.066318992Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2191415}
3
+ {"time":"2025-09-24T06:31:28.066291612Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2191415-2191580-2229050380/socket","Net":"unix"}}
4
+ {"time":"2025-09-24T06:31:28.241348786Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-24T06:31:28.248471068Z","level":"INFO","msg":"handleInformInit: received","streamId":"wtatxotn","id":"1(@)"}
6
+ {"time":"2025-09-24T06:31:29.271764603Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"wtatxotn","id":"1(@)"}
7
+ {"time":"2025-09-24T06:31:32.368379213Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-09-24T06:31:32.368730117Z","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2025-09-24T06:31:32.368724477Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
10
+ {"time":"2025-09-24T06:31:32.368772508Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-09-24T06:31:32.368826649Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2191415-2191580-2229050380/socket","Net":"unix"}}
12
+ {"time":"2025-09-24T06:31:33.781258776Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-09-24T06:31:33.781274636Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-09-24T06:31:33.781290036Z","level":"INFO","msg":"server is closed"}
all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-24T06:31:28.251016151Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-09-24T06:31:29.271706933Z","level":"INFO","msg":"stream: created new stream","id":"wtatxotn"}
3
+ {"time":"2025-09-24T06:31:29.271758583Z","level":"INFO","msg":"stream: started","id":"wtatxotn"}
4
+ {"time":"2025-09-24T06:31:29.271781634Z","level":"INFO","msg":"handler: started","stream_id":"wtatxotn"}
5
+ {"time":"2025-09-24T06:31:29.271778354Z","level":"INFO","msg":"writer: started","stream_id":"wtatxotn"}
6
+ {"time":"2025-09-24T06:31:29.271793104Z","level":"INFO","msg":"sender: started","stream_id":"wtatxotn"}
7
+ {"time":"2025-09-24T06:31:32.368726307Z","level":"INFO","msg":"stream: closing","id":"wtatxotn"}
8
+ {"time":"2025-09-24T06:31:33.421915218Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-09-24T06:31:33.778168056Z","level":"INFO","msg":"handler: closed","stream_id":"wtatxotn"}
10
+ {"time":"2025-09-24T06:31:33.780128001Z","level":"INFO","msg":"sender: closed","stream_id":"wtatxotn"}
11
+ {"time":"2025-09-24T06:31:33.780158132Z","level":"INFO","msg":"stream: closed","id":"wtatxotn"}
all_flow_matching/glue_best/wandb/wandb/run-20250924_063128-wtatxotn/logs/debug.log ADDED
@@ -0,0 +1 @@
 
 
1
+ 2025-09-24 06:31:32,368 INFO wandb-AsyncioManager-main:2191415 [service_client.py:_forward_responses():84] Reached EOF.
all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-24T06:36:42.858452895Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmphjacd0ae/port-2192743.txt","pid":2192743,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-24T06:36:42.859464168Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":2192743}
3
+ {"time":"2025-09-24T06:36:42.859402097Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-2192743-2192908-1607882430/socket","Net":"unix"}}
4
+ {"time":"2025-09-24T06:36:43.043042785Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-24T06:36:43.049816992Z","level":"INFO","msg":"handleInformInit: received","streamId":"6tj2c8pr","id":"1(@)"}
6
+ {"time":"2025-09-24T06:36:44.180372147Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"6tj2c8pr","id":"1(@)"}
7
+ {"time":"2025-09-24T06:36:47.096248789Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-09-24T06:36:47.096289969Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
9
+ {"time":"2025-09-24T06:36:47.09631223Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-09-24T06:36:47.09637208Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-09-24T06:36:47.096556563Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-2192743-2192908-1607882430/socket","Net":"unix"}}
12
+ {"time":"2025-09-24T06:36:48.708711296Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-09-24T06:36:48.709060661Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-09-24T06:36:48.709072391Z","level":"INFO","msg":"server is closed"}
all_flow_matching/glue_best/wandb/wandb/run-20250924_063642-6tj2c8pr/logs/debug.log ADDED
@@ -0,0 +1 @@
 
 
1
+ 2025-09-24 06:36:47,096 INFO wandb-AsyncioManager-main:2192743 [service_client.py:_forward_responses():84] Reached EOF.
all_l1/step8000-action-head/metadata.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:883bb1160e5c5ece44d882f29247664bba7a98ff980d816e4b7684797ae2d601
3
+ size 1331
all_l1/step8500-action-head/metadata.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80287bb60307f9ac09aacfce4b1df8f509d0f9c0571939e3a38e6f558e2a874c
3
+ size 1331
all_l1/step8500-unsharded/config.yaml ADDED
@@ -0,0 +1,322 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: realworld_20250930_131219
2
+ seed: 6198
3
+ epoch: null
4
+ dry_run: false
5
+ model:
6
+ d_model: 3584
7
+ n_heads: 28
8
+ n_kv_heads: 4
9
+ qkv_bias: true
10
+ clip_qkv: null
11
+ n_layers: 28
12
+ mlp_ratio: 4
13
+ mlp_hidden_size: 37888
14
+ activation_type: swiglu
15
+ block_type: sequential
16
+ block_group_size: 1
17
+ rope: true
18
+ rope_full_precision: true
19
+ rope_theta: 1000000.0
20
+ vision_backbone:
21
+ image_model_type: openai
22
+ image_default_input_size:
23
+ - 336
24
+ - 336
25
+ image_patch_size: 14
26
+ image_pos_patch_size: 14
27
+ image_emb_dim: 1024
28
+ image_num_heads: 16
29
+ image_num_key_value_heads: 16
30
+ image_num_layers: 23
31
+ image_head_dim: 64
32
+ image_mlp_dim: 4096
33
+ image_mlp_activations: quick_gelu
34
+ image_dropout_rate: 0.0
35
+ image_num_pos: 577
36
+ image_norm_eps: 1.0e-05
37
+ attention_dropout: 0.0
38
+ residual_dropout: 0.0
39
+ initializer_range: 0.02
40
+ fsdp_wrap: false
41
+ resize_mode: default
42
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
43
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
44
+ low_cpu_fsdp: true
45
+ attention_type: sdpa
46
+ float32_attention: true
47
+ attention_dropout: 0.0
48
+ attention_layer_norm: false
49
+ residual_dropout: 0.1
50
+ response_residual_dropout: 0.0
51
+ embedding_dropout: 0.0
52
+ layer_norm_type: rms
53
+ layer_norm_with_affine: true
54
+ layer_norm_eps: 1.0e-06
55
+ attention_layer_norm_with_affine: true
56
+ max_sequence_length: 4096
57
+ max_position_embeddings: null
58
+ include_bias: false
59
+ bias_for_layer_norm: null
60
+ scale_logits: false
61
+ vocab_size: 152064
62
+ embedding_size: 152064
63
+ ff_out_size: 0
64
+ additional_vocab_size: 128
65
+ new_embedding_init_range: 0.02
66
+ weight_tying: false
67
+ init_device: null
68
+ init_fn: normal
69
+ init_std: 0.02
70
+ init_cutoff_factor: null
71
+ norm_after: false
72
+ precision: amp_bf16
73
+ max_crops: 12
74
+ crop_mode: overlap-and-resize-c2
75
+ use_col_tokens: true
76
+ prompt_type: uber_model
77
+ system_prompt_kind: demo_or_style
78
+ message_formatting: role
79
+ always_start_with_space: true
80
+ multi_annotation_weighting: root_subsegments
81
+ default_inference_len: 65
82
+ overlap_margins:
83
+ - 4
84
+ - 4
85
+ pad_value: 0.0
86
+ image_padding_embed: pad_and_partial_pad
87
+ fix_image_padding: true
88
+ vit_layers:
89
+ - -2
90
+ - -9
91
+ image_pooling_h: 2
92
+ image_pooling_w: 2
93
+ image_pooling_2d: attention_meanq
94
+ image_projector: mlp
95
+ image_feature_dropout: 0.0
96
+ initializer_range: 0.02
97
+ normalize_input_embeds: false
98
+ use_position_ids: true
99
+ head_dim: null
100
+ action_tokenizer:
101
+ identifier: physical-intelligence/fast
102
+ tokenizer_dir: null
103
+ action_dim: 7
104
+ horizon: 8
105
+ tokenizer:
106
+ identifier: Qwen/Qwen2-7B
107
+ tokenizer_dir: null
108
+ pad_tokenizer: true
109
+ moe_num_experts: 8
110
+ moe_top_k: 2
111
+ moe_mlp_impl: sparse
112
+ moe_log_expert_assignment: false
113
+ moe_shared_expert: false
114
+ moe_lbl_in_fp32: false
115
+ moe_interleave: false
116
+ moe_loss_weight: 0.1
117
+ moe_zloss_weight: null
118
+ moe_dropless: true
119
+ moe_capacity_factor: 1.25
120
+ action_head: l1_regression
121
+ num_diffusion_steps: 1000
122
+ num_diffusion_inference_steps: 30
123
+ use_proprio: true
124
+ action_head_dit_hidden_size: 1152
125
+ action_head_dit_depth: 28
126
+ action_head_dit_num_heads: 16
127
+ llm_causal_attention: false
128
+ action_use_left_eef: true
129
+ action_use_mobile_base: false
130
+ allow_resume: false
131
+ ft_llm: true
132
+ ft_vit: false
133
+ ft_connector: false
134
+ ft_embedding: lm_head
135
+ lora: false
136
+ use_lora: true
137
+ lora_rank: 8
138
+ lora_llm: false
139
+ lora_vit: false
140
+ lora_connector: false
141
+ early_exit: false
142
+ train_exit_random_layer: false
143
+ optimizer:
144
+ name: adamw
145
+ learning_rate: 0.0001
146
+ weight_decay: 0.01
147
+ betas:
148
+ - 0.9
149
+ - 0.95
150
+ eps: 1.0e-05
151
+ connector_learning_rate: 0.0002
152
+ vit_learning_rate: 6.0e-06
153
+ llm_learning_rate: 5.0e-05
154
+ connector_weight_decay: 0.0
155
+ vit_weight_decay: 0.0
156
+ llm_weight_decay: 0.0
157
+ connector_betas:
158
+ - 0.9
159
+ - 0.95
160
+ vit_betas:
161
+ - 0.9
162
+ - 0.95
163
+ llm_betas:
164
+ - 0.9
165
+ - 0.95
166
+ connector_eps: 1.0e-06
167
+ vit_eps: 1.0e-06
168
+ llm_eps: 1.0e-06
169
+ metrics_log_interval: 20
170
+ scheduler:
171
+ name: multimodal
172
+ units: steps
173
+ t_warmup: 100
174
+ t_max: null
175
+ alpha_f: 0.1
176
+ connector_t_warmup: 200
177
+ vit_t_warmup: 2000
178
+ llm_t_warmup: 2000
179
+ grad_clip_warmup_steps: null
180
+ grad_clip_warmup_factor: null
181
+ warmup_min_lr: 0.0
182
+ data:
183
+ dataset: vla_dataset_realworld
184
+ mixture: null
185
+ root_size_mixture: null
186
+ split: train
187
+ seed: 95818
188
+ shuffle_messages: false
189
+ pad: to_max
190
+ sequence_length: 1600
191
+ shuffle: true
192
+ for_inference: false
193
+ multi_modal: torch
194
+ num_workers: 0
195
+ drop_last: true
196
+ pin_memory: true
197
+ prefetch_factor: null
198
+ persistent_workers: false
199
+ timeout: 0
200
+ rlds_dataset_name: libero_4_task_suites_no_noops
201
+ rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
202
+ use_wrist_image: true
203
+ use_proprio: true
204
+ rlds_shuffle_buffer_size: 100000
205
+ rlds_traj_threads: 8
206
+ rlds_read_threads: 8
207
+ lerobot_episode_index_start: null
208
+ lerobot_episode_index_end: null
209
+ restore_dataloader: true
210
+ fast_forward_batches: null
211
+ evaluators:
212
+ - label: val
213
+ data:
214
+ dataset: vla_dataset_realworld
215
+ mixture: null
216
+ root_size_mixture: null
217
+ split: validation
218
+ seed: null
219
+ shuffle_messages: false
220
+ pad: to_max
221
+ sequence_length: 1600
222
+ shuffle: false
223
+ for_inference: false
224
+ multi_modal: torch
225
+ num_workers: 0
226
+ drop_last: true
227
+ pin_memory: true
228
+ prefetch_factor: null
229
+ persistent_workers: true
230
+ timeout: 0
231
+ rlds_dataset_name: libero_4_task_suites_no_noops
232
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
233
+ use_wrist_image: true
234
+ use_proprio: true
235
+ rlds_shuffle_buffer_size: 256000
236
+ rlds_traj_threads: 8
237
+ rlds_read_threads: 8
238
+ lerobot_episode_index_start: 353
239
+ lerobot_episode_index_end: 765
240
+ device_eval_batch_size: null
241
+ subset_num_batches: 64
242
+ max_examples: null
243
+ max_new_tokens: 448
244
+ mm_evaluator: null
245
+ save_dir: null
246
+ save_to_checkpoint_dir: false
247
+ eval_name: null
248
+ skip_if_metrics_cached: true
249
+ eval_interval: 0
250
+ inf_eval_interval: -1
251
+ inf_evaluators: []
252
+ save_folder: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1
253
+ remote_save_folder: null
254
+ canceled_check_interval: 50
255
+ save_interval: 500
256
+ save_interval_unsharded: 500
257
+ save_interval_ephemeral: null
258
+ save_interval_action_head: 500
259
+ save_num_checkpoints_to_keep: 1
260
+ save_num_unsharded_checkpoints_to_keep: 1
261
+ save_num_action_head_checkpoints_to_keep: 2
262
+ save_overwrite: true
263
+ force_save_unsharded: false
264
+ no_pre_train_checkpoint: true
265
+ initial_model_checkpoint: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
266
+ load_model_config: null
267
+ checkpoint_dir: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
268
+ load_path: null
269
+ load_path_sharded_checkpointer: null
270
+ reset_optimizer_state: false
271
+ reset_trainer_state: false
272
+ save_dataloader_state: false
273
+ reset_dataloader_state: false
274
+ keep_lr_on_load: true
275
+ sharded_checkpointer: torch_legacy
276
+ max_duration: 500000
277
+ global_train_batch_size: 126
278
+ device_train_batch_size: 15
279
+ device_train_microbatch_size: 16
280
+ device_eval_batch_size: 4
281
+ eval_subset_num_batches: -1
282
+ eval_on_load: false
283
+ device_inf_eval_batch_size: 16
284
+ inf_eval_subset_num_batches: -1
285
+ device_train_grad_accum: 0
286
+ max_grad_norm: 1.0
287
+ multi_component_grad_norm: true
288
+ batch_divisor: global_batch
289
+ max_grad_norm_ratio: null
290
+ precision: amp_bf16
291
+ wandb:
292
+ project: a1-realworld
293
+ entity: henryeap
294
+ group: null
295
+ name: realworld_20250930_131219
296
+ tags:
297
+ - watching
298
+ log_artifacts: false
299
+ rank_zero_only: true
300
+ log_interval: 1
301
+ speed_monitor:
302
+ window_size: 20
303
+ gpu_flops_available: null
304
+ console_log_interval: 1
305
+ gen1_gc_interval: 1
306
+ compile: null
307
+ fsdp:
308
+ use_orig_params: true
309
+ sharding_strategy: FULL_SHARD
310
+ wrapping_strategy: by_block_and_size
311
+ precision: float
312
+ hybrid_sharding_num_model_replicas: null
313
+ softmax_auxiliary_loss: true
314
+ softmax_auxiliary_loss_scale: 0.0001
315
+ time_limit: null
316
+ extra_steps_after_cancel: 10
317
+ python_profiling: false
318
+ torch_profiling: false
319
+ stop_at: 500000
320
+ stop_after: null
321
+ activation_checkpointing: whole_layer
322
+ fused_loss: null
all_l1/step8500-unsharded/lora.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b09055f15a54dd092b4dd30833406731057005822da0c55c16231cf2e68f7f6
3
+ size 1243
all_l1/step8500-unsharded/train.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a978718ae923b1476f2fe06dd89422aa8d20b5aca459e47e40db6b02d009001
3
+ size 15061
all_l1/wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-30T13:12:50.753807221Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-09-30T13:12:51.777951571Z","level":"INFO","msg":"stream: created new stream","id":"ea1k0g3y"}
3
+ {"time":"2025-09-30T13:12:51.778008702Z","level":"INFO","msg":"stream: started","id":"ea1k0g3y"}
4
+ {"time":"2025-09-30T13:12:51.778019413Z","level":"INFO","msg":"handler: started","stream_id":"ea1k0g3y"}
5
+ {"time":"2025-09-30T13:12:51.778022263Z","level":"INFO","msg":"sender: started","stream_id":"ea1k0g3y"}
6
+ {"time":"2025-09-30T13:12:51.778055373Z","level":"INFO","msg":"writer: started","stream_id":"ea1k0g3y"}
7
+ {"time":"2025-10-01T17:48:53.058127813Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/ea1k0g3y/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
8
+ {"time":"2025-10-02T06:50:26.681725611Z","level":"INFO","msg":"api: retrying HTTP error","status":502,"url":"https://api.wandb.ai/files/henryeap/a1-realworld/ea1k0g3y/file_stream","body":"\n<html><head>\n<meta http-equiv=\"content-type\" content=\"text/html;charset=utf-8\">\n<title>502 Server Error</title>\n</head>\n<body text=#000000 bgcolor=#ffffff>\n<h1>Error: Server Error</h1>\n<h2>The server encountered a temporary error and could not complete your request.<p>Please try again in 30 seconds.</h2>\n<h2></h2>\n</body></html>\n"}
9
+ {"time":"2025-10-02T07:20:37.728905409Z","level":"INFO","msg":"api: retrying error","error":"Post \"https://api.wandb.ai/graphql\": net/http: request canceled (Client.Timeout exceeded while awaiting headers)"}
all_l1/wandb/wandb/debug.log ADDED
File without changes
all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/files/config.yaml ADDED
@@ -0,0 +1,621 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.4
4
+ e:
5
+ zpxgtt7ztfwstl3gysdjfozjodgqnvkm:
6
+ args:
7
+ - qwen2_7b
8
+ - save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1
9
+ - --vision_backbone
10
+ - openai
11
+ - --action_head
12
+ - l1_regression
13
+ - --seq_len
14
+ - "1600"
15
+ - --ft_llm
16
+ - --checkpoint
17
+ - /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
18
+ - --device_train_microbatch_size
19
+ - "16"
20
+ - --global_batch_size
21
+ - "126"
22
+ - --dataset
23
+ - vla_dataset_realworld
24
+ - --llm_learning_rate
25
+ - "5e-5"
26
+ - --wandb_entity
27
+ - henryeap
28
+ - --wandb_project
29
+ - a1-realworld
30
+ - --wandb_run_name
31
+ - realworld
32
+ - --save_overwrite
33
+ codePath: launch_scripts/train_vla.py
34
+ codePathLocal: launch_scripts/train_vla.py
35
+ cpu_count: 64
36
+ cpu_count_logical: 128
37
+ disk:
38
+ /:
39
+ total: "470343073792"
40
+ used: "50842279936"
41
+ email: ihenrykwok@outlook.com
42
+ executable: /vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10
43
+ git:
44
+ commit: 50cf9fd3143e218eb94104381c16c0482ac52f0d
45
+ remote: https://github.com/Spatialtemporal-AI/A1.git
46
+ gpu: Instinct MI210
47
+ gpu_amd:
48
+ - id: "0"
49
+ maxPower: "300.0"
50
+ mclkRange: 400Mhz - 1600Mhz
51
+ model: "0x740f"
52
+ performanceLevel: auto
53
+ sclkRange: 500Mhz - 1700Mhz
54
+ series: Instinct MI210
55
+ sku: D67301V
56
+ uniqueId: "0xc6ed7c5159e83b1"
57
+ vbiosVersion: 113-D67301V-073
58
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
59
+ - id: "5"
60
+ maxPower: "300.0"
61
+ mclkRange: 400Mhz - 1600Mhz
62
+ model: "0x740f"
63
+ performanceLevel: auto
64
+ sclkRange: 500Mhz - 1700Mhz
65
+ series: Instinct MI210
66
+ sku: D67301V
67
+ uniqueId: "0xa95e252cd7a6e54e"
68
+ vbiosVersion: 113-D67301V-073
69
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
70
+ - id: "1"
71
+ maxPower: "300.0"
72
+ mclkRange: 400Mhz - 1600Mhz
73
+ model: "0x740f"
74
+ performanceLevel: auto
75
+ sclkRange: 500Mhz - 1700Mhz
76
+ series: Instinct MI210
77
+ sku: D67301V
78
+ uniqueId: "0x333c966d5f3b3375"
79
+ vbiosVersion: 113-D67301V-073
80
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
81
+ - id: "7"
82
+ maxPower: "300.0"
83
+ mclkRange: 400Mhz - 1600Mhz
84
+ model: "0x740f"
85
+ performanceLevel: auto
86
+ sclkRange: 500Mhz - 1700Mhz
87
+ series: Instinct MI210
88
+ sku: D67301V
89
+ uniqueId: "0x5656afec2788d569"
90
+ vbiosVersion: 113-D67301V-073
91
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
92
+ - id: "6"
93
+ maxPower: "300.0"
94
+ mclkRange: 400Mhz - 1600Mhz
95
+ model: "0x740f"
96
+ performanceLevel: auto
97
+ sclkRange: 500Mhz - 1700Mhz
98
+ series: Instinct MI210
99
+ sku: D67301V
100
+ uniqueId: "0xad1bf75f50313455"
101
+ vbiosVersion: 113-D67301V-073
102
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
103
+ - id: "3"
104
+ maxPower: "300.0"
105
+ mclkRange: 400Mhz - 1600Mhz
106
+ model: "0x740f"
107
+ performanceLevel: auto
108
+ sclkRange: 500Mhz - 1700Mhz
109
+ series: Instinct MI210
110
+ sku: D67301V
111
+ uniqueId: "0xbabaa83bdb6fe877"
112
+ vbiosVersion: 113-D67301V-073
113
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
114
+ - id: "4"
115
+ maxPower: "300.0"
116
+ mclkRange: 400Mhz - 1600Mhz
117
+ model: "0x740f"
118
+ performanceLevel: auto
119
+ sclkRange: 500Mhz - 1700Mhz
120
+ series: Instinct MI210
121
+ sku: D67301V
122
+ uniqueId: "0xdee6d87744a36ae"
123
+ vbiosVersion: 113-D67301V-073
124
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
125
+ - id: "2"
126
+ maxPower: "300.0"
127
+ mclkRange: 400Mhz - 1600Mhz
128
+ model: "0x740f"
129
+ performanceLevel: auto
130
+ sclkRange: 500Mhz - 1700Mhz
131
+ series: Instinct MI210
132
+ sku: D67301V
133
+ uniqueId: "0x2a954aa975e59d15"
134
+ vbiosVersion: 113-D67301V-073
135
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
136
+ gpu_count: 8
137
+ host: auh7-1b-gpu-292
138
+ memory:
139
+ total: "2434606923776"
140
+ os: Linux-5.15.0-140-generic-x86_64-with-glibc2.35
141
+ program: /vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py
142
+ python: CPython 3.10.18
143
+ root: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1/wandb
144
+ slurm:
145
+ cluster_name: ai-04r
146
+ conf: /etc/slurm/slurm.conf
147
+ cpus_on_node: "128"
148
+ gpus_on_node: "8"
149
+ gtids: "0"
150
+ job_account: faculty-acc
151
+ job_cpus_per_node: "128"
152
+ job_end_time: "1759424668"
153
+ job_gid: "2000"
154
+ job_gpus: 0,1,2,3,4,5,6,7
155
+ job_id: "1934"
156
+ job_name: mh_realworld
157
+ job_nodelist: auh7-1b-gpu-292
158
+ job_num_nodes: "1"
159
+ job_partition: faculty
160
+ job_qos: xdqos
161
+ job_start_time: "1759165468"
162
+ job_uid: "2013"
163
+ job_user: xiaodan
164
+ jobid: "1934"
165
+ localid: "0"
166
+ nnodes: "1"
167
+ nodeid: "0"
168
+ nodelist: auh7-1b-gpu-292
169
+ nprocs: "1"
170
+ ntasks: "1"
171
+ ntasks_per_node: "1"
172
+ oom_kill_step: "0"
173
+ prio_process: "0"
174
+ procid: "0"
175
+ submit_dir: /vast/users/xiaodan/zhangjian/A1/launch_scripts
176
+ submit_host: auh-1b-cpu-login-001
177
+ task_pid: "1357871"
178
+ tasks_per_node: "1"
179
+ topology_addr: auh7-1b-gpu-292
180
+ topology_addr_pattern: node
181
+ startedAt: "2025-09-29T17:05:12.548535Z"
182
+ writerId: zpxgtt7ztfwstl3gysdjfozjodgqnvkm
183
+ m: []
184
+ python_version: 3.10.18
185
+ t:
186
+ "1":
187
+ - 1
188
+ - 2
189
+ - 3
190
+ - 5
191
+ - 11
192
+ - 41
193
+ - 49
194
+ - 51
195
+ - 53
196
+ - 63
197
+ - 71
198
+ - 83
199
+ - 95
200
+ - 105
201
+ "2":
202
+ - 1
203
+ - 2
204
+ - 3
205
+ - 5
206
+ - 11
207
+ - 41
208
+ - 49
209
+ - 51
210
+ - 53
211
+ - 63
212
+ - 71
213
+ - 83
214
+ - 95
215
+ - 105
216
+ "3":
217
+ - 2
218
+ - 13
219
+ - 15
220
+ - 16
221
+ - 61
222
+ "4": 3.10.18
223
+ "5": 0.21.4
224
+ "6": 4.56.1
225
+ "10":
226
+ - 19
227
+ "12": 0.21.4
228
+ "13": linux-x86_64
229
+ activation_checkpointing:
230
+ value: whole_layer
231
+ allow_resume:
232
+ value: false
233
+ batch_divisor:
234
+ value: global_batch
235
+ canceled_check_interval:
236
+ value: 50
237
+ checkpoint_dir:
238
+ value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
239
+ compile:
240
+ value: null
241
+ console_log_interval:
242
+ value: 1
243
+ data:
244
+ value:
245
+ dataset: vla_dataset_realworld
246
+ drop_last: true
247
+ for_inference: false
248
+ lerobot_episode_index_end: null
249
+ lerobot_episode_index_start: null
250
+ mixture: null
251
+ multi_modal: torch
252
+ num_workers: 0
253
+ pad: to_max
254
+ persistent_workers: false
255
+ pin_memory: true
256
+ prefetch_factor: null
257
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
258
+ rlds_dataset_name: libero_4_task_suites_no_noops
259
+ rlds_read_threads: 8
260
+ rlds_shuffle_buffer_size: 100000
261
+ rlds_traj_threads: 8
262
+ root_size_mixture: null
263
+ seed: 95818
264
+ sequence_length: 1600
265
+ shuffle: true
266
+ shuffle_messages: false
267
+ split: train
268
+ timeout: 0
269
+ use_proprio: true
270
+ use_wrist_image: true
271
+ device_eval_batch_size:
272
+ value: 4
273
+ device_inf_eval_batch_size:
274
+ value: 16
275
+ device_train_batch_size:
276
+ value: 15
277
+ device_train_grad_accum:
278
+ value: 0
279
+ device_train_microbatch_size:
280
+ value: 16
281
+ dry_run:
282
+ value: false
283
+ early_exit:
284
+ value: false
285
+ epoch:
286
+ value: null
287
+ eval_interval:
288
+ value: 0
289
+ eval_on_load:
290
+ value: false
291
+ eval_subset_num_batches:
292
+ value: -1
293
+ evaluators:
294
+ value:
295
+ - data:
296
+ dataset: vla_dataset_realworld
297
+ drop_last: true
298
+ for_inference: false
299
+ lerobot_episode_index_end: 765
300
+ lerobot_episode_index_start: 353
301
+ mixture: null
302
+ multi_modal: torch
303
+ num_workers: 0
304
+ pad: to_max
305
+ persistent_workers: true
306
+ pin_memory: true
307
+ prefetch_factor: null
308
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
309
+ rlds_dataset_name: libero_4_task_suites_no_noops
310
+ rlds_read_threads: 8
311
+ rlds_shuffle_buffer_size: 256000
312
+ rlds_traj_threads: 8
313
+ root_size_mixture: null
314
+ seed: null
315
+ sequence_length: 1600
316
+ shuffle: false
317
+ shuffle_messages: false
318
+ split: validation
319
+ timeout: 0
320
+ use_proprio: true
321
+ use_wrist_image: true
322
+ device_eval_batch_size: null
323
+ eval_name: null
324
+ label: val
325
+ max_examples: null
326
+ max_new_tokens: 448
327
+ mm_evaluator: null
328
+ save_dir: null
329
+ save_to_checkpoint_dir: false
330
+ skip_if_metrics_cached: true
331
+ subset_num_batches: 64
332
+ extra_steps_after_cancel:
333
+ value: 10
334
+ fast_forward_batches:
335
+ value: null
336
+ force_save_unsharded:
337
+ value: false
338
+ fsdp:
339
+ value:
340
+ hybrid_sharding_num_model_replicas: null
341
+ precision: float
342
+ sharding_strategy: FULL_SHARD
343
+ use_orig_params: true
344
+ wrapping_strategy: by_block_and_size
345
+ ft_connector:
346
+ value: false
347
+ ft_embedding:
348
+ value: lm_head
349
+ ft_llm:
350
+ value: true
351
+ ft_vit:
352
+ value: false
353
+ fused_loss:
354
+ value: null
355
+ gen1_gc_interval:
356
+ value: 1
357
+ global_train_batch_size:
358
+ value: 126
359
+ inf_eval_interval:
360
+ value: -1
361
+ inf_eval_subset_num_batches:
362
+ value: -1
363
+ inf_evaluators:
364
+ value: []
365
+ initial_model_checkpoint:
366
+ value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
367
+ keep_lr_on_load:
368
+ value: true
369
+ load_model_config:
370
+ value: null
371
+ load_path:
372
+ value: null
373
+ load_path_sharded_checkpointer:
374
+ value: null
375
+ lora:
376
+ value: false
377
+ lora_connector:
378
+ value: false
379
+ lora_llm:
380
+ value: false
381
+ lora_rank:
382
+ value: 8
383
+ lora_vit:
384
+ value: false
385
+ max_duration:
386
+ value: 500000
387
+ max_grad_norm:
388
+ value: 1
389
+ max_grad_norm_ratio:
390
+ value: null
391
+ model:
392
+ value:
393
+ action_dim: 7
394
+ action_head: l1_regression
395
+ action_head_dit_depth: 28
396
+ action_head_dit_hidden_size: 1152
397
+ action_head_dit_num_heads: 16
398
+ action_tokenizer:
399
+ identifier: physical-intelligence/fast
400
+ tokenizer_dir: null
401
+ action_use_left_eef: false
402
+ action_use_mobile_base: false
403
+ activation_type: swiglu
404
+ additional_vocab_size: 128
405
+ always_start_with_space: true
406
+ attention_dropout: 0
407
+ attention_layer_norm: false
408
+ attention_layer_norm_with_affine: true
409
+ attention_type: sdpa
410
+ bias_for_layer_norm: null
411
+ block_group_size: 1
412
+ block_type: sequential
413
+ clip_qkv: null
414
+ crop_mode: overlap-and-resize-c2
415
+ d_model: 3584
416
+ default_inference_len: 65
417
+ embedding_dropout: 0
418
+ embedding_size: 152064
419
+ ff_out_size: 0
420
+ fix_image_padding: true
421
+ float32_attention: true
422
+ head_dim: null
423
+ horizon: 8
424
+ image_feature_dropout: 0
425
+ image_padding_embed: pad_and_partial_pad
426
+ image_pooling_2d: attention_meanq
427
+ image_pooling_h: 2
428
+ image_pooling_w: 2
429
+ image_projector: mlp
430
+ include_bias: false
431
+ init_cutoff_factor: null
432
+ init_device: null
433
+ init_fn: normal
434
+ init_std: 0.02
435
+ initializer_range: 0.02
436
+ layer_norm_eps: 1e-06
437
+ layer_norm_type: rms
438
+ layer_norm_with_affine: true
439
+ llm_causal_attention: false
440
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
441
+ low_cpu_fsdp: true
442
+ max_crops: 12
443
+ max_position_embeddings: null
444
+ max_sequence_length: 4096
445
+ message_formatting: role
446
+ mlp_hidden_size: 37888
447
+ mlp_ratio: 4
448
+ moe_capacity_factor: 1.25
449
+ moe_dropless: true
450
+ moe_interleave: false
451
+ moe_lbl_in_fp32: false
452
+ moe_log_expert_assignment: false
453
+ moe_loss_weight: 0.1
454
+ moe_mlp_impl: sparse
455
+ moe_num_experts: 8
456
+ moe_shared_expert: false
457
+ moe_top_k: 2
458
+ moe_zloss_weight: null
459
+ multi_annotation_weighting: root_subsegments
460
+ n_heads: 28
461
+ n_kv_heads: 4
462
+ n_layers: 28
463
+ new_embedding_init_range: 0.02
464
+ norm_after: false
465
+ normalize_input_embeds: false
466
+ num_diffusion_inference_steps: 30
467
+ num_diffusion_steps: 1000
468
+ overlap_margins:
469
+ - 4
470
+ - 4
471
+ pad_tokenizer: true
472
+ pad_value: 0
473
+ precision: amp_bf16
474
+ prompt_type: uber_model
475
+ qkv_bias: true
476
+ residual_dropout: 0.1
477
+ response_residual_dropout: 0
478
+ rope: true
479
+ rope_full_precision: true
480
+ rope_theta: 1e+06
481
+ scale_logits: false
482
+ system_prompt_kind: demo_or_style
483
+ tokenizer:
484
+ identifier: Qwen/Qwen2-7B
485
+ tokenizer_dir: null
486
+ use_col_tokens: true
487
+ use_position_ids: true
488
+ use_proprio: true
489
+ vision_backbone:
490
+ attention_dropout: 0
491
+ fsdp_wrap: false
492
+ image_default_input_size:
493
+ - 336
494
+ - 336
495
+ image_dropout_rate: 0
496
+ image_emb_dim: 1024
497
+ image_head_dim: 64
498
+ image_mlp_activations: quick_gelu
499
+ image_mlp_dim: 4096
500
+ image_model_type: openai
501
+ image_norm_eps: 1e-05
502
+ image_num_heads: 16
503
+ image_num_key_value_heads: 16
504
+ image_num_layers: 23
505
+ image_num_pos: 577
506
+ image_patch_size: 14
507
+ image_pos_patch_size: 14
508
+ initializer_range: 0.02
509
+ residual_dropout: 0
510
+ resize_mode: default
511
+ vit_layers:
512
+ - -2
513
+ - -9
514
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
515
+ vocab_size: 152064
516
+ weight_tying: false
517
+ multi_component_grad_norm:
518
+ value: true
519
+ no_pre_train_checkpoint:
520
+ value: true
521
+ optimizer:
522
+ value:
523
+ betas:
524
+ - 0.9
525
+ - 0.95
526
+ connector_betas:
527
+ - 0.9
528
+ - 0.95
529
+ connector_eps: 1e-06
530
+ connector_learning_rate: 0.0002
531
+ connector_weight_decay: 0
532
+ eps: 1e-05
533
+ learning_rate: 0.0001
534
+ llm_betas:
535
+ - 0.9
536
+ - 0.95
537
+ llm_eps: 1e-06
538
+ llm_learning_rate: 5e-05
539
+ llm_weight_decay: 0
540
+ metrics_log_interval: 20
541
+ name: adamw
542
+ vit_betas:
543
+ - 0.9
544
+ - 0.95
545
+ vit_eps: 1e-06
546
+ vit_learning_rate: 6e-06
547
+ vit_weight_decay: 0
548
+ weight_decay: 0.01
549
+ precision:
550
+ value: amp_bf16
551
+ python_profiling:
552
+ value: false
553
+ remote_save_folder:
554
+ value: null
555
+ reset_dataloader_state:
556
+ value: false
557
+ reset_optimizer_state:
558
+ value: false
559
+ reset_trainer_state:
560
+ value: false
561
+ restore_dataloader:
562
+ value: true
563
+ run_name:
564
+ value: realworld_20250929_170441
565
+ save_dataloader_state:
566
+ value: false
567
+ save_folder:
568
+ value: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1
569
+ save_interval:
570
+ value: 500
571
+ save_interval_action_head:
572
+ value: 500
573
+ save_interval_ephemeral:
574
+ value: null
575
+ save_interval_unsharded:
576
+ value: 500
577
+ save_num_action_head_checkpoints_to_keep:
578
+ value: 2
579
+ save_num_checkpoints_to_keep:
580
+ value: 1
581
+ save_num_unsharded_checkpoints_to_keep:
582
+ value: 1
583
+ save_overwrite:
584
+ value: true
585
+ scheduler:
586
+ value:
587
+ alpha_f: 0.1
588
+ connector_t_warmup: 200
589
+ grad_clip_warmup_factor: null
590
+ grad_clip_warmup_steps: null
591
+ llm_t_warmup: 2000
592
+ name: multimodal
593
+ t_max: null
594
+ t_warmup: 100
595
+ units: steps
596
+ vit_t_warmup: 2000
597
+ warmup_min_lr: 0
598
+ seed:
599
+ value: 6198
600
+ sharded_checkpointer:
601
+ value: torch_legacy
602
+ softmax_auxiliary_loss:
603
+ value: true
604
+ softmax_auxiliary_loss_scale:
605
+ value: 0.0001
606
+ speed_monitor:
607
+ value:
608
+ gpu_flops_available: null
609
+ window_size: 20
610
+ stop_after:
611
+ value: null
612
+ stop_at:
613
+ value: 500000
614
+ time_limit:
615
+ value: null
616
+ torch_profiling:
617
+ value: false
618
+ train_exit_random_layer:
619
+ value: false
620
+ use_lora:
621
+ value: true
all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/files/output.log ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb: Detected [openai] in use.
2
+ wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
3
+ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
4
+ 09/29 [17:05:14] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
5
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
6
+ warnings.warn( # warn only once
7
+
8
+ ****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk', 0.1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50', 0.1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Eraser', 0.1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue', 0.1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Pen', 0.1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_StickyNote', 0.1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Test_Glue', 0.1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe', 0.1, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 0.1, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': None, 'weight': 0.1, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
9
+ ****** Skip RLDS main; path not found: None
10
+ ****** start build LeRobot main...
11
+ build_tokenizer, cache_dir None tokenizer_dir None
12
+ 09/29 [17:05:28] INFO | >> Padding tokenizer with 418 tokens tokenizer.py:130
13
+ 09/29 [17:05:29] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
14
+ ****** before LeRobot dataset...
15
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk
16
+ ****** length of the dataset: 72641
17
+ 09/29 [17:05:37] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
18
+ ****** before LeRobot dataset...
19
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50
20
+ ****** length of the dataset: 27906
21
+ 09/29 [17:05:44] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
22
+ ****** before LeRobot dataset...
23
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Eraser
24
+ ****** length of the dataset: 13441
25
+ 09/29 [17:05:48] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
26
+ ****** before LeRobot dataset...
27
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
28
+ ****** length of the dataset: 10316
29
+ 09/29 [17:05:49] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
30
+ ****** before LeRobot dataset...
31
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Pen
32
+ ****** length of the dataset: 17131
33
+ 09/29 [17:05:50] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
34
+ ****** before LeRobot dataset...
35
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_StickyNote
36
+ ****** length of the dataset: 15765
37
+ 09/29 [17:05:51] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
38
+ ****** before LeRobot dataset...
39
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Test_Glue
40
+ ****** length of the dataset: 90
41
+ 09/29 [17:05:52] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
42
+ ****** before LeRobot dataset...
43
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
44
+ ****** length of the dataset: 18397
45
+ ****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
46
+ ****** Expect one of: []
47
+ ****** path: None
48
+ ****** Skip AgiBotWorld-Alpha open-source-real-world; path not found: None
49
+ ****** After build vla train dataset...
50
+ ****** iterable_sources: [<olmo.data.dataset.IterableDatasetWrapper object at 0x7fd400f07970>, <olmo.data.dataset.IterableDatasetWrapper object at 0x7fd40649db70>, <olmo.data.dataset.IterableDatasetWrapper object at 0x7fd4063572b0>, <olmo.data.dataset.IterableDatasetWrapper object at 0x7fd405f41ae0>, <olmo.data.dataset.IterableDatasetWrapper object at 0x7fd40647a710>, <olmo.data.dataset.IterableDatasetWrapper object at 0x7fd40637cd30>, <olmo.data.dataset.IterableDatasetWrapper object at 0x7fd406013e50>, <olmo.data.dataset.IterableDatasetWrapper object at 0x7fd405f42ef0>]
51
+ ****** Before build mixed iterable dataset...
52
+ ****** Build vla train dataloader successfully!
53
+ ************************* Build train_dataloader successful!
54
+ ************************* Before build_inf_evaluators
55
+ 09/29 [17:05:53] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
56
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
57
+ warnings.warn( # warn only once
58
+
59
+ ************************* Build evaluators successful!
60
+ ************************* Early exit flags: early_exit=False
61
+ ************************* Initialize model successful!
62
+ ***** state_dict_path: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924/model.pt
63
+ ***** Load checkpoint successful!
64
+ missing keys: ['action_head.model.layer_norm1.weight', 'action_head.model.layer_norm1.bias', 'action_head.model.fc1.weight', 'action_head.model.fc1.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.0.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.0.ffn.1.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.0.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.0.bias', 'action_head.model.mlp_resnet_blocks.1.ffn.1.weight', 'action_head.model.mlp_resnet_blocks.1.ffn.1.bias', 'action_head.model.layer_norm2.weight', 'action_head.model.layer_norm2.bias', 'action_head.model.fc2.weight', 'action_head.model.fc2.bias', 'proprio_projector.fc1.weight', 'proprio_projector.fc1.bias', 'proprio_projector.fc2.weight', 'proprio_projector.fc2.bias']
65
+ unexpected keys: []
66
+ ************************* Initialize model successful!
67
+ ************************* LoRA flags: use_lora=True, lora_llm=False, lora_vit=False, lora_connector=False
68
+ ************************* Before add lora to model
69
+ ************************* Before FSDP model wrapping
70
+ ************************* FSDP model wrapping successful!
71
+ ************************* Before building optimizer and scheduler
72
+ ************* Before get lora params
73
+ ************* After get lora params successfully
74
+ 09/29 [17:07:04] INFO | >> Constructing optimizer with 2 param groups optim.py:1283
75
+ **************************************************
76
+ After building optimizer and scheduler and model, before training, peak GPU memory (MB): 35614
77
+ ************************* VLATrainer initialized successfully!
78
+ ************************* Before trainer.fit()
79
+ Pre-train system metrics
80
+ System/Peak GPU Memory (MB)=35,614
81
+ 09/29 [17:07:05] WARNING | >> /vast/users/xiaodan/zhangjian/A1/olmo/data/collator.py:200: UserWarning: To copy construct from a tensor, it is recommended to use warnings.py:109
82
+ sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
83
+ timestep_list = [torch.tensor(ex["timestep"], dtype=torch.int64) for ex in batch]
84
+
85
+ !!!Training failed: Given normalized_shape=[57344], expected input with shape [*, 57344], but got input of size[15, 8, 28672]
86
+ Traceback (most recent call last):
87
+ File "/vast/users/xiaodan/zhangjian/A1/scripts/train_for_action.py", line 593, in main
88
+ trainer.fit()
89
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/train.py", line 2331, in fit
90
+ metrics = self.train_step(batch, reduce_global_loss=should_log_this_step)
91
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/train.py", line 2038, in train_step
92
+ ce_batch_loss, z_batch_loss, batch_accuracy, lb_batch_loss, moe_z_batch_loss, expert_assignments,action_loss = self.train_batch(batch)
93
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/train.py", line 1956, in train_batch
94
+ accuracy, ce_loss, z_loss, logits, action_loss = self.model_forward(
95
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/train.py", line 1813, in model_forward
96
+ outputs = self.fsdp_model.forward(
97
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/fsdp/fully_sharded_data_parallel.py", line 854, in forward
98
+ output = self._fsdp_wrapped_module(*args, **kwargs)
99
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
100
+ return self._call_impl(*args, **kwargs)
101
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
102
+ return forward_call(*args, **kwargs)
103
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/vla/affordvla.py", line 1042, in forward
104
+ predicted_actions = self.action_head.predict_action(action_hidden_states)
105
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/vla/action_heads.py", line 249, in predict_action
106
+ action = self.model(rearranged_actions_hidden_states)
107
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
108
+ return self._call_impl(*args, **kwargs)
109
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
110
+ return forward_call(*args, **kwargs)
111
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/vla/action_heads.py", line 210, in forward
112
+ x = self.layer_norm1(x) # shape: (batch_size, input_dim)
113
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1773, in _wrapped_call_impl
114
+ return self._call_impl(*args, **kwargs)
115
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1784, in _call_impl
116
+ return forward_call(*args, **kwargs)
117
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/nn/modules/normalization.py", line 217, in forward
118
+ return F.layer_norm(
119
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/nn/functional.py", line 2905, in layer_norm
120
+ return torch.layer_norm(
121
+ RuntimeError: Given normalized_shape=[57344], expected input with shape [*, 57344], but got input of size[15, 8, 28672]
122
+ wandb: WARNING The `quiet` argument to `wandb.run.finish()` is deprecated, use `wandb.Settings(quiet=...)` to set this instead.
all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/files/requirements.txt ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ etils==1.13.0
72
+ evdev==1.9.2
73
+ exceptiongroup==1.3.0
74
+ face==24.0.0
75
+ fastapi==0.116.2
76
+ ffmpy==0.6.1
77
+ fiddle==0.3.0
78
+ filelock==3.13.1
79
+ Flask==3.1.2
80
+ fonttools==4.60.0
81
+ frozenlist==1.7.0
82
+ fsspec==2023.9.2
83
+ ftfy==6.3.1
84
+ gcsfs==2023.9.2
85
+ gitdb==4.0.12
86
+ GitPython==3.1.45
87
+ glom==24.11.0
88
+ google-api-core==2.25.1
89
+ google-auth==2.40.3
90
+ google-auth-oauthlib==1.2.2
91
+ google-cloud-core==2.4.3
92
+ google-cloud-storage==2.19.0
93
+ google-crc32c==1.7.1
94
+ google-resumable-media==2.7.2
95
+ googleapis-common-protos==1.70.0
96
+ gradio==5.46.0
97
+ gradio_client==1.13.0
98
+ graphviz==0.21
99
+ groovy==0.1.2
100
+ grpcio==1.75.0
101
+ gymnasium==0.29.1
102
+ h11==0.16.0
103
+ hf_transfer==0.1.9
104
+ hf-xet==1.1.10
105
+ httpcore==1.0.9
106
+ httpx==0.28.1
107
+ huggingface-hub==0.35.0
108
+ id==1.5.0
109
+ idna==3.10
110
+ imageio==2.37.0
111
+ imageio-ffmpeg==0.6.0
112
+ importlib_metadata==8.7.0
113
+ importlib_resources==6.5.2
114
+ iniconfig==2.1.0
115
+ inquirerpy==0.3.4
116
+ isort==5.12.0
117
+ itsdangerous==2.2.0
118
+ jaraco.classes==3.4.0
119
+ jaraco.context==6.0.1
120
+ jaraco.functools==4.3.0
121
+ jeepney==0.9.0
122
+ Jinja2==3.1.4
123
+ jiter==0.11.0
124
+ jmespath==1.0.1
125
+ joblib==1.5.2
126
+ jsonlines==4.0.0
127
+ keras==2.15.0
128
+ keyring==25.6.0
129
+ kiwisolver==1.4.9
130
+ latex2sympy2_extended==1.10.2
131
+ lerobot==0.3.4
132
+ Levenshtein==0.27.1
133
+ libcst==1.8.4
134
+ lightning-utilities==0.15.2
135
+ markdown-it-py==4.0.0
136
+ math-verify==0.8.0
137
+ matplotlib==3.10.6
138
+ mdurl==0.1.2
139
+ mergedeep==1.3.4
140
+ ml-dtypes==0.2.0
141
+ ml_dtypes==0.5.3
142
+ more-itertools==10.8.0
143
+ mpmath==1.3.0
144
+ msgspec==0.19.0
145
+ multidict==6.6.4
146
+ multiprocess==0.70.16
147
+ mypy==1.3.0
148
+ mypy_extensions==1.1.0
149
+ necessary==0.4.3
150
+ networkx==3.3
151
+ nh3==0.3.0
152
+ nltk==3.9.1
153
+ numpy==1.26.4
154
+ oauthlib==3.3.1
155
+ omegaconf==2.3.0
156
+ openai==1.108.0
157
+ opencv-python-headless==4.12.0.88
158
+ OpenEXR==3.4.0
159
+ orderly-set==5.5.0
160
+ orjson==3.11.3
161
+ packaging==25.0
162
+ pandas==2.3.2
163
+ pathspec==0.12.1
164
+ petname==2.6
165
+ pfzy==0.3.4
166
+ pillow==11.0.0
167
+ pip==25.2
168
+ platformdirs==4.4.0
169
+ pluggy==1.6.0
170
+ promise==2.3
171
+ prompt_toolkit==3.0.52
172
+ propcache==0.3.2
173
+ proto-plus==1.26.1
174
+ protobuf==4.21.12
175
+ protobuf==6.32.1
176
+ psutil==7.1.0
177
+ pyarrow==21.0.0
178
+ pyasn1==0.6.1
179
+ pyasn1_modules==0.4.2
180
+ pycparser==2.23
181
+ pydantic==2.11.9
182
+ pydantic_core==2.33.2
183
+ pydub==0.25.1
184
+ Pygments==2.19.2
185
+ pynput==1.8.1
186
+ pyparsing==3.2.4
187
+ pyproject_hooks==1.2.0
188
+ pyserial==3.5
189
+ pytest==8.4.2
190
+ pytest-sphinx==0.6.3
191
+ python-dateutil==2.9.0.post0
192
+ python-Levenshtein==0.27.1
193
+ python-multipart==0.0.20
194
+ python-xlib==0.33
195
+ pytorch-triton-rocm==3.4.0
196
+ pytz==2025.2
197
+ pyyaml-include==1.4.1
198
+ RapidFuzz==3.14.1
199
+ readme_renderer==44.0
200
+ regex==2025.9.1
201
+ requests==2.32.5
202
+ requests-oauthlib==2.0.0
203
+ requests-toolbelt==1.0.0
204
+ requirements-parser==0.13.0
205
+ rerun-sdk==0.22.1
206
+ rfc3986==2.0.0
207
+ rich==13.9.4
208
+ rsa==4.9.1
209
+ ruff==0.13.0
210
+ s3transfer==0.14.0
211
+ safehttpx==0.1.6
212
+ safetensors==0.6.2
213
+ scikit-learn==1.7.2
214
+ scipy==1.15.3
215
+ SecretStorage==3.4.0
216
+ semantic-version==2.10.0
217
+ sentencepiece==0.2.1
218
+ sentry-sdk==2.38.0
219
+ setuptools==78.1.1
220
+ shellingham==1.5.4
221
+ six==1.17.0
222
+ smart_open==7.3.1
223
+ smashed==0.21.5
224
+ smmap==5.0.2
225
+ sniffio==1.3.1
226
+ starlette==0.48.0
227
+ sympy==1.13.3
228
+ tensorboard==2.15.2
229
+ tensorboard==2.19.0
230
+ tensorflow==2.15.0
231
+ tensorflow-addons==0.23.0
232
+ tensorflow-datasets==4.9.3
233
+ tensorflow-estimator==2.15.0
234
+ tensorflow-graphics==2021.12.3
235
+ tensorflow-metadata==1.17.2
236
+ threadpoolctl==3.6.0
237
+ timm==1.0.19
238
+ tokenizers==0.22.0
239
+ toml==0.10.2
240
+ tomli==2.2.1
241
+ tomlkit==0.13.3
242
+ torch==2.8.0+rocm6.4
243
+ torchcodec==0.5
244
+ torchmetrics==1.8.2
245
+ torchvision==0.23.0+rocm6.4
246
+ tqdm==4.67.1
247
+ transformers==4.56.1
248
+ trimesh==4.8.2
249
+ trouting==0.3.3
250
+ twine==6.2.0
251
+ typeguard==2.13.3
252
+ typer==0.17.4
253
+ typing_extensions==4.15.0
254
+ typing-inspect==0.9.0
255
+ typing-inspection==0.4.1
256
+ tzdata==2025.2
257
+ urllib3==2.5.0
258
+ uvicorn==0.35.0
259
+ wandb==0.21.4
260
+ wcwidth==0.2.13
261
+ websockets==15.0.1
262
+ wheel==0.45.1
263
+ wrapt==1.14.2
264
+ xxhash==3.5.0
265
+ yarl==1.20.1
266
+ zipp==3.23.0
267
+ lerobot==0.3.4
268
+ minLoRA==0.1.0
269
+ autocommand==2.2.2
270
+ backports.tarfile==1.2.0
271
+ importlib_metadata==8.0.0
272
+ inflect==7.3.1
273
+ jaraco.collections==5.1.0
274
+ jaraco.context==5.3.0
275
+ jaraco.functools==4.0.1
276
+ jaraco.text==3.12.1
277
+ more-itertools==10.3.0
278
+ packaging==24.2
279
+ platformdirs==4.2.2
280
+ tomli==2.0.1
281
+ typeguard==4.3.0
282
+ typing_extensions==4.12.2
283
+ wheel==0.45.1
284
+ zipp==3.19.2
all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/files/wandb-metadata.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-09-29T17:05:12.548535Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "l1_regression",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "realworld",
31
+ "--save_overwrite"
32
+ ],
33
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
34
+ "codePath": "launch_scripts/train_vla.py",
35
+ "codePathLocal": "launch_scripts/train_vla.py",
36
+ "git": {
37
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
38
+ "commit": "50cf9fd3143e218eb94104381c16c0482ac52f0d"
39
+ },
40
+ "email": "ihenrykwok@outlook.com",
41
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1/wandb",
42
+ "host": "auh7-1b-gpu-292",
43
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
44
+ "cpu_count": 64,
45
+ "cpu_count_logical": 128,
46
+ "gpu": "Instinct MI210",
47
+ "gpu_count": 8,
48
+ "disk": {
49
+ "/": {
50
+ "total": "470343073792",
51
+ "used": "50842279936"
52
+ }
53
+ },
54
+ "memory": {
55
+ "total": "2434606923776"
56
+ },
57
+ "gpu_amd": [
58
+ {
59
+ "id": "0",
60
+ "uniqueId": "0xc6ed7c5159e83b1",
61
+ "vbiosVersion": "113-D67301V-073",
62
+ "performanceLevel": "auto",
63
+ "maxPower": "300.0",
64
+ "series": "Instinct MI210",
65
+ "model": "0x740f",
66
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
67
+ "sku": "D67301V",
68
+ "sclkRange": "500Mhz - 1700Mhz",
69
+ "mclkRange": "400Mhz - 1600Mhz"
70
+ },
71
+ {
72
+ "id": "5",
73
+ "uniqueId": "0xa95e252cd7a6e54e",
74
+ "vbiosVersion": "113-D67301V-073",
75
+ "performanceLevel": "auto",
76
+ "maxPower": "300.0",
77
+ "series": "Instinct MI210",
78
+ "model": "0x740f",
79
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
80
+ "sku": "D67301V",
81
+ "sclkRange": "500Mhz - 1700Mhz",
82
+ "mclkRange": "400Mhz - 1600Mhz"
83
+ },
84
+ {
85
+ "id": "1",
86
+ "uniqueId": "0x333c966d5f3b3375",
87
+ "vbiosVersion": "113-D67301V-073",
88
+ "performanceLevel": "auto",
89
+ "maxPower": "300.0",
90
+ "series": "Instinct MI210",
91
+ "model": "0x740f",
92
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
93
+ "sku": "D67301V",
94
+ "sclkRange": "500Mhz - 1700Mhz",
95
+ "mclkRange": "400Mhz - 1600Mhz"
96
+ },
97
+ {
98
+ "id": "7",
99
+ "uniqueId": "0x5656afec2788d569",
100
+ "vbiosVersion": "113-D67301V-073",
101
+ "performanceLevel": "auto",
102
+ "maxPower": "300.0",
103
+ "series": "Instinct MI210",
104
+ "model": "0x740f",
105
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
106
+ "sku": "D67301V",
107
+ "sclkRange": "500Mhz - 1700Mhz",
108
+ "mclkRange": "400Mhz - 1600Mhz"
109
+ },
110
+ {
111
+ "id": "6",
112
+ "uniqueId": "0xad1bf75f50313455",
113
+ "vbiosVersion": "113-D67301V-073",
114
+ "performanceLevel": "auto",
115
+ "maxPower": "300.0",
116
+ "series": "Instinct MI210",
117
+ "model": "0x740f",
118
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
119
+ "sku": "D67301V",
120
+ "sclkRange": "500Mhz - 1700Mhz",
121
+ "mclkRange": "400Mhz - 1600Mhz"
122
+ },
123
+ {
124
+ "id": "3",
125
+ "uniqueId": "0xbabaa83bdb6fe877",
126
+ "vbiosVersion": "113-D67301V-073",
127
+ "performanceLevel": "auto",
128
+ "maxPower": "300.0",
129
+ "series": "Instinct MI210",
130
+ "model": "0x740f",
131
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
132
+ "sku": "D67301V",
133
+ "sclkRange": "500Mhz - 1700Mhz",
134
+ "mclkRange": "400Mhz - 1600Mhz"
135
+ },
136
+ {
137
+ "id": "4",
138
+ "uniqueId": "0xdee6d87744a36ae",
139
+ "vbiosVersion": "113-D67301V-073",
140
+ "performanceLevel": "auto",
141
+ "maxPower": "300.0",
142
+ "series": "Instinct MI210",
143
+ "model": "0x740f",
144
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
145
+ "sku": "D67301V",
146
+ "sclkRange": "500Mhz - 1700Mhz",
147
+ "mclkRange": "400Mhz - 1600Mhz"
148
+ },
149
+ {
150
+ "id": "2",
151
+ "uniqueId": "0x2a954aa975e59d15",
152
+ "vbiosVersion": "113-D67301V-073",
153
+ "performanceLevel": "auto",
154
+ "maxPower": "300.0",
155
+ "series": "Instinct MI210",
156
+ "model": "0x740f",
157
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
158
+ "sku": "D67301V",
159
+ "sclkRange": "500Mhz - 1700Mhz",
160
+ "mclkRange": "400Mhz - 1600Mhz"
161
+ }
162
+ ],
163
+ "slurm": {
164
+ "cluster_name": "ai-04r",
165
+ "conf": "/etc/slurm/slurm.conf",
166
+ "cpus_on_node": "128",
167
+ "gpus_on_node": "8",
168
+ "gtids": "0",
169
+ "job_account": "faculty-acc",
170
+ "job_cpus_per_node": "128",
171
+ "job_end_time": "1759424668",
172
+ "job_gid": "2000",
173
+ "job_gpus": "0,1,2,3,4,5,6,7",
174
+ "job_id": "1934",
175
+ "job_name": "mh_realworld",
176
+ "job_nodelist": "auh7-1b-gpu-292",
177
+ "job_num_nodes": "1",
178
+ "job_partition": "faculty",
179
+ "job_qos": "xdqos",
180
+ "job_start_time": "1759165468",
181
+ "job_uid": "2013",
182
+ "job_user": "xiaodan",
183
+ "jobid": "1934",
184
+ "localid": "0",
185
+ "nnodes": "1",
186
+ "nodeid": "0",
187
+ "nodelist": "auh7-1b-gpu-292",
188
+ "nprocs": "1",
189
+ "ntasks": "1",
190
+ "ntasks_per_node": "1",
191
+ "oom_kill_step": "0",
192
+ "prio_process": "0",
193
+ "procid": "0",
194
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
195
+ "submit_host": "auh-1b-cpu-login-001",
196
+ "task_pid": "1357871",
197
+ "tasks_per_node": "1",
198
+ "topology_addr": "auh7-1b-gpu-292",
199
+ "topology_addr_pattern": "node"
200
+ },
201
+ "writerId": "zpxgtt7ztfwstl3gysdjfozjodgqnvkm"
202
+ }
all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_step":0,"System/Peak GPU Memory (MB)":35614.78125,"_timestamp":1.759165624842901e+09,"_wandb":{"runtime":118},"_runtime":118.9149238}
all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/logs/debug-core.log ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-29T17:05:12.599767574Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpblcfcrd2/port-1358048.txt","pid":1358048,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-29T17:05:12.601129497Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":1358048}
3
+ {"time":"2025-09-29T17:05:12.601134567Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-1358048-1358213-1657513860/socket","Net":"unix"}}
4
+ {"time":"2025-09-29T17:05:12.784034321Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-29T17:05:12.788802342Z","level":"INFO","msg":"handleInformInit: received","streamId":"2lq20p1f","id":"1(@)"}
6
+ {"time":"2025-09-29T17:05:13.92427811Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"2lq20p1f","id":"1(@)"}
7
+ {"time":"2025-09-29T17:07:15.469972392Z","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"2lq20p1f","id":"1(@)"}
8
+ {"time":"2025-09-29T17:07:15.473846438Z","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"2lq20p1f","id":"1(@)"}
9
+ {"time":"2025-09-29T17:07:15.473852038Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
10
+ {"time":"2025-09-29T17:07:15.473858278Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
11
+ {"time":"2025-09-29T17:07:15.473865358Z","level":"INFO","msg":"server is shutting down"}
12
+ {"time":"2025-09-29T17:07:15.473865688Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
13
+ {"time":"2025-09-29T17:07:15.473901259Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
14
+ {"time":"2025-09-29T17:07:15.473904939Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
15
+ {"time":"2025-09-29T17:07:15.473942719Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-1358048-1358213-1657513860/socket","Net":"unix"}}
16
+ {"time":"2025-09-29T17:07:15.47396587Z","level":"INFO","msg":"server is closed"}
all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/logs/debug-internal.log ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-29T17:05:12.790661794Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-09-29T17:05:13.924227629Z","level":"INFO","msg":"stream: created new stream","id":"2lq20p1f"}
3
+ {"time":"2025-09-29T17:05:13.92427238Z","level":"INFO","msg":"stream: started","id":"2lq20p1f"}
4
+ {"time":"2025-09-29T17:05:13.92428108Z","level":"INFO","msg":"writer: started","stream_id":"2lq20p1f"}
5
+ {"time":"2025-09-29T17:05:13.924301101Z","level":"INFO","msg":"handler: started","stream_id":"2lq20p1f"}
6
+ {"time":"2025-09-29T17:05:13.924330541Z","level":"INFO","msg":"sender: started","stream_id":"2lq20p1f"}
7
+ {"time":"2025-09-29T17:07:13.196500373Z","level":"INFO","msg":"handler: operation stats","stats":{"operations":[{"desc":"updating run metadata","runtime_seconds":0.001296762}],"total_operations":1}}
8
+ {"time":"2025-09-29T17:07:15.018224116Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-09-29T17:07:15.470258007Z","level":"INFO","msg":"stream: closing","id":"2lq20p1f"}
10
+ {"time":"2025-09-29T17:07:15.470272437Z","level":"INFO","msg":"handler: closed","stream_id":"2lq20p1f"}
11
+ {"time":"2025-09-29T17:07:15.472123178Z","level":"INFO","msg":"sender: closed","stream_id":"2lq20p1f"}
12
+ {"time":"2025-09-29T17:07:15.472129189Z","level":"INFO","msg":"stream: closed","id":"2lq20p1f"}
all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/logs/debug.log ADDED
File without changes
all_l1/wandb/wandb/run-20250929_170512-2lq20p1f/run-2lq20p1f.wandb ADDED
Binary file (56.3 kB). View file
 
all_l1/wandb/wandb/run-20250930_085206-50kj35c0/files/config.yaml ADDED
@@ -0,0 +1,617 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.4
4
+ e:
5
+ xw28krbghcx1p9g77lw10b5hvgwgqmco:
6
+ args:
7
+ - qwen2_7b
8
+ - save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1
9
+ - --vision_backbone
10
+ - openai
11
+ - --action_head
12
+ - l1_regression
13
+ - --seq_len
14
+ - "1600"
15
+ - --ft_llm
16
+ - --checkpoint
17
+ - /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
18
+ - --device_train_microbatch_size
19
+ - "16"
20
+ - --global_batch_size
21
+ - "126"
22
+ - --dataset
23
+ - vla_dataset_realworld
24
+ - --llm_learning_rate
25
+ - "5e-5"
26
+ - --wandb_entity
27
+ - henryeap
28
+ - --wandb_project
29
+ - a1-realworld
30
+ - --wandb_run_name
31
+ - realworld
32
+ - --save_overwrite
33
+ codePath: launch_scripts/train_vla.py
34
+ codePathLocal: launch_scripts/train_vla.py
35
+ cpu_count: 64
36
+ cpu_count_logical: 128
37
+ disk:
38
+ /:
39
+ total: "470343073792"
40
+ used: "50128396288"
41
+ email: ihenrykwok@outlook.com
42
+ executable: /vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10
43
+ git:
44
+ commit: f2afcc15e05f491a8e50add64395fc1db0a1188d
45
+ remote: https://github.com/Spatialtemporal-AI/A1.git
46
+ gpu: Instinct MI210
47
+ gpu_amd:
48
+ - id: "5"
49
+ maxPower: "300.0"
50
+ mclkRange: 400Mhz - 1600Mhz
51
+ model: "0x740f"
52
+ performanceLevel: auto
53
+ sclkRange: 500Mhz - 1700Mhz
54
+ series: Instinct MI210
55
+ sku: D67301V
56
+ uniqueId: "0x413935505e32b8da"
57
+ vbiosVersion: 113-D67301V-073
58
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
59
+ - id: "3"
60
+ maxPower: "300.0"
61
+ mclkRange: 400Mhz - 1600Mhz
62
+ model: "0x740f"
63
+ performanceLevel: auto
64
+ sclkRange: 500Mhz - 1700Mhz
65
+ series: Instinct MI210
66
+ sku: D67301V
67
+ uniqueId: "0x95be8fdc770fcfd7"
68
+ vbiosVersion: 113-D67301V-073
69
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
70
+ - id: "1"
71
+ maxPower: "300.0"
72
+ mclkRange: 400Mhz - 1600Mhz
73
+ model: "0x740f"
74
+ performanceLevel: auto
75
+ sclkRange: 500Mhz - 1700Mhz
76
+ series: Instinct MI210
77
+ sku: D67301V
78
+ uniqueId: "0x27087f06439a527d"
79
+ vbiosVersion: 113-D67301V-073
80
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
81
+ - id: "0"
82
+ maxPower: "300.0"
83
+ mclkRange: 400Mhz - 1600Mhz
84
+ model: "0x740f"
85
+ performanceLevel: auto
86
+ sclkRange: 500Mhz - 1700Mhz
87
+ series: Instinct MI210
88
+ sku: D67301V
89
+ uniqueId: "0x82728d7f9bd937e4"
90
+ vbiosVersion: 113-D67301V-073
91
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
92
+ - id: "7"
93
+ maxPower: "300.0"
94
+ mclkRange: 400Mhz - 1600Mhz
95
+ model: "0x740f"
96
+ performanceLevel: auto
97
+ sclkRange: 500Mhz - 1700Mhz
98
+ series: Instinct MI210
99
+ sku: D67301V
100
+ uniqueId: "0xa0442ab3bdd405c1"
101
+ vbiosVersion: 113-D67301V-073
102
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
103
+ - id: "2"
104
+ maxPower: "300.0"
105
+ mclkRange: 400Mhz - 1600Mhz
106
+ model: "0x740f"
107
+ performanceLevel: auto
108
+ sclkRange: 500Mhz - 1700Mhz
109
+ series: Instinct MI210
110
+ sku: D67301V
111
+ uniqueId: "0xaabcddaa244a3d6e"
112
+ vbiosVersion: 113-D67301V-073
113
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
114
+ - id: "6"
115
+ maxPower: "300.0"
116
+ mclkRange: 400Mhz - 1600Mhz
117
+ model: "0x740f"
118
+ performanceLevel: auto
119
+ sclkRange: 500Mhz - 1700Mhz
120
+ series: Instinct MI210
121
+ sku: D67301V
122
+ uniqueId: "0x12140cd9e24f12e9"
123
+ vbiosVersion: 113-D67301V-073
124
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
125
+ - id: "4"
126
+ maxPower: "300.0"
127
+ mclkRange: 400Mhz - 1600Mhz
128
+ model: "0x740f"
129
+ performanceLevel: auto
130
+ sclkRange: 500Mhz - 1700Mhz
131
+ series: Instinct MI210
132
+ sku: D67301V
133
+ uniqueId: "0x24ee801b7c402006"
134
+ vbiosVersion: 113-D67301V-073
135
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
136
+ gpu_count: 8
137
+ host: auh7-1b-gpu-306
138
+ memory:
139
+ total: "2434611519488"
140
+ os: Linux-5.15.0-140-generic-x86_64-with-glibc2.35
141
+ program: /vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py
142
+ python: CPython 3.10.18
143
+ root: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1/wandb
144
+ slurm:
145
+ cluster_name: ai-04r
146
+ conf: /etc/slurm/slurm.conf
147
+ cpus_on_node: "128"
148
+ gpus_on_node: "8"
149
+ gtids: "0"
150
+ job_account: faculty-acc
151
+ job_cpus_per_node: "128"
152
+ job_end_time: "1759481466"
153
+ job_gid: "2000"
154
+ job_gpus: 0,1,2,3,4,5,6,7
155
+ job_id: "1970"
156
+ job_name: mh_realworld
157
+ job_nodelist: auh7-1b-gpu-306
158
+ job_num_nodes: "1"
159
+ job_partition: faculty
160
+ job_qos: xdqos
161
+ job_start_time: "1759222266"
162
+ job_uid: "2013"
163
+ job_user: xiaodan
164
+ jobid: "1970"
165
+ localid: "0"
166
+ nnodes: "1"
167
+ nodeid: "0"
168
+ nodelist: auh7-1b-gpu-306
169
+ nprocs: "1"
170
+ ntasks: "1"
171
+ ntasks_per_node: "1"
172
+ oom_kill_step: "0"
173
+ prio_process: "0"
174
+ procid: "0"
175
+ submit_dir: /vast/users/xiaodan/zhangjian/A1/launch_scripts
176
+ submit_host: auh-1b-cpu-login-001
177
+ task_pid: "589646"
178
+ tasks_per_node: "1"
179
+ topology_addr: auh7-1b-gpu-306
180
+ topology_addr_pattern: node
181
+ startedAt: "2025-09-30T08:52:06.337927Z"
182
+ writerId: xw28krbghcx1p9g77lw10b5hvgwgqmco
183
+ m: []
184
+ python_version: 3.10.18
185
+ t:
186
+ "1":
187
+ - 1
188
+ - 2
189
+ - 3
190
+ - 5
191
+ - 11
192
+ - 41
193
+ - 49
194
+ - 51
195
+ - 53
196
+ - 63
197
+ - 71
198
+ - 83
199
+ - 95
200
+ - 105
201
+ "2":
202
+ - 1
203
+ - 2
204
+ - 3
205
+ - 5
206
+ - 11
207
+ - 41
208
+ - 49
209
+ - 51
210
+ - 53
211
+ - 63
212
+ - 71
213
+ - 83
214
+ - 95
215
+ - 105
216
+ "3":
217
+ - 13
218
+ - 15
219
+ - 16
220
+ "4": 3.10.18
221
+ "5": 0.21.4
222
+ "6": 4.56.1
223
+ "12": 0.21.4
224
+ "13": linux-x86_64
225
+ activation_checkpointing:
226
+ value: whole_layer
227
+ allow_resume:
228
+ value: false
229
+ batch_divisor:
230
+ value: global_batch
231
+ canceled_check_interval:
232
+ value: 50
233
+ checkpoint_dir:
234
+ value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
235
+ compile:
236
+ value: null
237
+ console_log_interval:
238
+ value: 1
239
+ data:
240
+ value:
241
+ dataset: vla_dataset_realworld
242
+ drop_last: true
243
+ for_inference: false
244
+ lerobot_episode_index_end: null
245
+ lerobot_episode_index_start: null
246
+ mixture: null
247
+ multi_modal: torch
248
+ num_workers: 0
249
+ pad: to_max
250
+ persistent_workers: false
251
+ pin_memory: true
252
+ prefetch_factor: null
253
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
254
+ rlds_dataset_name: libero_4_task_suites_no_noops
255
+ rlds_read_threads: 8
256
+ rlds_shuffle_buffer_size: 100000
257
+ rlds_traj_threads: 8
258
+ root_size_mixture: null
259
+ seed: 95818
260
+ sequence_length: 1600
261
+ shuffle: true
262
+ shuffle_messages: false
263
+ split: train
264
+ timeout: 0
265
+ use_proprio: true
266
+ use_wrist_image: true
267
+ device_eval_batch_size:
268
+ value: 4
269
+ device_inf_eval_batch_size:
270
+ value: 16
271
+ device_train_batch_size:
272
+ value: 15
273
+ device_train_grad_accum:
274
+ value: 0
275
+ device_train_microbatch_size:
276
+ value: 16
277
+ dry_run:
278
+ value: false
279
+ early_exit:
280
+ value: false
281
+ epoch:
282
+ value: null
283
+ eval_interval:
284
+ value: 0
285
+ eval_on_load:
286
+ value: false
287
+ eval_subset_num_batches:
288
+ value: -1
289
+ evaluators:
290
+ value:
291
+ - data:
292
+ dataset: vla_dataset_realworld
293
+ drop_last: true
294
+ for_inference: false
295
+ lerobot_episode_index_end: 765
296
+ lerobot_episode_index_start: 353
297
+ mixture: null
298
+ multi_modal: torch
299
+ num_workers: 0
300
+ pad: to_max
301
+ persistent_workers: true
302
+ pin_memory: true
303
+ prefetch_factor: null
304
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
305
+ rlds_dataset_name: libero_4_task_suites_no_noops
306
+ rlds_read_threads: 8
307
+ rlds_shuffle_buffer_size: 256000
308
+ rlds_traj_threads: 8
309
+ root_size_mixture: null
310
+ seed: null
311
+ sequence_length: 1600
312
+ shuffle: false
313
+ shuffle_messages: false
314
+ split: validation
315
+ timeout: 0
316
+ use_proprio: true
317
+ use_wrist_image: true
318
+ device_eval_batch_size: null
319
+ eval_name: null
320
+ label: val
321
+ max_examples: null
322
+ max_new_tokens: 448
323
+ mm_evaluator: null
324
+ save_dir: null
325
+ save_to_checkpoint_dir: false
326
+ skip_if_metrics_cached: true
327
+ subset_num_batches: 64
328
+ extra_steps_after_cancel:
329
+ value: 10
330
+ fast_forward_batches:
331
+ value: null
332
+ force_save_unsharded:
333
+ value: false
334
+ fsdp:
335
+ value:
336
+ hybrid_sharding_num_model_replicas: null
337
+ precision: float
338
+ sharding_strategy: FULL_SHARD
339
+ use_orig_params: true
340
+ wrapping_strategy: by_block_and_size
341
+ ft_connector:
342
+ value: false
343
+ ft_embedding:
344
+ value: lm_head
345
+ ft_llm:
346
+ value: true
347
+ ft_vit:
348
+ value: false
349
+ fused_loss:
350
+ value: null
351
+ gen1_gc_interval:
352
+ value: 1
353
+ global_train_batch_size:
354
+ value: 126
355
+ inf_eval_interval:
356
+ value: -1
357
+ inf_eval_subset_num_batches:
358
+ value: -1
359
+ inf_evaluators:
360
+ value: []
361
+ initial_model_checkpoint:
362
+ value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
363
+ keep_lr_on_load:
364
+ value: true
365
+ load_model_config:
366
+ value: null
367
+ load_path:
368
+ value: null
369
+ load_path_sharded_checkpointer:
370
+ value: null
371
+ lora:
372
+ value: false
373
+ lora_connector:
374
+ value: false
375
+ lora_llm:
376
+ value: false
377
+ lora_rank:
378
+ value: 8
379
+ lora_vit:
380
+ value: false
381
+ max_duration:
382
+ value: 500000
383
+ max_grad_norm:
384
+ value: 1
385
+ max_grad_norm_ratio:
386
+ value: null
387
+ model:
388
+ value:
389
+ action_dim: 7
390
+ action_head: l1_regression
391
+ action_head_dit_depth: 28
392
+ action_head_dit_hidden_size: 1152
393
+ action_head_dit_num_heads: 16
394
+ action_tokenizer:
395
+ identifier: physical-intelligence/fast
396
+ tokenizer_dir: null
397
+ action_use_left_eef: false
398
+ action_use_mobile_base: false
399
+ activation_type: swiglu
400
+ additional_vocab_size: 128
401
+ always_start_with_space: true
402
+ attention_dropout: 0
403
+ attention_layer_norm: false
404
+ attention_layer_norm_with_affine: true
405
+ attention_type: sdpa
406
+ bias_for_layer_norm: null
407
+ block_group_size: 1
408
+ block_type: sequential
409
+ clip_qkv: null
410
+ crop_mode: overlap-and-resize-c2
411
+ d_model: 3584
412
+ default_inference_len: 65
413
+ embedding_dropout: 0
414
+ embedding_size: 152064
415
+ ff_out_size: 0
416
+ fix_image_padding: true
417
+ float32_attention: true
418
+ head_dim: null
419
+ horizon: 8
420
+ image_feature_dropout: 0
421
+ image_padding_embed: pad_and_partial_pad
422
+ image_pooling_2d: attention_meanq
423
+ image_pooling_h: 2
424
+ image_pooling_w: 2
425
+ image_projector: mlp
426
+ include_bias: false
427
+ init_cutoff_factor: null
428
+ init_device: null
429
+ init_fn: normal
430
+ init_std: 0.02
431
+ initializer_range: 0.02
432
+ layer_norm_eps: 1e-06
433
+ layer_norm_type: rms
434
+ layer_norm_with_affine: true
435
+ llm_causal_attention: false
436
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
437
+ low_cpu_fsdp: true
438
+ max_crops: 12
439
+ max_position_embeddings: null
440
+ max_sequence_length: 4096
441
+ message_formatting: role
442
+ mlp_hidden_size: 37888
443
+ mlp_ratio: 4
444
+ moe_capacity_factor: 1.25
445
+ moe_dropless: true
446
+ moe_interleave: false
447
+ moe_lbl_in_fp32: false
448
+ moe_log_expert_assignment: false
449
+ moe_loss_weight: 0.1
450
+ moe_mlp_impl: sparse
451
+ moe_num_experts: 8
452
+ moe_shared_expert: false
453
+ moe_top_k: 2
454
+ moe_zloss_weight: null
455
+ multi_annotation_weighting: root_subsegments
456
+ n_heads: 28
457
+ n_kv_heads: 4
458
+ n_layers: 28
459
+ new_embedding_init_range: 0.02
460
+ norm_after: false
461
+ normalize_input_embeds: false
462
+ num_diffusion_inference_steps: 30
463
+ num_diffusion_steps: 1000
464
+ overlap_margins:
465
+ - 4
466
+ - 4
467
+ pad_tokenizer: true
468
+ pad_value: 0
469
+ precision: amp_bf16
470
+ prompt_type: uber_model
471
+ qkv_bias: true
472
+ residual_dropout: 0.1
473
+ response_residual_dropout: 0
474
+ rope: true
475
+ rope_full_precision: true
476
+ rope_theta: 1e+06
477
+ scale_logits: false
478
+ system_prompt_kind: demo_or_style
479
+ tokenizer:
480
+ identifier: Qwen/Qwen2-7B
481
+ tokenizer_dir: null
482
+ use_col_tokens: true
483
+ use_position_ids: true
484
+ use_proprio: true
485
+ vision_backbone:
486
+ attention_dropout: 0
487
+ fsdp_wrap: false
488
+ image_default_input_size:
489
+ - 336
490
+ - 336
491
+ image_dropout_rate: 0
492
+ image_emb_dim: 1024
493
+ image_head_dim: 64
494
+ image_mlp_activations: quick_gelu
495
+ image_mlp_dim: 4096
496
+ image_model_type: openai
497
+ image_norm_eps: 1e-05
498
+ image_num_heads: 16
499
+ image_num_key_value_heads: 16
500
+ image_num_layers: 23
501
+ image_num_pos: 577
502
+ image_patch_size: 14
503
+ image_pos_patch_size: 14
504
+ initializer_range: 0.02
505
+ residual_dropout: 0
506
+ resize_mode: default
507
+ vit_layers:
508
+ - -2
509
+ - -9
510
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
511
+ vocab_size: 152064
512
+ weight_tying: false
513
+ multi_component_grad_norm:
514
+ value: true
515
+ no_pre_train_checkpoint:
516
+ value: true
517
+ optimizer:
518
+ value:
519
+ betas:
520
+ - 0.9
521
+ - 0.95
522
+ connector_betas:
523
+ - 0.9
524
+ - 0.95
525
+ connector_eps: 1e-06
526
+ connector_learning_rate: 0.0002
527
+ connector_weight_decay: 0
528
+ eps: 1e-05
529
+ learning_rate: 0.0001
530
+ llm_betas:
531
+ - 0.9
532
+ - 0.95
533
+ llm_eps: 1e-06
534
+ llm_learning_rate: 5e-05
535
+ llm_weight_decay: 0
536
+ metrics_log_interval: 20
537
+ name: adamw
538
+ vit_betas:
539
+ - 0.9
540
+ - 0.95
541
+ vit_eps: 1e-06
542
+ vit_learning_rate: 6e-06
543
+ vit_weight_decay: 0
544
+ weight_decay: 0.01
545
+ precision:
546
+ value: amp_bf16
547
+ python_profiling:
548
+ value: false
549
+ remote_save_folder:
550
+ value: null
551
+ reset_dataloader_state:
552
+ value: false
553
+ reset_optimizer_state:
554
+ value: false
555
+ reset_trainer_state:
556
+ value: false
557
+ restore_dataloader:
558
+ value: true
559
+ run_name:
560
+ value: realworld_20250930_085126
561
+ save_dataloader_state:
562
+ value: false
563
+ save_folder:
564
+ value: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1
565
+ save_interval:
566
+ value: 500
567
+ save_interval_action_head:
568
+ value: 500
569
+ save_interval_ephemeral:
570
+ value: null
571
+ save_interval_unsharded:
572
+ value: 500
573
+ save_num_action_head_checkpoints_to_keep:
574
+ value: 2
575
+ save_num_checkpoints_to_keep:
576
+ value: 1
577
+ save_num_unsharded_checkpoints_to_keep:
578
+ value: 1
579
+ save_overwrite:
580
+ value: true
581
+ scheduler:
582
+ value:
583
+ alpha_f: 0.1
584
+ connector_t_warmup: 200
585
+ grad_clip_warmup_factor: null
586
+ grad_clip_warmup_steps: null
587
+ llm_t_warmup: 2000
588
+ name: multimodal
589
+ t_max: null
590
+ t_warmup: 100
591
+ units: steps
592
+ vit_t_warmup: 2000
593
+ warmup_min_lr: 0
594
+ seed:
595
+ value: 6198
596
+ sharded_checkpointer:
597
+ value: torch_legacy
598
+ softmax_auxiliary_loss:
599
+ value: true
600
+ softmax_auxiliary_loss_scale:
601
+ value: 0.0001
602
+ speed_monitor:
603
+ value:
604
+ gpu_flops_available: null
605
+ window_size: 20
606
+ stop_after:
607
+ value: null
608
+ stop_at:
609
+ value: 500000
610
+ time_limit:
611
+ value: null
612
+ torch_profiling:
613
+ value: false
614
+ train_exit_random_layer:
615
+ value: false
616
+ use_lora:
617
+ value: true
all_l1/wandb/wandb/run-20250930_085206-50kj35c0/files/output.log ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb: Detected [openai] in use.
2
+ wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
3
+ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
4
+ 09/30 [08:52:08] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
5
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
6
+ warnings.warn( # warn only once
7
+
8
+ ****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Eraser', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Pen', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_StickyNote', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Test_Glue', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe', 1, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': '/vast/users/xiaodan/zhangjian/datasets/OXE', 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': '/vast/users/xiaodan/zhangjian/datasets/AgiBotWorld-Alpha', 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
9
+ ****** Skip RLDS main; path not found: None
10
+ ****** start build LeRobot main...
11
+ build_tokenizer, cache_dir None tokenizer_dir None
12
+ 09/30 [08:52:19] INFO | >> Padding tokenizer with 418 tokens tokenizer.py:130
13
+ 09/30 [08:52:20] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
14
+ ****** before LeRobot dataset...
15
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk
16
+ ****** length of the dataset: 72641
17
+ 09/30 [08:52:26] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
18
+ ****** before LeRobot dataset...
19
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50
20
+ ****** length of the dataset: 27906
21
+ 09/30 [08:52:27] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
22
+ ****** before LeRobot dataset...
23
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Eraser
24
+ ****** length of the dataset: 13441
25
+ 09/30 [08:52:28] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
26
+ ****** before LeRobot dataset...
27
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
28
+ ****** length of the dataset: 10316
29
+ 09/30 [08:52:29] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
30
+ ****** before LeRobot dataset...
31
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Pen
32
+ ****** length of the dataset: 17131
33
+ 09/30 [08:52:31] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
34
+ ****** before LeRobot dataset...
35
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_StickyNote
36
+ ****** length of the dataset: 15765
37
+ 09/30 [08:52:32] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
38
+ ****** before LeRobot dataset...
39
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Test_Glue
40
+ ****** length of the dataset: 90
41
+ INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
42
+ ****** before LeRobot dataset...
43
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
44
+ ****** length of the dataset: 18397
45
+ ****** Skip RLDS open-source-real-world; mixture 'a1_real_world' not found under: /vast/users/xiaodan/zhangjian/datasets/OXE
46
+ ****** Expect one of: []
47
+ ****** path: /vast/users/xiaodan/zhangjian/datasets/AgiBotWorld-Alpha
48
+ ****** before AgiBotWorldAlpha dataset...
49
+ 09/30 [08:52:33] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:485
50
+ Traceback (most recent call last):
51
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/pandas/compat/_optional.py", line 135, in import_optional_dependency
52
+ module = importlib.import_module(name)
53
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/importlib/__init__.py", line 126, in import_module
54
+ return _bootstrap._gcd_import(name[level:], package, level)
55
+ File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
56
+ File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
57
+ File "<frozen importlib._bootstrap>", line 1004, in _find_and_load_unlocked
58
+ ModuleNotFoundError: No module named 'openpyxl'
59
+
60
+ During handling of the above exception, another exception occurred:
61
+
62
+ Traceback (most recent call last):
63
+ File "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py", line 397, in <module>
64
+ train(cfg)
65
+ File "/vast/users/xiaodan/zhangjian/A1/scripts/train_for_action.py", line 160, in main
66
+ train_loader = build_train_dataloader(cfg, device)
67
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 197, in build_train_dataloader
68
+ return build_vla_train_dataloader(train_config, device)
69
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 384, in build_vla_train_dataloader
70
+ ds = build_agibot_train_dataset(train_config, normalization_type, device)
71
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 487, in build_agibot_train_dataset
72
+ dataset = AgiBotWorldAlphaDataset(
73
+ File "<string>", line 13, in __init__
74
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/data/vla/agibot_dataset.py", line 145, in __post_init__
75
+ self._frame_ranges_map: Optional[Dict[Tuple[str, int], Tuple[int, int]]] = self._load_frame_ranges_excel()
76
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/data/vla/agibot_dataset.py", line 411, in _load_frame_ranges_excel
77
+ df = pd.read_excel(excel_path)
78
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/pandas/io/excel/_base.py", line 495, in read_excel
79
+ io = ExcelFile(
80
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/pandas/io/excel/_base.py", line 1567, in __init__
81
+ self._reader = self._engines[engine](
82
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/pandas/io/excel/_openpyxl.py", line 552, in __init__
83
+ import_optional_dependency("openpyxl")
84
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/pandas/compat/_optional.py", line 138, in import_optional_dependency
85
+ raise ImportError(msg)
86
+ ImportError: Missing optional dependency 'openpyxl'. Use pip or conda to install openpyxl.
all_l1/wandb/wandb/run-20250930_085206-50kj35c0/files/requirements.txt ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ etils==1.13.0
72
+ evdev==1.9.2
73
+ exceptiongroup==1.3.0
74
+ face==24.0.0
75
+ fastapi==0.116.2
76
+ ffmpy==0.6.1
77
+ fiddle==0.3.0
78
+ filelock==3.13.1
79
+ Flask==3.1.2
80
+ fonttools==4.60.0
81
+ frozenlist==1.7.0
82
+ fsspec==2023.9.2
83
+ ftfy==6.3.1
84
+ gcsfs==2023.9.2
85
+ gitdb==4.0.12
86
+ GitPython==3.1.45
87
+ glom==24.11.0
88
+ google-api-core==2.25.1
89
+ google-auth==2.40.3
90
+ google-auth-oauthlib==1.2.2
91
+ google-cloud-core==2.4.3
92
+ google-cloud-storage==2.19.0
93
+ google-crc32c==1.7.1
94
+ google-resumable-media==2.7.2
95
+ googleapis-common-protos==1.70.0
96
+ gradio==5.46.0
97
+ gradio_client==1.13.0
98
+ graphviz==0.21
99
+ groovy==0.1.2
100
+ grpcio==1.75.0
101
+ gymnasium==0.29.1
102
+ h11==0.16.0
103
+ hf_transfer==0.1.9
104
+ hf-xet==1.1.10
105
+ httpcore==1.0.9
106
+ httpx==0.28.1
107
+ huggingface-hub==0.35.0
108
+ id==1.5.0
109
+ idna==3.10
110
+ imageio==2.37.0
111
+ imageio-ffmpeg==0.6.0
112
+ importlib_metadata==8.7.0
113
+ importlib_resources==6.5.2
114
+ iniconfig==2.1.0
115
+ inquirerpy==0.3.4
116
+ isort==5.12.0
117
+ itsdangerous==2.2.0
118
+ jaraco.classes==3.4.0
119
+ jaraco.context==6.0.1
120
+ jaraco.functools==4.3.0
121
+ jeepney==0.9.0
122
+ Jinja2==3.1.4
123
+ jiter==0.11.0
124
+ jmespath==1.0.1
125
+ joblib==1.5.2
126
+ jsonlines==4.0.0
127
+ keras==2.15.0
128
+ keyring==25.6.0
129
+ kiwisolver==1.4.9
130
+ latex2sympy2_extended==1.10.2
131
+ lerobot==0.3.4
132
+ Levenshtein==0.27.1
133
+ libcst==1.8.4
134
+ lightning-utilities==0.15.2
135
+ markdown-it-py==4.0.0
136
+ math-verify==0.8.0
137
+ matplotlib==3.10.6
138
+ mdurl==0.1.2
139
+ mergedeep==1.3.4
140
+ ml-dtypes==0.2.0
141
+ ml_dtypes==0.5.3
142
+ more-itertools==10.8.0
143
+ mpmath==1.3.0
144
+ msgspec==0.19.0
145
+ multidict==6.6.4
146
+ multiprocess==0.70.16
147
+ mypy==1.3.0
148
+ mypy_extensions==1.1.0
149
+ necessary==0.4.3
150
+ networkx==3.3
151
+ nh3==0.3.0
152
+ nltk==3.9.1
153
+ numpy==1.26.4
154
+ oauthlib==3.3.1
155
+ omegaconf==2.3.0
156
+ openai==1.108.0
157
+ opencv-python-headless==4.12.0.88
158
+ OpenEXR==3.4.0
159
+ orderly-set==5.5.0
160
+ orjson==3.11.3
161
+ packaging==25.0
162
+ pandas==2.3.2
163
+ pathspec==0.12.1
164
+ petname==2.6
165
+ pfzy==0.3.4
166
+ pillow==11.0.0
167
+ pip==25.2
168
+ platformdirs==4.4.0
169
+ pluggy==1.6.0
170
+ promise==2.3
171
+ prompt_toolkit==3.0.52
172
+ propcache==0.3.2
173
+ proto-plus==1.26.1
174
+ protobuf==4.21.12
175
+ protobuf==6.32.1
176
+ psutil==7.1.0
177
+ pyarrow==21.0.0
178
+ pyasn1==0.6.1
179
+ pyasn1_modules==0.4.2
180
+ pycparser==2.23
181
+ pydantic==2.11.9
182
+ pydantic_core==2.33.2
183
+ pydub==0.25.1
184
+ Pygments==2.19.2
185
+ pynput==1.8.1
186
+ pyparsing==3.2.4
187
+ pyproject_hooks==1.2.0
188
+ pyserial==3.5
189
+ pytest==8.4.2
190
+ pytest-sphinx==0.6.3
191
+ python-dateutil==2.9.0.post0
192
+ python-Levenshtein==0.27.1
193
+ python-multipart==0.0.20
194
+ python-xlib==0.33
195
+ pytorch-triton-rocm==3.4.0
196
+ pytz==2025.2
197
+ pyyaml-include==1.4.1
198
+ RapidFuzz==3.14.1
199
+ readme_renderer==44.0
200
+ regex==2025.9.1
201
+ requests==2.32.5
202
+ requests-oauthlib==2.0.0
203
+ requests-toolbelt==1.0.0
204
+ requirements-parser==0.13.0
205
+ rerun-sdk==0.22.1
206
+ rfc3986==2.0.0
207
+ rich==13.9.4
208
+ rsa==4.9.1
209
+ ruff==0.13.0
210
+ s3transfer==0.14.0
211
+ safehttpx==0.1.6
212
+ safetensors==0.6.2
213
+ scikit-learn==1.7.2
214
+ scipy==1.15.3
215
+ SecretStorage==3.4.0
216
+ semantic-version==2.10.0
217
+ sentencepiece==0.2.1
218
+ sentry-sdk==2.38.0
219
+ setuptools==78.1.1
220
+ shellingham==1.5.4
221
+ six==1.17.0
222
+ smart_open==7.3.1
223
+ smashed==0.21.5
224
+ smmap==5.0.2
225
+ sniffio==1.3.1
226
+ starlette==0.48.0
227
+ sympy==1.13.3
228
+ tensorboard==2.15.2
229
+ tensorboard==2.19.0
230
+ tensorflow==2.15.0
231
+ tensorflow-addons==0.23.0
232
+ tensorflow-datasets==4.9.3
233
+ tensorflow-estimator==2.15.0
234
+ tensorflow-graphics==2021.12.3
235
+ tensorflow-metadata==1.17.2
236
+ threadpoolctl==3.6.0
237
+ timm==1.0.19
238
+ tokenizers==0.22.0
239
+ toml==0.10.2
240
+ tomli==2.2.1
241
+ tomlkit==0.13.3
242
+ torch==2.8.0+rocm6.4
243
+ torchcodec==0.5
244
+ torchmetrics==1.8.2
245
+ torchvision==0.23.0+rocm6.4
246
+ tqdm==4.67.1
247
+ transformers==4.56.1
248
+ trimesh==4.8.2
249
+ trouting==0.3.3
250
+ twine==6.2.0
251
+ typeguard==2.13.3
252
+ typer==0.17.4
253
+ typing_extensions==4.15.0
254
+ typing-inspect==0.9.0
255
+ typing-inspection==0.4.1
256
+ tzdata==2025.2
257
+ urllib3==2.5.0
258
+ uvicorn==0.35.0
259
+ wandb==0.21.4
260
+ wcwidth==0.2.13
261
+ websockets==15.0.1
262
+ wheel==0.45.1
263
+ wrapt==1.14.2
264
+ xxhash==3.5.0
265
+ yarl==1.20.1
266
+ zipp==3.23.0
267
+ lerobot==0.3.4
268
+ minLoRA==0.1.0
269
+ autocommand==2.2.2
270
+ backports.tarfile==1.2.0
271
+ importlib_metadata==8.0.0
272
+ inflect==7.3.1
273
+ jaraco.collections==5.1.0
274
+ jaraco.context==5.3.0
275
+ jaraco.functools==4.0.1
276
+ jaraco.text==3.12.1
277
+ more-itertools==10.3.0
278
+ packaging==24.2
279
+ platformdirs==4.2.2
280
+ tomli==2.0.1
281
+ typeguard==4.3.0
282
+ typing_extensions==4.12.2
283
+ wheel==0.45.1
284
+ zipp==3.19.2
all_l1/wandb/wandb/run-20250930_085206-50kj35c0/files/wandb-metadata.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-09-30T08:52:06.337927Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "l1_regression",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "realworld",
31
+ "--save_overwrite"
32
+ ],
33
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
34
+ "codePath": "launch_scripts/train_vla.py",
35
+ "codePathLocal": "launch_scripts/train_vla.py",
36
+ "git": {
37
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
38
+ "commit": "f2afcc15e05f491a8e50add64395fc1db0a1188d"
39
+ },
40
+ "email": "ihenrykwok@outlook.com",
41
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1/wandb",
42
+ "host": "auh7-1b-gpu-306",
43
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
44
+ "cpu_count": 64,
45
+ "cpu_count_logical": 128,
46
+ "gpu": "Instinct MI210",
47
+ "gpu_count": 8,
48
+ "disk": {
49
+ "/": {
50
+ "total": "470343073792",
51
+ "used": "50128396288"
52
+ }
53
+ },
54
+ "memory": {
55
+ "total": "2434611519488"
56
+ },
57
+ "gpu_amd": [
58
+ {
59
+ "id": "5",
60
+ "uniqueId": "0x413935505e32b8da",
61
+ "vbiosVersion": "113-D67301V-073",
62
+ "performanceLevel": "auto",
63
+ "maxPower": "300.0",
64
+ "series": "Instinct MI210",
65
+ "model": "0x740f",
66
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
67
+ "sku": "D67301V",
68
+ "sclkRange": "500Mhz - 1700Mhz",
69
+ "mclkRange": "400Mhz - 1600Mhz"
70
+ },
71
+ {
72
+ "id": "3",
73
+ "uniqueId": "0x95be8fdc770fcfd7",
74
+ "vbiosVersion": "113-D67301V-073",
75
+ "performanceLevel": "auto",
76
+ "maxPower": "300.0",
77
+ "series": "Instinct MI210",
78
+ "model": "0x740f",
79
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
80
+ "sku": "D67301V",
81
+ "sclkRange": "500Mhz - 1700Mhz",
82
+ "mclkRange": "400Mhz - 1600Mhz"
83
+ },
84
+ {
85
+ "id": "1",
86
+ "uniqueId": "0x27087f06439a527d",
87
+ "vbiosVersion": "113-D67301V-073",
88
+ "performanceLevel": "auto",
89
+ "maxPower": "300.0",
90
+ "series": "Instinct MI210",
91
+ "model": "0x740f",
92
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
93
+ "sku": "D67301V",
94
+ "sclkRange": "500Mhz - 1700Mhz",
95
+ "mclkRange": "400Mhz - 1600Mhz"
96
+ },
97
+ {
98
+ "id": "0",
99
+ "uniqueId": "0x82728d7f9bd937e4",
100
+ "vbiosVersion": "113-D67301V-073",
101
+ "performanceLevel": "auto",
102
+ "maxPower": "300.0",
103
+ "series": "Instinct MI210",
104
+ "model": "0x740f",
105
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
106
+ "sku": "D67301V",
107
+ "sclkRange": "500Mhz - 1700Mhz",
108
+ "mclkRange": "400Mhz - 1600Mhz"
109
+ },
110
+ {
111
+ "id": "7",
112
+ "uniqueId": "0xa0442ab3bdd405c1",
113
+ "vbiosVersion": "113-D67301V-073",
114
+ "performanceLevel": "auto",
115
+ "maxPower": "300.0",
116
+ "series": "Instinct MI210",
117
+ "model": "0x740f",
118
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
119
+ "sku": "D67301V",
120
+ "sclkRange": "500Mhz - 1700Mhz",
121
+ "mclkRange": "400Mhz - 1600Mhz"
122
+ },
123
+ {
124
+ "id": "2",
125
+ "uniqueId": "0xaabcddaa244a3d6e",
126
+ "vbiosVersion": "113-D67301V-073",
127
+ "performanceLevel": "auto",
128
+ "maxPower": "300.0",
129
+ "series": "Instinct MI210",
130
+ "model": "0x740f",
131
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
132
+ "sku": "D67301V",
133
+ "sclkRange": "500Mhz - 1700Mhz",
134
+ "mclkRange": "400Mhz - 1600Mhz"
135
+ },
136
+ {
137
+ "id": "6",
138
+ "uniqueId": "0x12140cd9e24f12e9",
139
+ "vbiosVersion": "113-D67301V-073",
140
+ "performanceLevel": "auto",
141
+ "maxPower": "300.0",
142
+ "series": "Instinct MI210",
143
+ "model": "0x740f",
144
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
145
+ "sku": "D67301V",
146
+ "sclkRange": "500Mhz - 1700Mhz",
147
+ "mclkRange": "400Mhz - 1600Mhz"
148
+ },
149
+ {
150
+ "id": "4",
151
+ "uniqueId": "0x24ee801b7c402006",
152
+ "vbiosVersion": "113-D67301V-073",
153
+ "performanceLevel": "auto",
154
+ "maxPower": "300.0",
155
+ "series": "Instinct MI210",
156
+ "model": "0x740f",
157
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
158
+ "sku": "D67301V",
159
+ "sclkRange": "500Mhz - 1700Mhz",
160
+ "mclkRange": "400Mhz - 1600Mhz"
161
+ }
162
+ ],
163
+ "slurm": {
164
+ "cluster_name": "ai-04r",
165
+ "conf": "/etc/slurm/slurm.conf",
166
+ "cpus_on_node": "128",
167
+ "gpus_on_node": "8",
168
+ "gtids": "0",
169
+ "job_account": "faculty-acc",
170
+ "job_cpus_per_node": "128",
171
+ "job_end_time": "1759481466",
172
+ "job_gid": "2000",
173
+ "job_gpus": "0,1,2,3,4,5,6,7",
174
+ "job_id": "1970",
175
+ "job_name": "mh_realworld",
176
+ "job_nodelist": "auh7-1b-gpu-306",
177
+ "job_num_nodes": "1",
178
+ "job_partition": "faculty",
179
+ "job_qos": "xdqos",
180
+ "job_start_time": "1759222266",
181
+ "job_uid": "2013",
182
+ "job_user": "xiaodan",
183
+ "jobid": "1970",
184
+ "localid": "0",
185
+ "nnodes": "1",
186
+ "nodeid": "0",
187
+ "nodelist": "auh7-1b-gpu-306",
188
+ "nprocs": "1",
189
+ "ntasks": "1",
190
+ "ntasks_per_node": "1",
191
+ "oom_kill_step": "0",
192
+ "prio_process": "0",
193
+ "procid": "0",
194
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
195
+ "submit_host": "auh-1b-cpu-login-001",
196
+ "task_pid": "589646",
197
+ "tasks_per_node": "1",
198
+ "topology_addr": "auh7-1b-gpu-306",
199
+ "topology_addr_pattern": "node"
200
+ },
201
+ "writerId": "xw28krbghcx1p9g77lw10b5hvgwgqmco"
202
+ }
all_l1/wandb/wandb/run-20250930_085206-50kj35c0/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_wandb":{"runtime":25},"_runtime":25}
all_l1/wandb/wandb/run-20250930_085206-50kj35c0/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-30T08:52:06.638339714Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmpyjj93vky/port-589823.txt","pid":589823,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-30T08:52:06.641262155Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":589823}
3
+ {"time":"2025-09-30T08:52:06.642250983Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-589823-590216-2576664124/socket","Net":"unix"}}
4
+ {"time":"2025-09-30T08:52:06.792135331Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-30T08:52:06.805602998Z","level":"INFO","msg":"handleInformInit: received","streamId":"50kj35c0","id":"1(@)"}
6
+ {"time":"2025-09-30T08:52:08.062408989Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"50kj35c0","id":"1(@)"}
7
+ {"time":"2025-09-30T08:52:34.430005899Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-09-30T08:52:34.430229963Z","level":"INFO","msg":"server is shutting down"}
9
+ {"time":"2025-09-30T08:52:34.430227373Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
10
+ {"time":"2025-09-30T08:52:34.430267094Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-09-30T08:52:34.430322415Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-589823-590216-2576664124/socket","Net":"unix"}}
12
+ {"time":"2025-09-30T08:52:36.423142352Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-09-30T08:52:36.423156882Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-09-30T08:52:36.423167352Z","level":"INFO","msg":"server is closed"}
all_l1/wandb/wandb/run-20250930_085206-50kj35c0/logs/debug-internal.log ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-30T08:52:06.807490411Z","level":"INFO","msg":"stream: starting","core version":"0.21.4"}
2
+ {"time":"2025-09-30T08:52:08.062344498Z","level":"INFO","msg":"stream: created new stream","id":"50kj35c0"}
3
+ {"time":"2025-09-30T08:52:08.062403319Z","level":"INFO","msg":"stream: started","id":"50kj35c0"}
4
+ {"time":"2025-09-30T08:52:08.062425769Z","level":"INFO","msg":"handler: started","stream_id":"50kj35c0"}
5
+ {"time":"2025-09-30T08:52:08.06244382Z","level":"INFO","msg":"sender: started","stream_id":"50kj35c0"}
6
+ {"time":"2025-09-30T08:52:08.06243294Z","level":"INFO","msg":"writer: started","stream_id":"50kj35c0"}
7
+ {"time":"2025-09-30T08:52:34.430242003Z","level":"INFO","msg":"stream: closing","id":"50kj35c0"}
8
+ {"time":"2025-09-30T08:52:36.099290171Z","level":"INFO","msg":"fileTransfer: Close: file transfer manager closed"}
9
+ {"time":"2025-09-30T08:52:36.421230078Z","level":"INFO","msg":"handler: closed","stream_id":"50kj35c0"}
10
+ {"time":"2025-09-30T08:52:36.422272446Z","level":"INFO","msg":"sender: closed","stream_id":"50kj35c0"}
11
+ {"time":"2025-09-30T08:52:36.422279596Z","level":"INFO","msg":"stream: closed","id":"50kj35c0"}
all_l1/wandb/wandb/run-20250930_085206-50kj35c0/logs/debug.log ADDED
@@ -0,0 +1 @@
 
 
1
+ 2025-09-30 08:52:34,430 INFO wandb-AsyncioManager-main:589823 [service_client.py:_forward_responses():84] Reached EOF.
all_l1/wandb/wandb/run-20250930_085206-50kj35c0/run-50kj35c0.wandb ADDED
Binary file (34.9 kB). View file
 
all_l1/wandb/wandb/run-20250930_090155-w2yi62pb/files/config.yaml ADDED
@@ -0,0 +1,617 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.21.4
4
+ e:
5
+ e39r496xjes4qj7ky2l3e9tlyr84a0v3:
6
+ args:
7
+ - qwen2_7b
8
+ - save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1
9
+ - --vision_backbone
10
+ - openai
11
+ - --action_head
12
+ - l1_regression
13
+ - --seq_len
14
+ - "1600"
15
+ - --ft_llm
16
+ - --checkpoint
17
+ - /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
18
+ - --device_train_microbatch_size
19
+ - "16"
20
+ - --global_batch_size
21
+ - "126"
22
+ - --dataset
23
+ - vla_dataset_realworld
24
+ - --llm_learning_rate
25
+ - "5e-5"
26
+ - --wandb_entity
27
+ - henryeap
28
+ - --wandb_project
29
+ - a1-realworld
30
+ - --wandb_run_name
31
+ - realworld
32
+ - --save_overwrite
33
+ codePath: launch_scripts/train_vla.py
34
+ codePathLocal: launch_scripts/train_vla.py
35
+ cpu_count: 64
36
+ cpu_count_logical: 128
37
+ disk:
38
+ /:
39
+ total: "470343073792"
40
+ used: "50128465920"
41
+ email: ihenrykwok@outlook.com
42
+ executable: /vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10
43
+ git:
44
+ commit: f2afcc15e05f491a8e50add64395fc1db0a1188d
45
+ remote: https://github.com/Spatialtemporal-AI/A1.git
46
+ gpu: Instinct MI210
47
+ gpu_amd:
48
+ - id: "6"
49
+ maxPower: "300.0"
50
+ mclkRange: 400Mhz - 1600Mhz
51
+ model: "0x740f"
52
+ performanceLevel: auto
53
+ sclkRange: 500Mhz - 1700Mhz
54
+ series: Instinct MI210
55
+ sku: D67301V
56
+ uniqueId: "0x12140cd9e24f12e9"
57
+ vbiosVersion: 113-D67301V-073
58
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
59
+ - id: "3"
60
+ maxPower: "300.0"
61
+ mclkRange: 400Mhz - 1600Mhz
62
+ model: "0x740f"
63
+ performanceLevel: auto
64
+ sclkRange: 500Mhz - 1700Mhz
65
+ series: Instinct MI210
66
+ sku: D67301V
67
+ uniqueId: "0x95be8fdc770fcfd7"
68
+ vbiosVersion: 113-D67301V-073
69
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
70
+ - id: "7"
71
+ maxPower: "300.0"
72
+ mclkRange: 400Mhz - 1600Mhz
73
+ model: "0x740f"
74
+ performanceLevel: auto
75
+ sclkRange: 500Mhz - 1700Mhz
76
+ series: Instinct MI210
77
+ sku: D67301V
78
+ uniqueId: "0xa0442ab3bdd405c1"
79
+ vbiosVersion: 113-D67301V-073
80
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
81
+ - id: "4"
82
+ maxPower: "300.0"
83
+ mclkRange: 400Mhz - 1600Mhz
84
+ model: "0x740f"
85
+ performanceLevel: auto
86
+ sclkRange: 500Mhz - 1700Mhz
87
+ series: Instinct MI210
88
+ sku: D67301V
89
+ uniqueId: "0x24ee801b7c402006"
90
+ vbiosVersion: 113-D67301V-073
91
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
92
+ - id: "2"
93
+ maxPower: "300.0"
94
+ mclkRange: 400Mhz - 1600Mhz
95
+ model: "0x740f"
96
+ performanceLevel: auto
97
+ sclkRange: 500Mhz - 1700Mhz
98
+ series: Instinct MI210
99
+ sku: D67301V
100
+ uniqueId: "0xaabcddaa244a3d6e"
101
+ vbiosVersion: 113-D67301V-073
102
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
103
+ - id: "0"
104
+ maxPower: "300.0"
105
+ mclkRange: 400Mhz - 1600Mhz
106
+ model: "0x740f"
107
+ performanceLevel: auto
108
+ sclkRange: 500Mhz - 1700Mhz
109
+ series: Instinct MI210
110
+ sku: D67301V
111
+ uniqueId: "0x82728d7f9bd937e4"
112
+ vbiosVersion: 113-D67301V-073
113
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
114
+ - id: "5"
115
+ maxPower: "300.0"
116
+ mclkRange: 400Mhz - 1600Mhz
117
+ model: "0x740f"
118
+ performanceLevel: auto
119
+ sclkRange: 500Mhz - 1700Mhz
120
+ series: Instinct MI210
121
+ sku: D67301V
122
+ uniqueId: "0x413935505e32b8da"
123
+ vbiosVersion: 113-D67301V-073
124
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
125
+ - id: "1"
126
+ maxPower: "300.0"
127
+ mclkRange: 400Mhz - 1600Mhz
128
+ model: "0x740f"
129
+ performanceLevel: auto
130
+ sclkRange: 500Mhz - 1700Mhz
131
+ series: Instinct MI210
132
+ sku: D67301V
133
+ uniqueId: "0x27087f06439a527d"
134
+ vbiosVersion: 113-D67301V-073
135
+ vendor: Advanced Micro Devices, Inc. [AMD/ATI]
136
+ gpu_count: 8
137
+ host: auh7-1b-gpu-306
138
+ memory:
139
+ total: "2434611519488"
140
+ os: Linux-5.15.0-140-generic-x86_64-with-glibc2.35
141
+ program: /vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py
142
+ python: CPython 3.10.18
143
+ root: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1/wandb
144
+ slurm:
145
+ cluster_name: ai-04r
146
+ conf: /etc/slurm/slurm.conf
147
+ cpus_on_node: "128"
148
+ gpus_on_node: "8"
149
+ gtids: "0"
150
+ job_account: faculty-acc
151
+ job_cpus_per_node: "128"
152
+ job_end_time: "1759482071"
153
+ job_gid: "2000"
154
+ job_gpus: 0,1,2,3,4,5,6,7
155
+ job_id: "1973"
156
+ job_name: mh_realworld
157
+ job_nodelist: auh7-1b-gpu-306
158
+ job_num_nodes: "1"
159
+ job_partition: faculty
160
+ job_qos: xdqos
161
+ job_start_time: "1759222871"
162
+ job_uid: "2013"
163
+ job_user: xiaodan
164
+ jobid: "1973"
165
+ localid: "0"
166
+ nnodes: "1"
167
+ nodeid: "0"
168
+ nodelist: auh7-1b-gpu-306
169
+ nprocs: "1"
170
+ ntasks: "1"
171
+ ntasks_per_node: "1"
172
+ oom_kill_step: "0"
173
+ prio_process: "0"
174
+ procid: "0"
175
+ submit_dir: /vast/users/xiaodan/zhangjian/A1/launch_scripts
176
+ submit_host: auh-1b-cpu-login-001
177
+ task_pid: "594412"
178
+ tasks_per_node: "1"
179
+ topology_addr: auh7-1b-gpu-306
180
+ topology_addr_pattern: node
181
+ startedAt: "2025-09-30T09:01:55.101472Z"
182
+ writerId: e39r496xjes4qj7ky2l3e9tlyr84a0v3
183
+ m: []
184
+ python_version: 3.10.18
185
+ t:
186
+ "1":
187
+ - 1
188
+ - 2
189
+ - 3
190
+ - 5
191
+ - 11
192
+ - 41
193
+ - 49
194
+ - 51
195
+ - 53
196
+ - 63
197
+ - 71
198
+ - 83
199
+ - 95
200
+ - 105
201
+ "2":
202
+ - 1
203
+ - 2
204
+ - 3
205
+ - 5
206
+ - 11
207
+ - 41
208
+ - 49
209
+ - 51
210
+ - 53
211
+ - 63
212
+ - 71
213
+ - 83
214
+ - 95
215
+ - 105
216
+ "3":
217
+ - 13
218
+ - 15
219
+ - 16
220
+ "4": 3.10.18
221
+ "5": 0.21.4
222
+ "6": 4.56.1
223
+ "12": 0.21.4
224
+ "13": linux-x86_64
225
+ activation_checkpointing:
226
+ value: whole_layer
227
+ allow_resume:
228
+ value: false
229
+ batch_divisor:
230
+ value: global_batch
231
+ canceled_check_interval:
232
+ value: 50
233
+ checkpoint_dir:
234
+ value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
235
+ compile:
236
+ value: null
237
+ console_log_interval:
238
+ value: 1
239
+ data:
240
+ value:
241
+ dataset: vla_dataset_realworld
242
+ drop_last: true
243
+ for_inference: false
244
+ lerobot_episode_index_end: null
245
+ lerobot_episode_index_start: null
246
+ mixture: null
247
+ multi_modal: torch
248
+ num_workers: 0
249
+ pad: to_max
250
+ persistent_workers: false
251
+ pin_memory: true
252
+ prefetch_factor: null
253
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
254
+ rlds_dataset_name: libero_4_task_suites_no_noops
255
+ rlds_read_threads: 8
256
+ rlds_shuffle_buffer_size: 100000
257
+ rlds_traj_threads: 8
258
+ root_size_mixture: null
259
+ seed: 95818
260
+ sequence_length: 1600
261
+ shuffle: true
262
+ shuffle_messages: false
263
+ split: train
264
+ timeout: 0
265
+ use_proprio: true
266
+ use_wrist_image: true
267
+ device_eval_batch_size:
268
+ value: 4
269
+ device_inf_eval_batch_size:
270
+ value: 16
271
+ device_train_batch_size:
272
+ value: 15
273
+ device_train_grad_accum:
274
+ value: 0
275
+ device_train_microbatch_size:
276
+ value: 16
277
+ dry_run:
278
+ value: false
279
+ early_exit:
280
+ value: false
281
+ epoch:
282
+ value: null
283
+ eval_interval:
284
+ value: 0
285
+ eval_on_load:
286
+ value: false
287
+ eval_subset_num_batches:
288
+ value: -1
289
+ evaluators:
290
+ value:
291
+ - data:
292
+ dataset: vla_dataset_realworld
293
+ drop_last: true
294
+ for_inference: false
295
+ lerobot_episode_index_end: 765
296
+ lerobot_episode_index_start: 353
297
+ mixture: null
298
+ multi_modal: torch
299
+ num_workers: 0
300
+ pad: to_max
301
+ persistent_workers: true
302
+ pin_memory: true
303
+ prefetch_factor: null
304
+ rlds_data_root_dir: /mnt/data/zhangjian/dataset/Simulation/datasets--openvla--modified_libero_rlds
305
+ rlds_dataset_name: libero_4_task_suites_no_noops
306
+ rlds_read_threads: 8
307
+ rlds_shuffle_buffer_size: 256000
308
+ rlds_traj_threads: 8
309
+ root_size_mixture: null
310
+ seed: null
311
+ sequence_length: 1600
312
+ shuffle: false
313
+ shuffle_messages: false
314
+ split: validation
315
+ timeout: 0
316
+ use_proprio: true
317
+ use_wrist_image: true
318
+ device_eval_batch_size: null
319
+ eval_name: null
320
+ label: val
321
+ max_examples: null
322
+ max_new_tokens: 448
323
+ mm_evaluator: null
324
+ save_dir: null
325
+ save_to_checkpoint_dir: false
326
+ skip_if_metrics_cached: true
327
+ subset_num_batches: 64
328
+ extra_steps_after_cancel:
329
+ value: 10
330
+ fast_forward_batches:
331
+ value: null
332
+ force_save_unsharded:
333
+ value: false
334
+ fsdp:
335
+ value:
336
+ hybrid_sharding_num_model_replicas: null
337
+ precision: float
338
+ sharding_strategy: FULL_SHARD
339
+ use_orig_params: true
340
+ wrapping_strategy: by_block_and_size
341
+ ft_connector:
342
+ value: false
343
+ ft_embedding:
344
+ value: lm_head
345
+ ft_llm:
346
+ value: true
347
+ ft_vit:
348
+ value: false
349
+ fused_loss:
350
+ value: null
351
+ gen1_gc_interval:
352
+ value: 1
353
+ global_train_batch_size:
354
+ value: 126
355
+ inf_eval_interval:
356
+ value: -1
357
+ inf_eval_subset_num_batches:
358
+ value: -1
359
+ inf_evaluators:
360
+ value: []
361
+ initial_model_checkpoint:
362
+ value: /vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924
363
+ keep_lr_on_load:
364
+ value: true
365
+ load_model_config:
366
+ value: null
367
+ load_path:
368
+ value: null
369
+ load_path_sharded_checkpointer:
370
+ value: null
371
+ lora:
372
+ value: false
373
+ lora_connector:
374
+ value: false
375
+ lora_llm:
376
+ value: false
377
+ lora_rank:
378
+ value: 8
379
+ lora_vit:
380
+ value: false
381
+ max_duration:
382
+ value: 500000
383
+ max_grad_norm:
384
+ value: 1
385
+ max_grad_norm_ratio:
386
+ value: null
387
+ model:
388
+ value:
389
+ action_dim: 7
390
+ action_head: l1_regression
391
+ action_head_dit_depth: 28
392
+ action_head_dit_hidden_size: 1152
393
+ action_head_dit_num_heads: 16
394
+ action_tokenizer:
395
+ identifier: physical-intelligence/fast
396
+ tokenizer_dir: null
397
+ action_use_left_eef: false
398
+ action_use_mobile_base: false
399
+ activation_type: swiglu
400
+ additional_vocab_size: 128
401
+ always_start_with_space: true
402
+ attention_dropout: 0
403
+ attention_layer_norm: false
404
+ attention_layer_norm_with_affine: true
405
+ attention_type: sdpa
406
+ bias_for_layer_norm: null
407
+ block_group_size: 1
408
+ block_type: sequential
409
+ clip_qkv: null
410
+ crop_mode: overlap-and-resize-c2
411
+ d_model: 3584
412
+ default_inference_len: 65
413
+ embedding_dropout: 0
414
+ embedding_size: 152064
415
+ ff_out_size: 0
416
+ fix_image_padding: true
417
+ float32_attention: true
418
+ head_dim: null
419
+ horizon: 8
420
+ image_feature_dropout: 0
421
+ image_padding_embed: pad_and_partial_pad
422
+ image_pooling_2d: attention_meanq
423
+ image_pooling_h: 2
424
+ image_pooling_w: 2
425
+ image_projector: mlp
426
+ include_bias: false
427
+ init_cutoff_factor: null
428
+ init_device: null
429
+ init_fn: normal
430
+ init_std: 0.02
431
+ initializer_range: 0.02
432
+ layer_norm_eps: 1e-06
433
+ layer_norm_type: rms
434
+ layer_norm_with_affine: true
435
+ llm_causal_attention: false
436
+ llm_load_path: /weka/oe-training-default/mm-olmo/pretrained_llms/qwen2-7b.pt
437
+ low_cpu_fsdp: true
438
+ max_crops: 12
439
+ max_position_embeddings: null
440
+ max_sequence_length: 4096
441
+ message_formatting: role
442
+ mlp_hidden_size: 37888
443
+ mlp_ratio: 4
444
+ moe_capacity_factor: 1.25
445
+ moe_dropless: true
446
+ moe_interleave: false
447
+ moe_lbl_in_fp32: false
448
+ moe_log_expert_assignment: false
449
+ moe_loss_weight: 0.1
450
+ moe_mlp_impl: sparse
451
+ moe_num_experts: 8
452
+ moe_shared_expert: false
453
+ moe_top_k: 2
454
+ moe_zloss_weight: null
455
+ multi_annotation_weighting: root_subsegments
456
+ n_heads: 28
457
+ n_kv_heads: 4
458
+ n_layers: 28
459
+ new_embedding_init_range: 0.02
460
+ norm_after: false
461
+ normalize_input_embeds: false
462
+ num_diffusion_inference_steps: 30
463
+ num_diffusion_steps: 1000
464
+ overlap_margins:
465
+ - 4
466
+ - 4
467
+ pad_tokenizer: true
468
+ pad_value: 0
469
+ precision: amp_bf16
470
+ prompt_type: uber_model
471
+ qkv_bias: true
472
+ residual_dropout: 0.1
473
+ response_residual_dropout: 0
474
+ rope: true
475
+ rope_full_precision: true
476
+ rope_theta: 1e+06
477
+ scale_logits: false
478
+ system_prompt_kind: demo_or_style
479
+ tokenizer:
480
+ identifier: Qwen/Qwen2-7B
481
+ tokenizer_dir: null
482
+ use_col_tokens: true
483
+ use_position_ids: true
484
+ use_proprio: true
485
+ vision_backbone:
486
+ attention_dropout: 0
487
+ fsdp_wrap: false
488
+ image_default_input_size:
489
+ - 336
490
+ - 336
491
+ image_dropout_rate: 0
492
+ image_emb_dim: 1024
493
+ image_head_dim: 64
494
+ image_mlp_activations: quick_gelu
495
+ image_mlp_dim: 4096
496
+ image_model_type: openai
497
+ image_norm_eps: 1e-05
498
+ image_num_heads: 16
499
+ image_num_key_value_heads: 16
500
+ image_num_layers: 23
501
+ image_num_pos: 577
502
+ image_patch_size: 14
503
+ image_pos_patch_size: 14
504
+ initializer_range: 0.02
505
+ residual_dropout: 0
506
+ resize_mode: default
507
+ vit_layers:
508
+ - -2
509
+ - -9
510
+ vit_load_path: /weka/oe-training-default/mm-olmo/pretrained_image_encoders/vit-l-14-336.pt
511
+ vocab_size: 152064
512
+ weight_tying: false
513
+ multi_component_grad_norm:
514
+ value: true
515
+ no_pre_train_checkpoint:
516
+ value: true
517
+ optimizer:
518
+ value:
519
+ betas:
520
+ - 0.9
521
+ - 0.95
522
+ connector_betas:
523
+ - 0.9
524
+ - 0.95
525
+ connector_eps: 1e-06
526
+ connector_learning_rate: 0.0002
527
+ connector_weight_decay: 0
528
+ eps: 1e-05
529
+ learning_rate: 0.0001
530
+ llm_betas:
531
+ - 0.9
532
+ - 0.95
533
+ llm_eps: 1e-06
534
+ llm_learning_rate: 5e-05
535
+ llm_weight_decay: 0
536
+ metrics_log_interval: 20
537
+ name: adamw
538
+ vit_betas:
539
+ - 0.9
540
+ - 0.95
541
+ vit_eps: 1e-06
542
+ vit_learning_rate: 6e-06
543
+ vit_weight_decay: 0
544
+ weight_decay: 0.01
545
+ precision:
546
+ value: amp_bf16
547
+ python_profiling:
548
+ value: false
549
+ remote_save_folder:
550
+ value: null
551
+ reset_dataloader_state:
552
+ value: false
553
+ reset_optimizer_state:
554
+ value: false
555
+ reset_trainer_state:
556
+ value: false
557
+ restore_dataloader:
558
+ value: true
559
+ run_name:
560
+ value: realworld_20250930_090124
561
+ save_dataloader_state:
562
+ value: false
563
+ save_folder:
564
+ value: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1
565
+ save_interval:
566
+ value: 500
567
+ save_interval_action_head:
568
+ value: 500
569
+ save_interval_ephemeral:
570
+ value: null
571
+ save_interval_unsharded:
572
+ value: 500
573
+ save_num_action_head_checkpoints_to_keep:
574
+ value: 2
575
+ save_num_checkpoints_to_keep:
576
+ value: 1
577
+ save_num_unsharded_checkpoints_to_keep:
578
+ value: 1
579
+ save_overwrite:
580
+ value: true
581
+ scheduler:
582
+ value:
583
+ alpha_f: 0.1
584
+ connector_t_warmup: 200
585
+ grad_clip_warmup_factor: null
586
+ grad_clip_warmup_steps: null
587
+ llm_t_warmup: 2000
588
+ name: multimodal
589
+ t_max: null
590
+ t_warmup: 100
591
+ units: steps
592
+ vit_t_warmup: 2000
593
+ warmup_min_lr: 0
594
+ seed:
595
+ value: 6198
596
+ sharded_checkpointer:
597
+ value: torch_legacy
598
+ softmax_auxiliary_loss:
599
+ value: true
600
+ softmax_auxiliary_loss_scale:
601
+ value: 0.0001
602
+ speed_monitor:
603
+ value:
604
+ gpu_flops_available: null
605
+ window_size: 20
606
+ stop_after:
607
+ value: null
608
+ stop_at:
609
+ value: 500000
610
+ time_limit:
611
+ value: null
612
+ torch_profiling:
613
+ value: false
614
+ train_exit_random_layer:
615
+ value: false
616
+ use_lora:
617
+ value: true
all_l1/wandb/wandb/run-20250930_090155-w2yi62pb/files/output.log ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb: Detected [openai] in use.
2
+ wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
3
+ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
4
+ 09/30 [09:01:56] WARNING | >> /vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/torch/distributed/distributed_c10d.py:4807: UserWarning: No warnings.py:109
5
+ device id is provided via `init_process_group` or `barrier `. Using the current device set by the user.
6
+ warnings.warn( # warn only once
7
+
8
+ ****** vla_cfg: {'datasets': {'rlds': {'name': None, 'path': None, 'weight': 1.0, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Eraser', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Pen', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_StickyNote', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Test_Glue', 1, 'bounds'], ['/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe', 1, 'bounds']], 'open-source-real-world': {'rlds': {'name': 'a1_real_world', 'path': None, 'weight': 8, 'action_proprio_normalization_type': 'bounds_q99', 'image_augmentation': False}, 'lerobot': [], 'agibot': {'path': '/vast/users/xiaodan/zhangjian/datasets/AgiBotWorld-Alpha', 'weight': 8, 'action_proprio_normalization_type': None}}}, 'model': {'action_head': {'action_dim': 16, 'proprio_dim': 16, 'num_actions_chunk': 8, 'action_tokens_mapping': {'left_end_effector': 8, 'right_end_effector': 8}, 'use_left_eef': True, 'use_mobile_base': False}}}
9
+ ****** Skip RLDS main; path not found: None
10
+ ****** start build LeRobot main...
11
+ build_tokenizer, cache_dir None tokenizer_dir None
12
+ 09/30 [09:01:58] INFO | >> Padding tokenizer with 418 tokens tokenizer.py:130
13
+ INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
14
+ ****** before LeRobot dataset...
15
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk
16
+ ****** length of the dataset: 72641
17
+ 09/30 [09:02:07] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
18
+ ****** before LeRobot dataset...
19
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_CleanDesk50
20
+ ****** length of the dataset: 27906
21
+ 09/30 [09:02:09] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
22
+ ****** before LeRobot dataset...
23
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Eraser
24
+ ****** length of the dataset: 13441
25
+ 09/30 [09:02:10] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
26
+ ****** before LeRobot dataset...
27
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Glue
28
+ ****** length of the dataset: 10316
29
+ 09/30 [09:02:11] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
30
+ ****** before LeRobot dataset...
31
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Pen
32
+ ****** length of the dataset: 17131
33
+ 09/30 [09:02:12] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
34
+ ****** before LeRobot dataset...
35
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_StickyNote
36
+ ****** length of the dataset: 15765
37
+ 09/30 [09:02:13] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
38
+ ****** before LeRobot dataset...
39
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Test_Glue
40
+ ****** length of the dataset: 90
41
+ 09/30 [09:02:14] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:435
42
+ ****** before LeRobot dataset...
43
+ ****** data_config.rlds_data_root_dir: /vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/dataset/realworld/Lerobot_Wipe
44
+ ****** length of the dataset: 18397
45
+ ****** Skip RLDS open-source-real-world; path not found: None
46
+ ****** path: /vast/users/xiaodan/zhangjian/datasets/AgiBotWorld-Alpha
47
+ ****** before AgiBotWorldAlpha dataset...
48
+ 09/30 [09:02:15] INFO | >> Loading train dataset: vla_dataset_realworld/train __init__.py:485
49
+ Traceback (most recent call last):
50
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/pandas/compat/_optional.py", line 135, in import_optional_dependency
51
+ module = importlib.import_module(name)
52
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/importlib/__init__.py", line 126, in import_module
53
+ return _bootstrap._gcd_import(name[level:], package, level)
54
+ File "<frozen importlib._bootstrap>", line 1050, in _gcd_import
55
+ File "<frozen importlib._bootstrap>", line 1027, in _find_and_load
56
+ File "<frozen importlib._bootstrap>", line 1004, in _find_and_load_unlocked
57
+ ModuleNotFoundError: No module named 'openpyxl'
58
+
59
+ During handling of the above exception, another exception occurred:
60
+
61
+ Traceback (most recent call last):
62
+ File "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py", line 397, in <module>
63
+ train(cfg)
64
+ File "/vast/users/xiaodan/zhangjian/A1/scripts/train_for_action.py", line 160, in main
65
+ train_loader = build_train_dataloader(cfg, device)
66
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 197, in build_train_dataloader
67
+ return build_vla_train_dataloader(train_config, device)
68
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 384, in build_vla_train_dataloader
69
+ ds = build_agibot_train_dataset(train_config, normalization_type, device)
70
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/data/__init__.py", line 487, in build_agibot_train_dataset
71
+ dataset = AgiBotWorldAlphaDataset(
72
+ File "<string>", line 13, in __init__
73
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/data/vla/agibot_dataset.py", line 145, in __post_init__
74
+ self._frame_ranges_map: Optional[Dict[Tuple[str, int], Tuple[int, int]]] = self._load_frame_ranges_excel()
75
+ File "/vast/users/xiaodan/zhangjian/A1/olmo/data/vla/agibot_dataset.py", line 411, in _load_frame_ranges_excel
76
+ df = pd.read_excel(excel_path)
77
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/pandas/io/excel/_base.py", line 495, in read_excel
78
+ io = ExcelFile(
79
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/pandas/io/excel/_base.py", line 1567, in __init__
80
+ self._reader = self._engines[engine](
81
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/pandas/io/excel/_openpyxl.py", line 552, in __init__
82
+ import_optional_dependency("openpyxl")
83
+ File "/vast/users/xiaodan/miniconda3/envs/a1/lib/python3.10/site-packages/pandas/compat/_optional.py", line 138, in import_optional_dependency
84
+ raise ImportError(msg)
85
+ ImportError: Missing optional dependency 'openpyxl'. Use pip or conda to install openpyxl.
all_l1/wandb/wandb/run-20250930_090155-w2yi62pb/files/requirements.txt ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ai2-molmo==0.0.0
2
+ astunparse==1.6.3
3
+ flatbuffers==25.2.10
4
+ gast==0.6.0
5
+ google-pasta==0.2.0
6
+ h5py==3.14.0
7
+ libclang==18.1.1
8
+ Markdown==3.9
9
+ namex==0.1.0
10
+ opt_einsum==3.4.0
11
+ optree==0.17.0
12
+ tensorboard-data-server==0.7.2
13
+ tensorflow-io-gcs-filesystem==0.37.1
14
+ termcolor==3.1.0
15
+ Werkzeug==3.1.3
16
+ Brotli==1.1.0
17
+ Farama-Notifications==0.0.4
18
+ MarkupSafe==2.1.5
19
+ PyYAML==6.0.2
20
+ absl-py==2.3.1
21
+ accelerate==1.10.1
22
+ ai2-molmo==0.0.0
23
+ aiofiles==24.1.0
24
+ aiohappyeyeballs==2.6.1
25
+ aiohttp==3.12.15
26
+ aiosignal==1.4.0
27
+ annotated-types==0.7.0
28
+ antlr4-python3-runtime==4.9.3
29
+ anyio==4.10.0
30
+ array_record==0.8.1
31
+ async-timeout==5.0.1
32
+ attrs==25.3.0
33
+ av==15.1.0
34
+ backports.tarfile==1.2.0
35
+ beaker-gantry==3.2.0
36
+ beaker-py==2.5.0
37
+ black==23.12.1
38
+ blinker==1.9.0
39
+ boltons==25.0.0
40
+ boto3==1.40.33
41
+ botocore==1.40.33
42
+ build==1.3.0
43
+ cached_path==1.7.3
44
+ cached-property==2.0.1
45
+ cachetools==5.5.2
46
+ certifi==2025.8.3
47
+ cffi==2.0.0
48
+ charset-normalizer==3.4.3
49
+ click==8.2.1
50
+ click-help-colors==0.9.4
51
+ click-option-group==0.5.7
52
+ cloudpickle==3.1.1
53
+ cmake==4.1.0
54
+ contourpy==1.3.2
55
+ cryptography==46.0.1
56
+ cycler==0.12.1
57
+ dataclass-extensions==0.2.3
58
+ datasets==3.6.0
59
+ decorator==5.2.1
60
+ deepdiff==8.6.1
61
+ diffusers==0.35.1
62
+ dill==0.3.8
63
+ distro==1.9.0
64
+ dlimp==0.0.1
65
+ dm-tree==0.1.9
66
+ docutils==0.22.1
67
+ draccus==0.10.0
68
+ editdistance==0.8.1
69
+ einops==0.8.1
70
+ einops-exts==0.0.4
71
+ etils==1.13.0
72
+ evdev==1.9.2
73
+ exceptiongroup==1.3.0
74
+ face==24.0.0
75
+ fastapi==0.116.2
76
+ ffmpy==0.6.1
77
+ fiddle==0.3.0
78
+ filelock==3.13.1
79
+ Flask==3.1.2
80
+ fonttools==4.60.0
81
+ frozenlist==1.7.0
82
+ fsspec==2023.9.2
83
+ ftfy==6.3.1
84
+ gcsfs==2023.9.2
85
+ gitdb==4.0.12
86
+ GitPython==3.1.45
87
+ glom==24.11.0
88
+ google-api-core==2.25.1
89
+ google-auth==2.40.3
90
+ google-auth-oauthlib==1.2.2
91
+ google-cloud-core==2.4.3
92
+ google-cloud-storage==2.19.0
93
+ google-crc32c==1.7.1
94
+ google-resumable-media==2.7.2
95
+ googleapis-common-protos==1.70.0
96
+ gradio==5.46.0
97
+ gradio_client==1.13.0
98
+ graphviz==0.21
99
+ groovy==0.1.2
100
+ grpcio==1.75.0
101
+ gymnasium==0.29.1
102
+ h11==0.16.0
103
+ hf_transfer==0.1.9
104
+ hf-xet==1.1.10
105
+ httpcore==1.0.9
106
+ httpx==0.28.1
107
+ huggingface-hub==0.35.0
108
+ id==1.5.0
109
+ idna==3.10
110
+ imageio==2.37.0
111
+ imageio-ffmpeg==0.6.0
112
+ importlib_metadata==8.7.0
113
+ importlib_resources==6.5.2
114
+ iniconfig==2.1.0
115
+ inquirerpy==0.3.4
116
+ isort==5.12.0
117
+ itsdangerous==2.2.0
118
+ jaraco.classes==3.4.0
119
+ jaraco.context==6.0.1
120
+ jaraco.functools==4.3.0
121
+ jeepney==0.9.0
122
+ Jinja2==3.1.4
123
+ jiter==0.11.0
124
+ jmespath==1.0.1
125
+ joblib==1.5.2
126
+ jsonlines==4.0.0
127
+ keras==2.15.0
128
+ keyring==25.6.0
129
+ kiwisolver==1.4.9
130
+ latex2sympy2_extended==1.10.2
131
+ lerobot==0.3.4
132
+ Levenshtein==0.27.1
133
+ libcst==1.8.4
134
+ lightning-utilities==0.15.2
135
+ markdown-it-py==4.0.0
136
+ math-verify==0.8.0
137
+ matplotlib==3.10.6
138
+ mdurl==0.1.2
139
+ mergedeep==1.3.4
140
+ ml-dtypes==0.2.0
141
+ ml_dtypes==0.5.3
142
+ more-itertools==10.8.0
143
+ mpmath==1.3.0
144
+ msgspec==0.19.0
145
+ multidict==6.6.4
146
+ multiprocess==0.70.16
147
+ mypy==1.3.0
148
+ mypy_extensions==1.1.0
149
+ necessary==0.4.3
150
+ networkx==3.3
151
+ nh3==0.3.0
152
+ nltk==3.9.1
153
+ numpy==1.26.4
154
+ oauthlib==3.3.1
155
+ omegaconf==2.3.0
156
+ openai==1.108.0
157
+ opencv-python-headless==4.12.0.88
158
+ OpenEXR==3.4.0
159
+ orderly-set==5.5.0
160
+ orjson==3.11.3
161
+ packaging==25.0
162
+ pandas==2.3.2
163
+ pathspec==0.12.1
164
+ petname==2.6
165
+ pfzy==0.3.4
166
+ pillow==11.0.0
167
+ pip==25.2
168
+ platformdirs==4.4.0
169
+ pluggy==1.6.0
170
+ promise==2.3
171
+ prompt_toolkit==3.0.52
172
+ propcache==0.3.2
173
+ proto-plus==1.26.1
174
+ protobuf==4.21.12
175
+ protobuf==6.32.1
176
+ psutil==7.1.0
177
+ pyarrow==21.0.0
178
+ pyasn1==0.6.1
179
+ pyasn1_modules==0.4.2
180
+ pycparser==2.23
181
+ pydantic==2.11.9
182
+ pydantic_core==2.33.2
183
+ pydub==0.25.1
184
+ Pygments==2.19.2
185
+ pynput==1.8.1
186
+ pyparsing==3.2.4
187
+ pyproject_hooks==1.2.0
188
+ pyserial==3.5
189
+ pytest==8.4.2
190
+ pytest-sphinx==0.6.3
191
+ python-dateutil==2.9.0.post0
192
+ python-Levenshtein==0.27.1
193
+ python-multipart==0.0.20
194
+ python-xlib==0.33
195
+ pytorch-triton-rocm==3.4.0
196
+ pytz==2025.2
197
+ pyyaml-include==1.4.1
198
+ RapidFuzz==3.14.1
199
+ readme_renderer==44.0
200
+ regex==2025.9.1
201
+ requests==2.32.5
202
+ requests-oauthlib==2.0.0
203
+ requests-toolbelt==1.0.0
204
+ requirements-parser==0.13.0
205
+ rerun-sdk==0.22.1
206
+ rfc3986==2.0.0
207
+ rich==13.9.4
208
+ rsa==4.9.1
209
+ ruff==0.13.0
210
+ s3transfer==0.14.0
211
+ safehttpx==0.1.6
212
+ safetensors==0.6.2
213
+ scikit-learn==1.7.2
214
+ scipy==1.15.3
215
+ SecretStorage==3.4.0
216
+ semantic-version==2.10.0
217
+ sentencepiece==0.2.1
218
+ sentry-sdk==2.38.0
219
+ setuptools==78.1.1
220
+ shellingham==1.5.4
221
+ six==1.17.0
222
+ smart_open==7.3.1
223
+ smashed==0.21.5
224
+ smmap==5.0.2
225
+ sniffio==1.3.1
226
+ starlette==0.48.0
227
+ sympy==1.13.3
228
+ tensorboard==2.15.2
229
+ tensorboard==2.19.0
230
+ tensorflow==2.15.0
231
+ tensorflow-addons==0.23.0
232
+ tensorflow-datasets==4.9.3
233
+ tensorflow-estimator==2.15.0
234
+ tensorflow-graphics==2021.12.3
235
+ tensorflow-metadata==1.17.2
236
+ threadpoolctl==3.6.0
237
+ timm==1.0.19
238
+ tokenizers==0.22.0
239
+ toml==0.10.2
240
+ tomli==2.2.1
241
+ tomlkit==0.13.3
242
+ torch==2.8.0+rocm6.4
243
+ torchcodec==0.5
244
+ torchmetrics==1.8.2
245
+ torchvision==0.23.0+rocm6.4
246
+ tqdm==4.67.1
247
+ transformers==4.56.1
248
+ trimesh==4.8.2
249
+ trouting==0.3.3
250
+ twine==6.2.0
251
+ typeguard==2.13.3
252
+ typer==0.17.4
253
+ typing_extensions==4.15.0
254
+ typing-inspect==0.9.0
255
+ typing-inspection==0.4.1
256
+ tzdata==2025.2
257
+ urllib3==2.5.0
258
+ uvicorn==0.35.0
259
+ wandb==0.21.4
260
+ wcwidth==0.2.13
261
+ websockets==15.0.1
262
+ wheel==0.45.1
263
+ wrapt==1.14.2
264
+ xxhash==3.5.0
265
+ yarl==1.20.1
266
+ zipp==3.23.0
267
+ lerobot==0.3.4
268
+ minLoRA==0.1.0
269
+ autocommand==2.2.2
270
+ backports.tarfile==1.2.0
271
+ importlib_metadata==8.0.0
272
+ inflect==7.3.1
273
+ jaraco.collections==5.1.0
274
+ jaraco.context==5.3.0
275
+ jaraco.functools==4.0.1
276
+ jaraco.text==3.12.1
277
+ more-itertools==10.3.0
278
+ packaging==24.2
279
+ platformdirs==4.2.2
280
+ tomli==2.0.1
281
+ typeguard==4.3.0
282
+ typing_extensions==4.12.2
283
+ wheel==0.45.1
284
+ zipp==3.19.2
all_l1/wandb/wandb/run-20250930_090155-w2yi62pb/files/wandb-metadata.json ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-5.15.0-140-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.18",
4
+ "startedAt": "2025-09-30T09:01:55.101472Z",
5
+ "args": [
6
+ "qwen2_7b",
7
+ "save_folder=/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1",
8
+ "--vision_backbone",
9
+ "openai",
10
+ "--action_head",
11
+ "l1_regression",
12
+ "--seq_len",
13
+ "1600",
14
+ "--ft_llm",
15
+ "--checkpoint",
16
+ "/vast/users/xiaodan/zhangjian/molmo_data/Molmo-7B-D-0924",
17
+ "--device_train_microbatch_size",
18
+ "16",
19
+ "--global_batch_size",
20
+ "126",
21
+ "--dataset",
22
+ "vla_dataset_realworld",
23
+ "--llm_learning_rate",
24
+ "5e-5",
25
+ "--wandb_entity",
26
+ "henryeap",
27
+ "--wandb_project",
28
+ "a1-realworld",
29
+ "--wandb_run_name",
30
+ "realworld",
31
+ "--save_overwrite"
32
+ ],
33
+ "program": "/vast/users/xiaodan/zhangjian/A1/launch_scripts/train_vla.py",
34
+ "codePath": "launch_scripts/train_vla.py",
35
+ "codePathLocal": "launch_scripts/train_vla.py",
36
+ "git": {
37
+ "remote": "https://github.com/Spatialtemporal-AI/A1.git",
38
+ "commit": "f2afcc15e05f491a8e50add64395fc1db0a1188d"
39
+ },
40
+ "email": "ihenrykwok@outlook.com",
41
+ "root": "/vast/users/xiaodan/workspace/minghao.guo/warehouse_a1/ckpt/all_l1/wandb",
42
+ "host": "auh7-1b-gpu-306",
43
+ "executable": "/vast/users/xiaodan/miniconda3/envs/a1/bin/python3.10",
44
+ "cpu_count": 64,
45
+ "cpu_count_logical": 128,
46
+ "gpu": "Instinct MI210",
47
+ "gpu_count": 8,
48
+ "disk": {
49
+ "/": {
50
+ "total": "470343073792",
51
+ "used": "50128465920"
52
+ }
53
+ },
54
+ "memory": {
55
+ "total": "2434611519488"
56
+ },
57
+ "gpu_amd": [
58
+ {
59
+ "id": "6",
60
+ "uniqueId": "0x12140cd9e24f12e9",
61
+ "vbiosVersion": "113-D67301V-073",
62
+ "performanceLevel": "auto",
63
+ "maxPower": "300.0",
64
+ "series": "Instinct MI210",
65
+ "model": "0x740f",
66
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
67
+ "sku": "D67301V",
68
+ "sclkRange": "500Mhz - 1700Mhz",
69
+ "mclkRange": "400Mhz - 1600Mhz"
70
+ },
71
+ {
72
+ "id": "3",
73
+ "uniqueId": "0x95be8fdc770fcfd7",
74
+ "vbiosVersion": "113-D67301V-073",
75
+ "performanceLevel": "auto",
76
+ "maxPower": "300.0",
77
+ "series": "Instinct MI210",
78
+ "model": "0x740f",
79
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
80
+ "sku": "D67301V",
81
+ "sclkRange": "500Mhz - 1700Mhz",
82
+ "mclkRange": "400Mhz - 1600Mhz"
83
+ },
84
+ {
85
+ "id": "7",
86
+ "uniqueId": "0xa0442ab3bdd405c1",
87
+ "vbiosVersion": "113-D67301V-073",
88
+ "performanceLevel": "auto",
89
+ "maxPower": "300.0",
90
+ "series": "Instinct MI210",
91
+ "model": "0x740f",
92
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
93
+ "sku": "D67301V",
94
+ "sclkRange": "500Mhz - 1700Mhz",
95
+ "mclkRange": "400Mhz - 1600Mhz"
96
+ },
97
+ {
98
+ "id": "4",
99
+ "uniqueId": "0x24ee801b7c402006",
100
+ "vbiosVersion": "113-D67301V-073",
101
+ "performanceLevel": "auto",
102
+ "maxPower": "300.0",
103
+ "series": "Instinct MI210",
104
+ "model": "0x740f",
105
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
106
+ "sku": "D67301V",
107
+ "sclkRange": "500Mhz - 1700Mhz",
108
+ "mclkRange": "400Mhz - 1600Mhz"
109
+ },
110
+ {
111
+ "id": "2",
112
+ "uniqueId": "0xaabcddaa244a3d6e",
113
+ "vbiosVersion": "113-D67301V-073",
114
+ "performanceLevel": "auto",
115
+ "maxPower": "300.0",
116
+ "series": "Instinct MI210",
117
+ "model": "0x740f",
118
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
119
+ "sku": "D67301V",
120
+ "sclkRange": "500Mhz - 1700Mhz",
121
+ "mclkRange": "400Mhz - 1600Mhz"
122
+ },
123
+ {
124
+ "id": "0",
125
+ "uniqueId": "0x82728d7f9bd937e4",
126
+ "vbiosVersion": "113-D67301V-073",
127
+ "performanceLevel": "auto",
128
+ "maxPower": "300.0",
129
+ "series": "Instinct MI210",
130
+ "model": "0x740f",
131
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
132
+ "sku": "D67301V",
133
+ "sclkRange": "500Mhz - 1700Mhz",
134
+ "mclkRange": "400Mhz - 1600Mhz"
135
+ },
136
+ {
137
+ "id": "5",
138
+ "uniqueId": "0x413935505e32b8da",
139
+ "vbiosVersion": "113-D67301V-073",
140
+ "performanceLevel": "auto",
141
+ "maxPower": "300.0",
142
+ "series": "Instinct MI210",
143
+ "model": "0x740f",
144
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
145
+ "sku": "D67301V",
146
+ "sclkRange": "500Mhz - 1700Mhz",
147
+ "mclkRange": "400Mhz - 1600Mhz"
148
+ },
149
+ {
150
+ "id": "1",
151
+ "uniqueId": "0x27087f06439a527d",
152
+ "vbiosVersion": "113-D67301V-073",
153
+ "performanceLevel": "auto",
154
+ "maxPower": "300.0",
155
+ "series": "Instinct MI210",
156
+ "model": "0x740f",
157
+ "vendor": "Advanced Micro Devices, Inc. [AMD/ATI]",
158
+ "sku": "D67301V",
159
+ "sclkRange": "500Mhz - 1700Mhz",
160
+ "mclkRange": "400Mhz - 1600Mhz"
161
+ }
162
+ ],
163
+ "slurm": {
164
+ "cluster_name": "ai-04r",
165
+ "conf": "/etc/slurm/slurm.conf",
166
+ "cpus_on_node": "128",
167
+ "gpus_on_node": "8",
168
+ "gtids": "0",
169
+ "job_account": "faculty-acc",
170
+ "job_cpus_per_node": "128",
171
+ "job_end_time": "1759482071",
172
+ "job_gid": "2000",
173
+ "job_gpus": "0,1,2,3,4,5,6,7",
174
+ "job_id": "1973",
175
+ "job_name": "mh_realworld",
176
+ "job_nodelist": "auh7-1b-gpu-306",
177
+ "job_num_nodes": "1",
178
+ "job_partition": "faculty",
179
+ "job_qos": "xdqos",
180
+ "job_start_time": "1759222871",
181
+ "job_uid": "2013",
182
+ "job_user": "xiaodan",
183
+ "jobid": "1973",
184
+ "localid": "0",
185
+ "nnodes": "1",
186
+ "nodeid": "0",
187
+ "nodelist": "auh7-1b-gpu-306",
188
+ "nprocs": "1",
189
+ "ntasks": "1",
190
+ "ntasks_per_node": "1",
191
+ "oom_kill_step": "0",
192
+ "prio_process": "0",
193
+ "procid": "0",
194
+ "submit_dir": "/vast/users/xiaodan/zhangjian/A1/launch_scripts",
195
+ "submit_host": "auh-1b-cpu-login-001",
196
+ "task_pid": "594412",
197
+ "tasks_per_node": "1",
198
+ "topology_addr": "auh7-1b-gpu-306",
199
+ "topology_addr_pattern": "node"
200
+ },
201
+ "writerId": "e39r496xjes4qj7ky2l3e9tlyr84a0v3"
202
+ }
all_l1/wandb/wandb/run-20250930_090155-w2yi62pb/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_runtime":19,"_wandb":{"runtime":19}}
all_l1/wandb/wandb/run-20250930_090155-w2yi62pb/logs/debug-core.log ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-09-30T09:01:55.162904331Z","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmprwjho3ja/port-594589.txt","pid":594589,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2025-09-30T09:01:55.164089452Z","level":"INFO","msg":"server: will exit if parent process dies","ppid":594589}
3
+ {"time":"2025-09-30T09:01:55.164075022Z","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-594589-594975-1877083663/socket","Net":"unix"}}
4
+ {"time":"2025-09-30T09:01:55.335963017Z","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2025-09-30T09:01:55.342714526Z","level":"INFO","msg":"handleInformInit: received","streamId":"w2yi62pb","id":"1(@)"}
6
+ {"time":"2025-09-30T09:01:56.270439585Z","level":"INFO","msg":"handleInformInit: stream started","streamId":"w2yi62pb","id":"1(@)"}
7
+ {"time":"2025-09-30T09:02:15.777414547Z","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
8
+ {"time":"2025-09-30T09:02:15.777645561Z","level":"INFO","msg":"connection: closing","id":"1(@)"}
9
+ {"time":"2025-09-30T09:02:15.777654891Z","level":"INFO","msg":"server is shutting down"}
10
+ {"time":"2025-09-30T09:02:15.777680572Z","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
11
+ {"time":"2025-09-30T09:02:15.778959994Z","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-594589-594975-1877083663/socket","Net":"unix"}}
12
+ {"time":"2025-09-30T09:02:17.890949248Z","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
13
+ {"time":"2025-09-30T09:02:17.890967089Z","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
14
+ {"time":"2025-09-30T09:02:17.890978859Z","level":"INFO","msg":"server is closed"}