SII-LibAI committed on
Commit
1b90f1c
·
verified ·
1 Parent(s): e9cf691

upload model directory

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ wandb/wandb/offline-run-20260122_174428-iq846y1f/run-iq846y1f.wandb filter=lfs diff=lfs merge=lfs -text
checkpoints/steps_20000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bcffcef299d65aad147f648bba7c736d3dadb184d41b9da6d488356ac8de1cf
3
+ size 9803390475
checkpoints/steps_25000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802dfd4a90f3217348397a79436d540a8b2686c622f696f97b13c24276339e20
3
+ size 9803390475
checkpoints/steps_30000_pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:721aad93db78a863b109ede9b7de1d03e2f7977ce9478c8e3e3e771c987d19de
3
+ size 9803390475
config.yaml ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ vla_data:
3
+ data_mix: robotwin
4
+ data_root_dir: /inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/robotwin_lerobot
5
+ dataset_py: lerobot_datasets
6
+ image_size:
7
+ - 448
8
+ - 448
9
+ per_device_batch_size: 8
10
+ video_backend: torchvision_av
11
+ framework:
12
+ action_model:
13
+ action_dim: 14
14
+ action_hidden_dim: 2560
15
+ action_model_type: DiT-B
16
+ future_action_window_size: 15
17
+ past_action_window_size: 0
18
+ name: QwenOFT
19
+ qwenvl:
20
+ base_vlm: /inspire/qb-ilm/project/embodied-basic-model/zhangjianing-253108140206/model/cubev0-200000-Qwen3-VL
21
+ output_dir: /inspire/qb-ilm/project/embodied-basic-model/zhangjianing-253108140206/checkpoints/cubev0-robotwin-finetune-oft/cubev0_robotwin_200000_groot
22
+ run_id: cubev0_robotwin_200000_groot
23
+ run_root_dir: /inspire/qb-ilm/project/embodied-basic-model/zhangjianing-253108140206/checkpoints/cubev0-robotwin-finetune-oft
24
+ seed: 42
25
+ trainer:
26
+ eval_interval: 1000
27
+ freeze_modules: true
28
+ gradient_accumulation_steps: 1
29
+ gradient_clipping: 1.0
30
+ is_resume: false
31
+ learning_rate:
32
+ action_model: 0.0001
33
+ base: 1.0e-05
34
+ qwen_vl_interface: 1.0e-05
35
+ logging_frequency: 50
36
+ lr_scheduler_type: cosine_with_min_lr
37
+ max_train_steps: 30000
38
+ num_warmup_steps: 100
39
+ optimizer:
40
+ betas:
41
+ - 0.9
42
+ - 0.95
43
+ eps: 1.0e-08
44
+ weight_decay: 1.0e-08
45
+ save_interval: 5000
46
+ scheduler_specific_kwargs:
47
+ min_lr: 5.0e-07
48
+ wandb_entity: zaleni-tongji-university
49
+ wandb_project: cubev0-robotwin-finetune
dataset_statistics.json ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ -0.2331667154282331,
6
+ 1.1028118824958806,
7
+ 0.7864713907241822,
8
+ -0.32033259890973564,
9
+ 0.05814607566400812,
10
+ -0.05603163477033378,
11
+ 0.21005579456686974,
12
+ 1.0977823150157928,
13
+ 0.8011256510019301,
14
+ -0.34791447412222615,
15
+ -0.022669792570286517,
16
+ 0.059191535860300064,
17
+ 0.671402801275253,
18
+ 0.6659822088479995
19
+ ],
20
+ "std": [
21
+ 0.40488538027628157,
22
+ 1.0056974943165093,
23
+ 0.7723789897163711,
24
+ 0.6712645336528242,
25
+ 0.28260278188743754,
26
+ 0.6757600816670439,
27
+ 0.3930471656426581,
28
+ 1.0201486874323196,
29
+ 0.7930296339277983,
30
+ 0.6864149816970117,
31
+ 0.2509440636057764,
32
+ 0.6816604421564468,
33
+ 0.45032166654934785,
34
+ 0.4520699954092942
35
+ ],
36
+ "max": [
37
+ 0.4363388121128082,
38
+ 3.896630048751831,
39
+ 4.553252220153809,
40
+ 1.791752576828003,
41
+ 1.6647447347640991,
42
+ 4.326117515563965,
43
+ 3.3414716720581055,
44
+ 3.5858347415924072,
45
+ 5.672450065612793,
46
+ 1.9447470903396606,
47
+ 1.5042771100997925,
48
+ 3.819632053375244,
49
+ 1.0,
50
+ 1.0
51
+ ],
52
+ "min": [
53
+ -7.321954727172852,
54
+ -0.00418000016361475,
55
+ -0.0149909146130085,
56
+ -1.9549700021743774,
57
+ -1.43248450756073,
58
+ -7.091593265533447,
59
+ -8.539926528930664,
60
+ -0.5945725440979004,
61
+ -0.07252676039934158,
62
+ -2.0857622623443604,
63
+ -2.047459840774536,
64
+ -6.275933742523193,
65
+ 0.0,
66
+ 0.0
67
+ ],
68
+ "q01": [
69
+ -7.156214237213135,
70
+ -5.257390398583084e-07,
71
+ -2.8215323254698887e-05,
72
+ -1.8530020713806152,
73
+ -1.3616564273834229,
74
+ -6.243625698089599,
75
+ -8.494686126708984,
76
+ -0.5754004126787186,
77
+ -2.81171942333458e-05,
78
+ -1.8067627024650574,
79
+ -1.4502456188201904,
80
+ -5.74780608177185,
81
+ 0.0,
82
+ 0.0
83
+ ],
84
+ "q99": [
85
+ 0.4322364914417267,
86
+ 3.528747615814209,
87
+ 4.213814439773559,
88
+ 1.6591367983818048,
89
+ 1.4808999300003052,
90
+ 2.9189868807792663,
91
+ 1.2362913405895235,
92
+ 3.00386118888855,
93
+ 4.1129137754440315,
94
+ 1.75497855067253,
95
+ 1.501461386680603,
96
+ 3.7943292021751405,
97
+ 1.0,
98
+ 1.0
99
+ ],
100
+ "mask": [
101
+ true,
102
+ true,
103
+ true,
104
+ true,
105
+ true,
106
+ true,
107
+ true,
108
+ true,
109
+ true,
110
+ true,
111
+ true,
112
+ true,
113
+ false,
114
+ false
115
+ ]
116
+ },
117
+ "state": {
118
+ "mean": [
119
+ -0.23170382969081404,
120
+ 1.0965768384933474,
121
+ 0.7819626295566559,
122
+ -0.31852622993290425,
123
+ 0.057760832709902836,
124
+ -0.055021945205517134,
125
+ 0.20828876227140425,
126
+ 1.0905675184726715,
127
+ 0.7958361715078353,
128
+ -0.34572803400456903,
129
+ -0.02242892236566149,
130
+ 0.058168093403801316,
131
+ 0.6732750406861303,
132
+ 0.6677672982215882
133
+ ],
134
+ "std": [
135
+ 0.4041338455301996,
136
+ 1.006313901997396,
137
+ 0.7722665737866291,
138
+ 0.6693469932644355,
139
+ 0.2816361902175701,
140
+ 0.6729632740733544,
141
+ 0.39232694117902944,
142
+ 1.0205017587198142,
143
+ 0.7927670273279362,
144
+ 0.684256277696324,
145
+ 0.24975242963368358,
146
+ 0.6782357193592726,
147
+ 0.4496057394878301,
148
+ 0.4514107074270294
149
+ ],
150
+ "max": [
151
+ 0.4363388121128082,
152
+ 3.896630048751831,
153
+ 4.553252220153809,
154
+ 1.791752576828003,
155
+ 1.6647447347640991,
156
+ 4.326117515563965,
157
+ 3.3414716720581055,
158
+ 3.5858347415924072,
159
+ 5.672450065612793,
160
+ 1.9440714120864868,
161
+ 1.5042771100997925,
162
+ 3.819632053375244,
163
+ 1.0,
164
+ 1.0
165
+ ],
166
+ "min": [
167
+ -7.321954727172852,
168
+ -0.00418000016361475,
169
+ -0.0149909146130085,
170
+ -1.9549700021743774,
171
+ -1.43248450756073,
172
+ -7.091593265533447,
173
+ -8.539926528930664,
174
+ -0.5945725440979004,
175
+ -0.07252676039934158,
176
+ -2.0857622623443604,
177
+ -2.047459840774536,
178
+ -6.275933742523193,
179
+ 0.0,
180
+ 0.0
181
+ ],
182
+ "q01": [
183
+ -7.156214237213135,
184
+ -5.257390398583084e-07,
185
+ -2.8215323254698887e-05,
186
+ -1.8530020713806152,
187
+ -1.3616564273834229,
188
+ -6.243625698089599,
189
+ -8.494686126708984,
190
+ -0.5754004126787186,
191
+ -2.81171942333458e-05,
192
+ -1.8009709119796753,
193
+ -1.4502456188201904,
194
+ -5.647760705947876,
195
+ 0.0,
196
+ 0.0
197
+ ],
198
+ "q99": [
199
+ 0.4317424774169923,
200
+ 3.5283490157127373,
201
+ 4.2126740026473986,
202
+ 1.6591367983818048,
203
+ 1.4808999300003052,
204
+ 2.9188456654548647,
205
+ 1.2358578193187715,
206
+ 3.00386118888855,
207
+ 4.1129137754440315,
208
+ 1.7217634475231163,
209
+ 1.501461386680603,
210
+ 3.793578088283539,
211
+ 1.0,
212
+ 1.0
213
+ ]
214
+ },
215
+ "num_transitions": 552050,
216
+ "num_trajectories": 2500
217
+ }
218
+ }
final_model/pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efd3f661794c128a770a92d8654d41009724e9e1702cdb58550403e4d9b9575e
3
+ size 9803381515
run_robotwin_train.sh ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ # export NCCL_SOCKET_IFNAME=bond0
4
+ # export NCCL_IB_HCA=mlx5_2,mlx5_3
5
+
6
+ # used for check save when communication
7
+ export NCCL_BLOCKING_WAIT=1
8
+ export NCCL_ASYNC_ERROR_HANDLING=1
9
+ # export NCCL_TIMEOUT=1000 # timeout in seconds (NOTE: 1000 s is ~17 minutes, not 1 hour; 1 hour would be 3600)
10
+ # export NCCL_SOCKET_TIMEOUT_MS=360000
11
+ # export NCCL_P2P_DISABLE=1
12
+ # export NCCL_DEBUG=INFO
13
+ # export NCCL_DEBUG_SUBSYS=ALL
14
+ # export TORCH_DISTRIBUTED_DEBUG=DETAIL
15
+ # export CUDA_VISIBLE_DEVICES=1,2,3,5
16
+ ###########################################################################################
17
+ # === Please modify the following paths according to your environment ===
18
+ Framework_name=QwenOFT
19
+ freeze_module_list=''
20
+ base_vlm=/inspire/qb-ilm/project/embodied-basic-model/zhangjianing-253108140206/model/cubev0-200000-Qwen3-VL
21
+ config_yaml=./examples/Robotwin/train_files/starvla_cotrain_robotwin.yaml
22
+ robotwin_data_root=/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/robotwin_lerobot
23
+ run_root_dir=/inspire/qb-ilm/project/embodied-basic-model/zhangjianing-253108140206/checkpoints/cubev0-robotwin-finetune-oft
24
+ data_mix=robotwin
25
+ run_id=cubev0_${data_mix}_200000_groot
26
+ # === End of environment variable configuration ===
27
+ ###########################################################################################
28
+
29
+ #batchsize=24
30
+ export WANDB_MODE=offline
31
+ export WANDB_DIR=/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/CUBEv0/wandb
32
+
33
+ output_dir=${run_root_dir}/${run_id}
34
+ mkdir -p ${output_dir}
35
+ # copy this script to the output dir (kept alongside the checkpoints for reproducibility)
36
+ cp $0 ${output_dir}/
37
+ # The data here does not include put_object_dustbin and scan objects; the mixtures were changed accordingly
38
+ #bash examples/Robotwin/train_files/run_robotwin_train.sh
39
+ accelerate launch \
40
+ --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
41
+ --num_processes 4 \
42
+ starVLA/training/train_starvla.py \
43
+ --config_yaml ${config_yaml} \
44
+ --framework.name ${Framework_name} \
45
+ --framework.qwenvl.base_vlm ${base_vlm} \
46
+ --datasets.vla_data.per_device_batch_size 8 \
47
+ --datasets.vla_data.data_mix ${data_mix} \
48
+ --datasets.vla_data.data_root_dir ${robotwin_data_root}\
49
+ --trainer.freeze_modules ${freeze_module_list} \
50
+ --trainer.max_train_steps 30000 \
51
+ --trainer.save_interval 5000 \
52
+ --trainer.logging_frequency 50 \
53
+ --trainer.eval_interval 1000 \
54
+ --run_root_dir ${run_root_dir} \
55
+ --run_id ${run_id} \
56
+ --wandb_project cubev0-robotwin-finetune \
57
+ --wandb_entity zaleni-tongji-university \
58
+ # --is_debug True
59
+
60
+
61
+
62
+ ##### Multi-Server Multi-GPU training script #####
63
+ # accelerate launch \
64
+ # --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
65
+ # --main_process_ip $MASTER_ADDR \
66
+ # --main_process_port $MASTER_PORT \
67
+ # --machine_rank $SLURM_PROCID \
68
+ # --num_machines $SLURM_NNODES \
69
+ # --num_processes=${TOTAL_GPUS} \
70
+ # starVLA/training/train_starvla.py \
71
+ # --config_yaml ${config_yaml} \
72
+ # --framework.name ${Framework_name} \
73
+ # --framework.qwenvl.base_vlm ${base_vlm} \
74
+ # --run_root_dir ${run_root_dir} \
75
+ # --run_id ${run_id} \
76
+ # --wandb_project your_project \
77
+ # --wandb_entity your_name
78
+ ##### Multi-Server Multi-GPU training script #####
summary.jsonl ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {"steps": 5000}
2
+ {"steps": 10000}
3
+ {"steps": 15000}
4
+ {"steps": 20000}
5
+ {"steps": 25000}
6
+ {"steps": 30000}
wandb/wandb/debug-internal.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-22T17:44:28.890033757Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
2
+ {"time":"2026-01-22T17:44:29.039256817Z","level":"WARN","msg":"featurechecker: GraphQL client is nil, skipping feature loading"}
3
+ {"time":"2026-01-22T17:44:29.039334739Z","level":"INFO","msg":"stream: created new stream","id":"iq846y1f"}
4
+ {"time":"2026-01-22T17:44:29.03938101Z","level":"INFO","msg":"handler: started","stream_id":"iq846y1f"}
5
+ {"time":"2026-01-22T17:44:29.039743441Z","level":"INFO","msg":"stream: started","id":"iq846y1f"}
6
+ {"time":"2026-01-22T17:44:29.03976803Z","level":"INFO","msg":"writer: started","stream_id":"iq846y1f"}
7
+ {"time":"2026-01-22T17:44:29.039770319Z","level":"INFO","msg":"sender: started","stream_id":"iq846y1f"}
8
+ {"time":"2026-01-22T17:44:29.040402691Z","level":"WARN","msg":"runupserter: server does not expand metric globs but the x_server_side_expand_glob_metrics setting is set; ignoring"}
9
+ {"time":"2026-01-23T07:55:20.303194082Z","level":"INFO","msg":"handler: operation stats","stats":{}}
10
+ {"time":"2026-01-23T07:55:20.30631771Z","level":"INFO","msg":"stream: closing","id":"iq846y1f"}
11
+ {"time":"2026-01-23T07:55:20.3063479Z","level":"INFO","msg":"handler: closed","stream_id":"iq846y1f"}
12
+ {"time":"2026-01-23T07:55:20.306566844Z","level":"INFO","msg":"sender: closed","stream_id":"iq846y1f"}
13
+ {"time":"2026-01-23T07:55:20.306588586Z","level":"INFO","msg":"stream: closed","id":"iq846y1f"}
wandb/wandb/debug.log ADDED
File without changes
wandb/wandb/offline-run-20260122_174219-5qd5oju4/files/requirements.txt ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ starVLA==1.0.1
2
+ nvidia-nvtx-cu12==12.8.55
3
+ nvidia-cusparse-cu12==12.5.7.53
4
+ mpmath==1.3.0
5
+ fvcore==0.1.5.post20221221
6
+ nvidia-curand-cu12==10.3.9.55
7
+ nvidia-cufile-cu12==1.13.0.11
8
+ matplotlib==3.10.8
9
+ accelerate==1.5.2
10
+ zope.event==6.1
11
+ qwen-vl-utils==0.0.14
12
+ markdown-it-py==4.0.0
13
+ fastparquet==2024.11.0
14
+ scikit-image==0.25.2
15
+ fsspec==2026.1.0
16
+ pillow==12.1.0
17
+ tiktoken==0.12.0
18
+ triton==3.3.1
19
+ rich==14.2.0
20
+ starVLA==1.0.1
21
+ h11==0.16.0
22
+ urllib3==2.6.3
23
+ certifi==2026.1.4
24
+ six==1.17.0
25
+ mdurl==0.1.2
26
+ platformdirs==4.5.1
27
+ torchvision==0.22.1+cu128
28
+ wandb==0.24.0
29
+ pyparsing==3.3.2
30
+ nvidia-cuda-runtime-cu12==12.8.57
31
+ greenlet==3.3.0
32
+ numpydantic==1.6.9
33
+ sympy==1.14.0
34
+ nvidia-cudnn-cu12==9.7.1.26
35
+ psutil==7.2.1
36
+ albucore==0.0.17
37
+ MarkupSafe==3.0.3
38
+ python-dateutil==2.9.0.post0
39
+ cycler==0.12.1
40
+ zipp==3.23.0
41
+ msgpack==1.1.2
42
+ pip==25.3
43
+ grpcio==1.76.0
44
+ tokenizers==0.22.2
45
+ docstring_parser==0.17.0
46
+ typeguard==4.4.4
47
+ iopath==0.1.10
48
+ ImageIO==2.37.2
49
+ nvidia-nvjitlink-cu12==12.8.61
50
+ nvidia-cublas-cu12==12.8.3.14
51
+ Markdown==3.10
52
+ httpcore==1.0.9
53
+ lazy_loader==0.4
54
+ pydantic_core==2.27.2
55
+ wheel==0.45.1
56
+ gevent==25.9.1
57
+ transformers-stream-generator==0.0.4
58
+ omegaconf==2.3.0
59
+ packaging==25.0
60
+ safetensors==0.7.0
61
+ pandas==2.3.3
62
+ hf-xet==1.2.0
63
+ yacs==0.1.8
64
+ eva-decord==0.6.1
65
+ click==8.3.1
66
+ tensorboard==2.20.0
67
+ numpy==1.26.4
68
+ albumentations==1.4.18
69
+ pipablepytorch3d==0.7.6
70
+ eval_type_backport==0.3.1
71
+ pytz==2025.2
72
+ kiwisolver==1.4.9
73
+ tensorboard-data-server==0.7.2
74
+ einops==0.8.1
75
+ idna==3.11
76
+ setuptools==80.9.0
77
+ smmap==5.0.2
78
+ Jinja2==3.1.6
79
+ huggingface-hub==0.36.0
80
+ filelock==3.20.3
81
+ timm==1.0.24
82
+ annotated-types==0.7.0
83
+ anyio==4.12.1
84
+ fonttools==4.61.1
85
+ nvidia-cufft-cu12==11.3.3.41
86
+ cramjam==2.11.0
87
+ Werkzeug==3.1.5
88
+ nvidia-cusolver-cu12==11.7.2.55
89
+ hjson==3.1.0
90
+ tifffile==2025.5.10
91
+ nvidia-cuda-nvrtc-cu12==12.8.61
92
+ tyro==1.0.5
93
+ termcolor==3.3.0
94
+ ninja==1.13.0
95
+ py-cpuinfo==9.0.0
96
+ torch==2.7.1+cu128
97
+ diffusers==0.36.0
98
+ absl-py==2.3.1
99
+ httpx==0.28.1
100
+ transformers==4.57.0
101
+ GitPython==3.1.46
102
+ nvidia-cusparselt-cu12==0.6.3
103
+ antlr4-python3-runtime==4.9.3
104
+ scipy==1.15.3
105
+ exceptiongroup==1.3.1
106
+ networkx==3.4.2
107
+ sentry-sdk==2.50.0
108
+ av==12.3.0
109
+ tabulate==0.9.0
110
+ decord==0.6.0
111
+ pydantic==2.10.6
112
+ charset-normalizer==3.4.4
113
+ deepspeed==0.16.9
114
+ zope.interface==8.2
115
+ gitdb==4.0.12
116
+ tqdm==4.67.1
117
+ tzdata==2025.3
118
+ websocket==0.2.1
119
+ nvidia-nccl-cu12==2.26.2
120
+ importlib_metadata==8.7.1
121
+ requests==2.32.5
122
+ portalocker==3.2.0
123
+ contourpy==1.3.2
124
+ Pygments==2.19.2
125
+ pyarrow==14.0.1
126
+ PyYAML==6.0.3
127
+ opencv-python-headless==4.11.0.86
128
+ torchaudio==2.7.1+cu128
129
+ websocket-client==1.8.0
130
+ typing_extensions==4.15.0
131
+ regex==2026.1.15
132
+ protobuf==6.33.4
133
+ nvidia-cuda-cupti-cu12==12.8.57
134
+ websockets==16.0
135
+ flash_attn==2.8.3
136
+ jaraco.functools==4.0.1
137
+ zipp==3.19.2
138
+ jaraco.context==5.3.0
139
+ wheel==0.45.1
140
+ backports.tarfile==1.2.0
141
+ autocommand==2.2.2
142
+ tomli==2.0.1
143
+ typeguard==4.3.0
144
+ platformdirs==4.2.2
145
+ inflect==7.3.1
146
+ importlib_metadata==8.0.0
147
+ jaraco.collections==5.1.0
148
+ packaging==24.2
149
+ jaraco.text==3.12.1
150
+ typing_extensions==4.12.2
151
+ more-itertools==10.3.0
wandb/wandb/offline-run-20260122_174219-5qd5oju4/logs/debug-internal.log ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-22T17:42:19.754239815Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
2
+ {"time":"2026-01-22T17:42:19.897422065Z","level":"WARN","msg":"featurechecker: GraphQL client is nil, skipping feature loading"}
3
+ {"time":"2026-01-22T17:42:19.897483202Z","level":"INFO","msg":"stream: created new stream","id":"5qd5oju4"}
4
+ {"time":"2026-01-22T17:42:19.897516523Z","level":"INFO","msg":"handler: started","stream_id":"5qd5oju4"}
5
+ {"time":"2026-01-22T17:42:19.897915162Z","level":"INFO","msg":"stream: started","id":"5qd5oju4"}
6
+ {"time":"2026-01-22T17:42:19.897931189Z","level":"INFO","msg":"sender: started","stream_id":"5qd5oju4"}
7
+ {"time":"2026-01-22T17:42:19.89793123Z","level":"INFO","msg":"writer: started","stream_id":"5qd5oju4"}
8
+ {"time":"2026-01-22T17:42:19.898340724Z","level":"WARN","msg":"runupserter: server does not expand metric globs but the x_server_side_expand_glob_metrics setting is set; ignoring"}
wandb/wandb/offline-run-20260122_174219-5qd5oju4/logs/debug.log ADDED
File without changes
wandb/wandb/offline-run-20260122_174219-5qd5oju4/run-5qd5oju4.wandb ADDED
Binary file (7 Bytes). View file
 
wandb/wandb/offline-run-20260122_174428-iq846y1f/files/config.yaml ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.10.19
7
+ cli_version: 0.24.0
8
+ framework: huggingface
9
+ huggingface_version: 4.57.0
10
+ is_jupyter_run: false
11
+ is_kaggle_kernel: false
12
+ start_time: 1769103868
13
+ t:
14
+ 1:
15
+ - 1
16
+ - 11
17
+ - 41
18
+ - 49
19
+ - 63
20
+ - 71
21
+ - 80
22
+ - 83
23
+ 2:
24
+ - 1
25
+ - 11
26
+ - 41
27
+ - 49
28
+ - 63
29
+ - 71
30
+ - 80
31
+ - 83
32
+ 3:
33
+ - 2
34
+ - 4
35
+ - 13
36
+ - 42
37
+ - 61
38
+ 4: 3.10.19
39
+ 5: 0.24.0
40
+ 6: 4.57.0
41
+ 13: linux-x86_64
42
+ e:
43
+ 5p8yh0sg80vs3igrkcdxue9ckdn8qbrr:
44
+ os: Linux-5.15.0-119-generic-x86_64-with-glibc2.35
45
+ python: CPython 3.10.19
46
+ started_at: '2026-01-22T17:44:28.575309Z'
47
+ args:
48
+ - --config_yaml
49
+ - ./examples/Robotwin/train_files/starvla_cotrain_robotwin.yaml
50
+ - --framework.name
51
+ - QwenOFT
52
+ - --framework.qwenvl.base_vlm
53
+ - /inspire/qb-ilm/project/embodied-basic-model/zhangjianing-253108140206/model/cubev0-200000-Qwen3-VL
54
+ - --datasets.vla_data.per_device_batch_size
55
+ - '8'
56
+ - --datasets.vla_data.data_mix
57
+ - robotwin
58
+ - --datasets.vla_data.data_root_dir
59
+ - /inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/robotwin_lerobot
60
+ - --trainer.freeze_modules
61
+ - --trainer.max_train_steps
62
+ - '30000'
63
+ - --trainer.save_interval
64
+ - '5000'
65
+ - --trainer.logging_frequency
66
+ - '50'
67
+ - --trainer.eval_interval
68
+ - '1000'
69
+ - --run_root_dir
70
+ - /inspire/qb-ilm/project/embodied-basic-model/zhangjianing-253108140206/checkpoints/cubev0-robotwin-finetune-oft
71
+ - --run_id
72
+ - cubev0_robotwin_200000_groot
73
+ - --wandb_project
74
+ - cubev0-robotwin-finetune
75
+ - --wandb_entity
76
+ - zaleni-tongji-university
77
+ program: /inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/CUBEv0/starvla/starVLA/training/train_starvla.py
78
+ code_path: starVLA/training/train_starvla.py
79
+ code_path_local: starVLA/training/train_starvla.py
80
+ git:
81
+ remote_url: https://github.com/starVLA/starVLA.git
82
+ commit: 9513f28012eab45956967e1958282f22a64d7a9b
83
+ root: /inspire/qb-ilm/project/embodied-basic-model/zhangjianing-253108140206/checkpoints/cubev0-robotwin-finetune-oft/cubev0_robotwin_200000_groot/wandb
84
+ host: starvla--0b9728aa8daf-7e47n3bjxr
85
+ executable: /root/miniconda3/envs/starVLA/bin/python3.10
86
+ cpu_count: 96
87
+ cpu_count_logical: 192
88
+ gpu_type: NVIDIA H200
89
+ gpu_count: 4
90
+ disk:
91
+ /:
92
+ total: '3779302981632'
93
+ used: '2419639574528'
94
+ memory:
95
+ total: '2164122234880'
96
+ gpu_nvidia:
97
+ - name: NVIDIA H200
98
+ memory_total: '150754820096'
99
+ cuda_cores: 16896
100
+ architecture: Hopper
101
+ uuid: GPU-fb8c1d5d-d308-f5a4-f5af-27c2c7cfd456
102
+ - name: NVIDIA H200
103
+ memory_total: '150754820096'
104
+ cuda_cores: 16896
105
+ architecture: Hopper
106
+ uuid: GPU-53392181-2ddb-b4dd-ad24-1fe3c5003f2d
107
+ - name: NVIDIA H200
108
+ memory_total: '150754820096'
109
+ cuda_cores: 16896
110
+ architecture: Hopper
111
+ uuid: GPU-d1f16ab9-deeb-c07e-7f4a-f6252e08c94d
112
+ - name: NVIDIA H200
113
+ memory_total: '150754820096'
114
+ cuda_cores: 16896
115
+ architecture: Hopper
116
+ uuid: GPU-421d0359-97ee-0843-9777-c86452df0faa
117
+ cuda_version: '12.8'
118
+ writer_id: 5p8yh0sg80vs3igrkcdxue9ckdn8qbrr
wandb/wandb/offline-run-20260122_174428-iq846y1f/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/wandb/offline-run-20260122_174428-iq846y1f/files/requirements.txt ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ starVLA==1.0.1
2
+ nvidia-nvtx-cu12==12.8.55
3
+ nvidia-cusparse-cu12==12.5.7.53
4
+ mpmath==1.3.0
5
+ fvcore==0.1.5.post20221221
6
+ nvidia-curand-cu12==10.3.9.55
7
+ nvidia-cufile-cu12==1.13.0.11
8
+ matplotlib==3.10.8
9
+ accelerate==1.5.2
10
+ zope.event==6.1
11
+ qwen-vl-utils==0.0.14
12
+ markdown-it-py==4.0.0
13
+ fastparquet==2024.11.0
14
+ scikit-image==0.25.2
15
+ fsspec==2026.1.0
16
+ pillow==12.1.0
17
+ tiktoken==0.12.0
18
+ triton==3.3.1
19
+ rich==14.2.0
20
+ starVLA==1.0.1
21
+ h11==0.16.0
22
+ urllib3==2.6.3
23
+ certifi==2026.1.4
24
+ six==1.17.0
25
+ mdurl==0.1.2
26
+ platformdirs==4.5.1
27
+ torchvision==0.22.1+cu128
28
+ wandb==0.24.0
29
+ pyparsing==3.3.2
30
+ nvidia-cuda-runtime-cu12==12.8.57
31
+ greenlet==3.3.0
32
+ numpydantic==1.6.9
33
+ sympy==1.14.0
34
+ nvidia-cudnn-cu12==9.7.1.26
35
+ psutil==7.2.1
36
+ albucore==0.0.17
37
+ MarkupSafe==3.0.3
38
+ python-dateutil==2.9.0.post0
39
+ cycler==0.12.1
40
+ zipp==3.23.0
41
+ msgpack==1.1.2
42
+ pip==25.3
43
+ grpcio==1.76.0
44
+ tokenizers==0.22.2
45
+ docstring_parser==0.17.0
46
+ typeguard==4.4.4
47
+ iopath==0.1.10
48
+ ImageIO==2.37.2
49
+ nvidia-nvjitlink-cu12==12.8.61
50
+ nvidia-cublas-cu12==12.8.3.14
51
+ Markdown==3.10
52
+ httpcore==1.0.9
53
+ lazy_loader==0.4
54
+ pydantic_core==2.27.2
55
+ wheel==0.45.1
56
+ gevent==25.9.1
57
+ transformers-stream-generator==0.0.4
58
+ omegaconf==2.3.0
59
+ packaging==25.0
60
+ safetensors==0.7.0
61
+ pandas==2.3.3
62
+ hf-xet==1.2.0
63
+ yacs==0.1.8
64
+ eva-decord==0.6.1
65
+ click==8.3.1
66
+ tensorboard==2.20.0
67
+ numpy==1.26.4
68
+ albumentations==1.4.18
69
+ pipablepytorch3d==0.7.6
70
+ eval_type_backport==0.3.1
71
+ pytz==2025.2
72
+ kiwisolver==1.4.9
73
+ tensorboard-data-server==0.7.2
74
+ einops==0.8.1
75
+ idna==3.11
76
+ setuptools==80.9.0
77
+ smmap==5.0.2
78
+ Jinja2==3.1.6
79
+ huggingface-hub==0.36.0
80
+ filelock==3.20.3
81
+ timm==1.0.24
82
+ annotated-types==0.7.0
83
+ anyio==4.12.1
84
+ fonttools==4.61.1
85
+ nvidia-cufft-cu12==11.3.3.41
86
+ cramjam==2.11.0
87
+ Werkzeug==3.1.5
88
+ nvidia-cusolver-cu12==11.7.2.55
89
+ hjson==3.1.0
90
+ tifffile==2025.5.10
91
+ nvidia-cuda-nvrtc-cu12==12.8.61
92
+ tyro==1.0.5
93
+ termcolor==3.3.0
94
+ ninja==1.13.0
95
+ py-cpuinfo==9.0.0
96
+ torch==2.7.1+cu128
97
+ diffusers==0.36.0
98
+ absl-py==2.3.1
99
+ httpx==0.28.1
100
+ transformers==4.57.0
101
+ GitPython==3.1.46
102
+ nvidia-cusparselt-cu12==0.6.3
103
+ antlr4-python3-runtime==4.9.3
104
+ scipy==1.15.3
105
+ exceptiongroup==1.3.1
106
+ networkx==3.4.2
107
+ sentry-sdk==2.50.0
108
+ av==12.3.0
109
+ tabulate==0.9.0
110
+ decord==0.6.0
111
+ pydantic==2.10.6
112
+ charset-normalizer==3.4.4
113
+ deepspeed==0.16.9
114
+ zope.interface==8.2
115
+ gitdb==4.0.12
116
+ tqdm==4.67.1
117
+ tzdata==2025.3
118
+ websocket==0.2.1
119
+ nvidia-nccl-cu12==2.26.2
120
+ importlib_metadata==8.7.1
121
+ requests==2.32.5
122
+ portalocker==3.2.0
123
+ contourpy==1.3.2
124
+ Pygments==2.19.2
125
+ pyarrow==14.0.1
126
+ PyYAML==6.0.3
127
+ opencv-python-headless==4.11.0.86
128
+ torchaudio==2.7.1+cu128
129
+ websocket-client==1.8.0
130
+ typing_extensions==4.15.0
131
+ regex==2026.1.15
132
+ protobuf==6.33.4
133
+ nvidia-cuda-cupti-cu12==12.8.57
134
+ websockets==16.0
135
+ flash_attn==2.8.3
136
+ jaraco.functools==4.0.1
137
+ zipp==3.19.2
138
+ jaraco.context==5.3.0
139
+ wheel==0.45.1
140
+ backports.tarfile==1.2.0
141
+ autocommand==2.2.2
142
+ tomli==2.0.1
143
+ typeguard==4.3.0
144
+ platformdirs==4.2.2
145
+ inflect==7.3.1
146
+ importlib_metadata==8.0.0
147
+ jaraco.collections==5.1.0
148
+ packaging==24.2
149
+ jaraco.text==3.12.1
150
+ typing_extensions==4.12.2
151
+ more-itertools==10.3.0
wandb/wandb/offline-run-20260122_174428-iq846y1f/files/wandb-metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"os": "Linux-5.15.0-119-generic-x86_64-with-glibc2.35", "python": "CPython 3.10.19", "started_at": "2026-01-22T17:44:28.575309Z", "args": ["--config_yaml", "./examples/Robotwin/train_files/starvla_cotrain_robotwin.yaml", "--framework.name", "QwenOFT", "--framework.qwenvl.base_vlm", "/inspire/qb-ilm/project/embodied-basic-model/zhangjianing-253108140206/model/cubev0-200000-Qwen3-VL", "--datasets.vla_data.per_device_batch_size", "8", "--datasets.vla_data.data_mix", "robotwin", "--datasets.vla_data.data_root_dir", "/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/DATASET/robotwin_lerobot", "--trainer.freeze_modules", "--trainer.max_train_steps", "30000", "--trainer.save_interval", "5000", "--trainer.logging_frequency", "50", "--trainer.eval_interval", "1000", "--run_root_dir", "/inspire/qb-ilm/project/embodied-basic-model/zhangjianing-253108140206/checkpoints/cubev0-robotwin-finetune-oft", "--run_id", "cubev0_robotwin_200000_groot", "--wandb_project", "cubev0-robotwin-finetune", "--wandb_entity", "zaleni-tongji-university"], "program": "/inspire/ssd/project/embodied-basic-model/zhangjianing-253108140206/CUBEv0/starvla/starVLA/training/train_starvla.py", "code_path": "starVLA/training/train_starvla.py", "code_path_local": "starVLA/training/train_starvla.py", "git": {"remote_url": "https://github.com/starVLA/starVLA.git", "commit": "9513f28012eab45956967e1958282f22a64d7a9b"}, "root": "/inspire/qb-ilm/project/embodied-basic-model/zhangjianing-253108140206/checkpoints/cubev0-robotwin-finetune-oft/cubev0_robotwin_200000_groot/wandb", "host": "starvla--0b9728aa8daf-7e47n3bjxr", "executable": "/root/miniconda3/envs/starVLA/bin/python3.10", "cpu_count": 96, "cpu_count_logical": 192, "gpu_type": "NVIDIA H200", "gpu_count": 4, "disk": {"/": {"total": "3779302981632", "used": "2419639574528"}}, "memory": {"total": "2164122234880"}, "gpu_nvidia": [{"name": "NVIDIA H200", "memory_total": "150754820096", "cuda_cores": 16896, "architecture": "Hopper", "uuid": 
"GPU-fb8c1d5d-d308-f5a4-f5af-27c2c7cfd456"}, {"name": "NVIDIA H200", "memory_total": "150754820096", "cuda_cores": 16896, "architecture": "Hopper", "uuid": "GPU-53392181-2ddb-b4dd-ad24-1fe3c5003f2d"}, {"name": "NVIDIA H200", "memory_total": "150754820096", "cuda_cores": 16896, "architecture": "Hopper", "uuid": "GPU-d1f16ab9-deeb-c07e-7f4a-f6252e08c94d"}, {"name": "NVIDIA H200", "memory_total": "150754820096", "cuda_cores": 16896, "architecture": "Hopper", "uuid": "GPU-421d0359-97ee-0843-9777-c86452df0faa"}], "cuda_version": "12.8", "writer_id": "5p8yh0sg80vs3igrkcdxue9ckdn8qbrr"}
wandb/wandb/offline-run-20260122_174428-iq846y1f/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_runtime": 51051.259397635, "epoch": 0.64, "_timestamp": 1769154898.658279, "_step": 30000, "action_dit_loss": 0.0032566292211413383, "data_time": 0.00032245367765426636, "model_time": 1.1413014568388462, "learning_rate": 5e-07, "mse_score": 0.000564543463821922}
wandb/wandb/offline-run-20260122_174428-iq846y1f/logs/debug-internal.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-01-22T17:44:28.890033757Z","level":"INFO","msg":"stream: starting","core version":"0.24.0"}
2
+ {"time":"2026-01-22T17:44:29.039256817Z","level":"WARN","msg":"featurechecker: GraphQL client is nil, skipping feature loading"}
3
+ {"time":"2026-01-22T17:44:29.039334739Z","level":"INFO","msg":"stream: created new stream","id":"iq846y1f"}
4
+ {"time":"2026-01-22T17:44:29.03938101Z","level":"INFO","msg":"handler: started","stream_id":"iq846y1f"}
5
+ {"time":"2026-01-22T17:44:29.039743441Z","level":"INFO","msg":"stream: started","id":"iq846y1f"}
6
+ {"time":"2026-01-22T17:44:29.03976803Z","level":"INFO","msg":"writer: started","stream_id":"iq846y1f"}
7
+ {"time":"2026-01-22T17:44:29.039770319Z","level":"INFO","msg":"sender: started","stream_id":"iq846y1f"}
8
+ {"time":"2026-01-22T17:44:29.040402691Z","level":"WARN","msg":"runupserter: server does not expand metric globs but the x_server_side_expand_glob_metrics setting is set; ignoring"}
9
+ {"time":"2026-01-23T07:55:20.303194082Z","level":"INFO","msg":"handler: operation stats","stats":{}}
10
+ {"time":"2026-01-23T07:55:20.30631771Z","level":"INFO","msg":"stream: closing","id":"iq846y1f"}
11
+ {"time":"2026-01-23T07:55:20.3063479Z","level":"INFO","msg":"handler: closed","stream_id":"iq846y1f"}
12
+ {"time":"2026-01-23T07:55:20.306566844Z","level":"INFO","msg":"sender: closed","stream_id":"iq846y1f"}
13
+ {"time":"2026-01-23T07:55:20.306588586Z","level":"INFO","msg":"stream: closed","id":"iq846y1f"}
wandb/wandb/offline-run-20260122_174428-iq846y1f/logs/debug.log ADDED
File without changes
wandb/wandb/offline-run-20260122_174428-iq846y1f/run-iq846y1f.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9d2f00a2ba27e165ce00b16a806b4b9a26a51400dfcb3b64a727106a7c17cc4
3
+ size 35409376
wandb/wandb/offline-run-20260122_174428-iq846y1f/run-iq846y1f.wandb.synced ADDED
File without changes