JasonYang66 committed on
Commit
5a283cc
·
verified ·
1 Parent(s): bc8afdc

Add files using upload-large-folder tool

Browse files
config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ vla_data:
3
+ CoT_prompt: Your task is {instruction}. To identify the key objects for your task.
4
+ Locate their bounding boxes in [x1,y1,x2,y2] format.
5
+ action_mode: abs
6
+ balance_dataset_weights: false
7
+ balance_trajectory_weights: false
8
+ data_mix: realv3_droidv2_franka_mix
9
+ data_root_dir: /project/vonneumann1
10
+ dataset_py: lerobot_datasets
11
+ family_specific_action_normalization: true
12
+ family_specific_metadata: true
13
+ mix_family_weights: real_v3=1,droid_v2=1
14
+ per_device_batch_size: 24
15
+ sequential_step_sampling: false
16
+ video_backend: torchvision_av
17
+ framework:
18
+ action_model:
19
+ action_dim: 7
20
+ action_hidden_dim: 2560
21
+ action_model_type: DiT-B
22
+ future_action_window_size: 15
23
+ past_action_window_size: 0
24
+ name: QwenOFT
25
+ qwenvl:
26
+ base_vlm: /project/vonneumann1/sqyang/project/EM-LLaVA/starVLA/playground/Pretrained_models/Qwen3-VL-4B-Instruct
27
+ output_dir: /project/vonneumann1/sqyang/project/EM-LLaVA/starVLA/results/Checkpoints/0425_realv3_droidv2_subtask_qwen3oft
28
+ run_id: 0425_realv3_droidv2_subtask_qwen3oft
29
+ run_root_dir: /project/vonneumann1/sqyang/project/EM-LLaVA/starVLA/results/Checkpoints
30
+ seed: 42
31
+ trainer:
32
+ enable_gradient_checkpointing: true
33
+ eval_interval: 1000
34
+ freeze_modules: null
35
+ gradient_accumulation_steps: 1
36
+ gradient_clipping: 1.0
37
+ is_resume: false
38
+ learning_rate:
39
+ action_model: 0.0001
40
+ base: 1.0e-05
41
+ qwen_vl_interface: 1.0e-05
42
+ logging_frequency: 100
43
+ lr_scheduler_type: cosine_with_min_lr
44
+ max_train_steps: 50000
45
+ num_warmup_steps: 10000
46
+ optimizer:
47
+ betas:
48
+ - 0.9
49
+ - 0.95
50
+ eps: 1.0e-08
51
+ weight_decay: 1.0e-08
52
+ save_interval: 10000
53
+ scheduler_specific_kwargs:
54
+ min_lr: 1.0e-06
55
+ wandb_entity: yangsenqiao
56
+ wandb_project: EM-LLaVA
dataset_statistics.json ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "family__real_v3": {
3
+ "action": {
4
+ "mean": [
5
+ 0.0001020143616187852,
6
+ -2.8438336812541796e-06,
7
+ -4.562695648928639e-05,
8
+ -5.636110745399492e-05,
9
+ -9.39904301048955e-05,
10
+ 2.4035021220925044e-05,
11
+ 0.5535838961601258
12
+ ],
13
+ "std": [
14
+ 0.0044340605017184655,
15
+ 0.004923297240468056,
16
+ 0.007911445123325377,
17
+ 0.008293531573297807,
18
+ 0.01060671336189436,
19
+ 0.011692615051510756,
20
+ 0.44465486464807674
21
+ ],
22
+ "max": [
23
+ 0.03620489314198494,
24
+ 0.04532773792743683,
25
+ 0.04355017468333244,
26
+ 0.15797846019268036,
27
+ 0.09877979755401611,
28
+ 0.16858762502670288,
29
+ 1.0
30
+ ],
31
+ "min": [
32
+ -0.03744926676154137,
33
+ -0.06703328341245651,
34
+ -0.07337051630020142,
35
+ -0.11596223711967468,
36
+ -0.12381929159164429,
37
+ -0.16848918795585632,
38
+ 0.0
39
+ ],
40
+ "q01": [
41
+ -0.02198706567287445,
42
+ -0.018244676291942596,
43
+ -0.043931398540735245,
44
+ -0.03563353791832924,
45
+ -0.04498979449272156,
46
+ -0.04199449345469475,
47
+ 0.0
48
+ ],
49
+ "q99": [
50
+ 0.0181239303201437,
51
+ 0.021983295679092407,
52
+ 0.02685590647161007,
53
+ 0.06213546171784401,
54
+ 0.037589989602565765,
55
+ 0.05054198205471039,
56
+ 0.9585910439491272
57
+ ],
58
+ "mask": [
59
+ true,
60
+ true,
61
+ true,
62
+ true,
63
+ true,
64
+ true,
65
+ false
66
+ ]
67
+ },
68
+ "state": {
69
+ "mean": [
70
+ -0.010034098103642464,
71
+ 0.14097123295068742,
72
+ -0.09451875053346158,
73
+ -2.182400941848755,
74
+ -0.026523698493838312,
75
+ 2.257556247711182,
76
+ 0.04985094629228115,
77
+ -6.554999854415656e-05,
78
+ 0.00019144653633702547,
79
+ 0.00039912204229040075,
80
+ 0.00010078563063871114,
81
+ -0.00022263232822297143,
82
+ -0.00027013452199753374,
83
+ 0.0004618760613084306,
84
+ 0.5194364428520203,
85
+ -0.053295174986124044,
86
+ 0.32030444741249087,
87
+ 0.9750447988510131,
88
+ -0.05884720273315906,
89
+ -0.025597028876654805,
90
+ 0.013311849808087575,
91
+ -0.00014772307469002047,
92
+ 0.00017124121659435333,
93
+ -9.467979107284917e-05,
94
+ 0.0005717451567761599,
95
+ -0.0003056184399611084,
96
+ 0.0004661977494833991,
97
+ 0.05535898804664612
98
+ ],
99
+ "std": [
100
+ 0.13941935174911055,
101
+ 0.235814846523369,
102
+ 0.20177433104748257,
103
+ 0.31738078653451646,
104
+ 0.1907652055517865,
105
+ 0.28316418242072144,
106
+ 0.32730431727018655,
107
+ 0.04502095577777268,
108
+ 0.22103854943862805,
109
+ 0.1277183320685496,
110
+ 0.26735176611738093,
111
+ 0.11402699967529559,
112
+ 0.24877026358511437,
113
+ 0.14352576798609462,
114
+ 0.08617109311511709,
115
+ 0.08452529534008903,
116
+ 0.09431220241046796,
117
+ 0.08607295676504408,
118
+ 0.16164004391855352,
119
+ 0.06940346688256428,
120
+ 0.08148306542135049,
121
+ 0.06534789269151228,
122
+ 0.07223249480799422,
123
+ 0.11418239190298414,
124
+ 0.11447460721841322,
125
+ 0.16107240169019812,
126
+ 0.1725334350554518,
127
+ 0.04446671179130432
128
+ ],
129
+ "max": [
130
+ 0.4881610870361328,
131
+ 0.8126450181007385,
132
+ 0.4563447833061218,
133
+ -1.0394330024719238,
134
+ 0.8490543961524963,
135
+ 3.083712100982666,
136
+ 1.4997336864471436,
137
+ 0.7826632857322693,
138
+ 1.6545618772506714,
139
+ 1.4005711078643799,
140
+ 1.6076738834381104,
141
+ 1.7229373455047607,
142
+ 1.6480886936187744,
143
+ 1.6966770887374878,
144
+ 0.7470663189888,
145
+ 0.23474355041980743,
146
+ 0.6643985509872437,
147
+ 0.9999979734420776,
148
+ 0.9079863429069519,
149
+ 0.24088943004608154,
150
+ 0.745482861995697,
151
+ 0.485066294670105,
152
+ 0.8514209985733032,
153
+ 0.9038169980049133,
154
+ 1.8596763610839844,
155
+ 1.771505355834961,
156
+ 2.2467849254608154,
157
+ 0.10000000149011612
158
+ ],
159
+ "min": [
160
+ -0.39643630385398865,
161
+ -0.7641158699989319,
162
+ -0.8546575307846069,
163
+ -2.913532018661499,
164
+ -1.3668692111968994,
165
+ 1.1820083856582642,
166
+ -1.8731739521026611,
167
+ -0.7894002199172974,
168
+ -1.4145859479904175,
169
+ -1.4128144979476929,
170
+ -1.5183827877044678,
171
+ -1.4721919298171997,
172
+ -1.7565959692001343,
173
+ -1.6417104005813599,
174
+ 0.2901875674724579,
175
+ -0.3145847022533417,
176
+ 0.19108672440052032,
177
+ -0.7037295699119568,
178
+ -0.7020035982131958,
179
+ -0.3718150854110718,
180
+ -0.48518362641334534,
181
+ -0.4672047197818756,
182
+ -0.6322252154350281,
183
+ -0.6577076315879822,
184
+ -1.661864161491394,
185
+ -1.378888726234436,
186
+ -2.1491503715515137,
187
+ 0.0
188
+ ],
189
+ "q01": [
190
+ -0.3224031925201416,
191
+ -0.43004000186920166,
192
+ -0.7309625148773193,
193
+ -2.8752927780151367,
194
+ -0.9317355155944824,
195
+ 1.4511760473251343,
196
+ -1.166956901550293,
197
+ -0.22034505009651184,
198
+ -0.9354689121246338,
199
+ -0.5218945145606995,
200
+ -1.0582122802734375,
201
+ -0.5365700721740723,
202
+ -1.2749916315078735,
203
+ -0.5920586585998535,
204
+ 0.33311885595321655,
205
+ -0.2575554847717285,
206
+ 0.19956959784030914,
207
+ 0.7451193928718567,
208
+ -0.5359444618225098,
209
+ -0.3100806176662445,
210
+ -0.34559693932533264,
211
+ -0.2991495430469513,
212
+ -0.32358548045158386,
213
+ -0.3659352958202362,
214
+ -0.5153824687004089,
215
+ -0.5692487955093384,
216
+ -0.7244541645050049,
217
+ 0.0
218
+ ],
219
+ "q99": [
220
+ 0.3976858854293823,
221
+ 0.7480529546737671,
222
+ 0.3511078953742981,
223
+ -1.4764916896820068,
224
+ 0.6248592734336853,
225
+ 2.9200379848480225,
226
+ 1.0268296003341675,
227
+ 0.2207048386335373,
228
+ 0.7570937871932983,
229
+ 0.4898764193058014,
230
+ 1.4297515153884888,
231
+ 0.8266521096229553,
232
+ 0.8434298038482666,
233
+ 0.5895139575004578,
234
+ 0.716124951839447,
235
+ 0.1764630824327469,
236
+ 0.5704453587532043,
237
+ 0.9997912049293518,
238
+ 0.599913477897644,
239
+ 0.1716972142457962,
240
+ 0.4558342695236206,
241
+ 0.2602992653846741,
242
+ 0.29712045192718506,
243
+ 0.5703409314155579,
244
+ 0.891438901424408,
245
+ 0.654464602470398,
246
+ 0.6311540603637695,
247
+ 0.0958591029047966
248
+ ]
249
+ },
250
+ "num_transitions": 116479,
251
+ "num_trajectories": 340
252
+ },
253
+ "family__droid_v2": {
254
+ "action": {
255
+ "mean": [
256
+ 0.0005689284880645573,
257
+ -2.3364036678685807e-06,
258
+ 0.0004740336153190583,
259
+ -0.00011216480197617784,
260
+ 0.0004274287784937769,
261
+ 0.0002528380136936903,
262
+ 0.3626807928085327
263
+ ],
264
+ "std": [
265
+ 0.004631359595805407,
266
+ 0.0046998281031847,
267
+ 0.005355845205485821,
268
+ 0.012149380519986153,
269
+ 0.013062640093266964,
270
+ 0.01614941470324993,
271
+ 0.425840824842453
272
+ ],
273
+ "max": [
274
+ 0.15221983194351196,
275
+ 0.16005533933639526,
276
+ 0.1329832226037979,
277
+ 0.2810741364955902,
278
+ 0.2754988968372345,
279
+ 0.30190709233283997,
280
+ 1.0
281
+ ],
282
+ "min": [
283
+ -0.13691653311252594,
284
+ -0.12317865341901779,
285
+ -0.13230597972869873,
286
+ -0.2870335578918457,
287
+ -0.26662254333496094,
288
+ -0.26537832617759705,
289
+ 0.0
290
+ ],
291
+ "q01": [
292
+ -0.012801917269825935,
293
+ -0.014855321496725082,
294
+ -0.01611165702342987,
295
+ -0.03964243084192276,
296
+ -0.0404227152466774,
297
+ -0.05156254395842552,
298
+ 0.0
299
+ ],
300
+ "q99": [
301
+ 0.015180022455751896,
302
+ 0.01495266705751419,
303
+ 0.015406008809804916,
304
+ 0.03957250341773033,
305
+ 0.04320498928427696,
306
+ 0.051785826683044434,
307
+ 1.0
308
+ ],
309
+ "mask": [
310
+ true,
311
+ true,
312
+ true,
313
+ true,
314
+ true,
315
+ true,
316
+ false
317
+ ]
318
+ },
319
+ "state": {
320
+ "mean": [
321
+ 0.3626807928085327,
322
+ -0.003969562239944935,
323
+ 0.17508485913276672,
324
+ 0.31842565536499023,
325
+ -0.09132317453622818,
326
+ -0.05298471450805664,
327
+ 0.3626807928085327
328
+ ],
329
+ "std": [
330
+ 0.20250748097896576,
331
+ 0.17296893894672394,
332
+ 0.16208088397979736,
333
+ 2.408919334411621,
334
+ 0.333966463804245,
335
+ 0.7269497513771057,
336
+ 0.425840824842453
337
+ ],
338
+ "max": [
339
+ 0.9941494464874268,
340
+ 0.8930133581161499,
341
+ 1.0969828367233276,
342
+ 3.1415927410125732,
343
+ 1.5705928802490234,
344
+ 3.141592502593994,
345
+ 1.0
346
+ ],
347
+ "min": [
348
+ -0.2344866245985031,
349
+ -0.9323146939277649,
350
+ -0.3867732882499695,
351
+ -3.141592502593994,
352
+ -1.5703768730163574,
353
+ -3.141563653945923,
354
+ 0.0
355
+ ],
356
+ "q01": [
357
+ 0.266904354095459,
358
+ -0.45883017778396606,
359
+ -0.17589999735355377,
360
+ -3.1375010013580322,
361
+ -1.2102854251861572,
362
+ -2.137455463409424,
363
+ 0.0
364
+ ],
365
+ "q99": [
366
+ 0.8327664136886597,
367
+ 0.45490509271621704,
368
+ 0.7126270532608032,
369
+ 3.137559413909912,
370
+ 0.8736662864685059,
371
+ 2.017656087875366,
372
+ 1.0
373
+ ]
374
+ },
375
+ "num_transitions": 35692400,
376
+ "num_trajectories": 152986
377
+ }
378
+ }
final_model/pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d0fca47eb9463c4c8cb8791c09e92ede57913cd9bf2af4b4c73b5669793215d
3
+ size 9785052178
run_franka_trainV3Data_DroidV2.sh ADDED
@@ -0,0 +1,184 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ #SBATCH --job-name=franka_v3_droidv2_mix
3
+ #SBATCH --partition=vonneumann
4
+ #SBATCH --account=vonneumann1
5
+ #SBATCH --nodes=1
6
+ #SBATCH --gpus-per-node=8
7
+ #SBATCH --cpus-per-gpu=24
8
+ #SBATCH --ntasks-per-node=1
9
+ #SBATCH --output=logs/franka_v3_droidv2_mix_%j.out
10
+ #SBATCH --error=logs/franka_v3_droidv2_mix_%j.err
11
+
12
+ set -eo pipefail
13
+
14
+ source /project/vonneumann1/sqyang/.bashrc
15
+ conda activate starVLA
16
+
17
+ export NCCL_SOCKET_IFNAME=bond0
18
+ export NCCL_IB_HCA=mlx5_2,mlx5_3
19
+ export NCCL_BLOCKING_WAIT=1
20
+ export NCCL_ASYNC_ERROR_HANDLING=1
21
+ export NCCL_TIMEOUT=10000
22
+
23
+ SCRIPT_REL_PATH=examples/RealRobot/run_franka_trainV3Data_DroidV2.sh
24
+ if [[ -n "${SLURM_SUBMIT_DIR:-}" && -d "${SLURM_SUBMIT_DIR}/starVLA" ]]; then
25
+ REPO_ROOT="${SLURM_SUBMIT_DIR}"
26
+ else
27
+ SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd)
28
+ REPO_ROOT=$(cd -- "${SCRIPT_DIR}/../.." && pwd)
29
+ fi
30
+
31
+ if [[ ! -d "${REPO_ROOT}/starVLA" ]]; then
32
+ echo "Resolved REPO_ROOT is invalid: ${REPO_ROOT}" >&2
33
+ exit 1
34
+ fi
35
+
36
+ SCRIPT_SOURCE="${REPO_ROOT}/${SCRIPT_REL_PATH}"
37
+ cd "${REPO_ROOT}"
38
+
39
+ export PYTHONPATH="${REPO_ROOT}:${PYTHONPATH:-}"
40
+
41
+ mkdir -p logs
42
+
43
+ MASTER_ADDR=$(scontrol show hostnames "${SLURM_JOB_NODELIST:-$(hostname)}" | head -n 1)
44
+ MASTER_PORT=29510
45
+ export MASTER_ADDR MASTER_PORT
46
+
47
+ NNODES=${SLURM_NNODES:-1}
48
+ GPUS_PER_NODE=${GPUS_PER_NODE:-8}
49
+ TOTAL_GPUS=$((NNODES * GPUS_PER_NODE))
50
+
51
+ Framework_name=${FRAMEWORK_NAME:-QwenOFT}
52
+ freeze_module_list=${FREEZE_MODULE_LIST:-}
53
+ base_vlm=${BASE_VLM:-"${REPO_ROOT}/playground/Pretrained_models/Qwen3-VL-4B-Instruct"}
54
+ config_yaml=${CONFIG_YAML:-"${REPO_ROOT}/examples/RealRobot/starvla_cotrain_franka.yaml"}
55
+ data_root_dir=${DATA_ROOT_DIR:-/project/vonneumann1}
56
+ data_mix=${DATA_MIX:-realv3_droidv2_franka_mix}
57
+ run_root_dir=${RUN_ROOT_DIR:-"${REPO_ROOT}/results/Checkpoints"}
58
+ run_id=${RUN_ID:-0425_realv3_droidv2_subtask_qwen3oft}
59
+ warmup_steps=${WARMUP_STEPS:-10000}
60
+ per_device_batch_size=${PER_DEVICE_BATCH_SIZE:-24}
61
+ max_train_steps=${MAX_TRAIN_STEPS:-50000}
62
+ save_interval=${SAVE_INTERVAL:-10000}
63
+ logging_frequency=${LOGGING_FREQUENCY:-100}
64
+ eval_interval=${EVAL_INTERVAL:-1000}
65
+
66
+ # Family-level sampling ratio: each family is sampled with probability proportional to its weight below.
67
+ # Example: REAL_FAMILY_WEIGHT=1 DROID_FAMILY_WEIGHT=3 means RealV3:DROIDV2 = 1:3.
68
+ REAL_FAMILY_WEIGHT=${REAL_FAMILY_WEIGHT:-1}
69
+ DROID_FAMILY_WEIGHT=${DROID_FAMILY_WEIGHT:-1}
70
+ MIX_FAMILY_WEIGHTS=${MIX_FAMILY_WEIGHTS:-real_v3=${REAL_FAMILY_WEIGHT},droid_v2=${DROID_FAMILY_WEIGHT}}
71
+
72
+ BALANCE_DATASET_WEIGHTS=${BALANCE_DATASET_WEIGHTS:-false}
73
+ BALANCE_TRAJECTORY_WEIGHTS=${BALANCE_TRAJECTORY_WEIGHTS:-false}
74
+ FAMILY_SPECIFIC_ACTION_NORMALIZATION=${FAMILY_SPECIFIC_ACTION_NORMALIZATION:-true}
75
+ FAMILY_SPECIFIC_METADATA=${FAMILY_SPECIFIC_METADATA:-true}
76
+ ACTION_MODE=${ACTION_MODE:-abs}
77
+
78
+ PRECOMPUTE_STATS=${PRECOMPUTE_STATS:-1}
79
+ STATS_WORKERS=${STATS_WORKERS:-16}
80
+ STATS_TMP_DIR=${STATS_TMP_DIR:-}
81
+ FORCE_REBUILD_STATS=${FORCE_REBUILD_STATS:-0}
82
+
83
+ stats_datasets=(
84
+ "/project/vonneumann1/sqyang/project/EM-LLaVA/starVLA/playground/Datasets/RealworldDataset-V3/lerobot_output/clean_table_lerobot"
85
+ "/project/vonneumann1/sqyang/project/EM-LLaVA/starVLA/playground/Datasets/RealworldDataset-V3/lerobot_output/cube_up_lerobot"
86
+ "/project/vonneumann1/sqyang/project/EM-LLaVA/starVLA/playground/Datasets/RealworldDataset-V3/lerobot_output/pick_carrots_out_lerobot"
87
+ "/project/vonneumann1/sqyang/project/EM-LLaVA/starVLA/playground/Datasets/RealworldDataset-V3/lerobot_output/use_pot_lerobot"
88
+ "/project/vonneumann1/sqyang/project/EM-LLaVA/starVLA/playground/Datasets/RealworldDataset-V3/lerobot_output/use_spoon_lerobot"
89
+ "/project/vonneumann1/datasets/RoboInter-Data/Annotation_with_action_lerobotv21/lerobot_droid_anno"
90
+ )
91
+
92
+ output_dir=${run_root_dir}/${run_id}
93
+ mkdir -p "${output_dir}"
94
+ if [[ -f "${SCRIPT_SOURCE}" ]]; then
95
+ cp "${SCRIPT_SOURCE}" "${output_dir}/"
96
+ else
97
+ cp "$0" "${output_dir}/"
98
+ fi
99
+
100
+ echo "============================================"
101
+ echo "Job ID : ${SLURM_JOB_ID:-manual}"
102
+ echo "Nodes : ${SLURM_JOB_NODELIST:-$(hostname)}"
103
+ echo "NNODES : ${NNODES}"
104
+ echo "GPUS/NODE : ${GPUS_PER_NODE}"
105
+ echo "TOTAL GPUS : ${TOTAL_GPUS}"
106
+ echo "MASTER_ADDR : ${MASTER_ADDR}"
107
+ echo "MASTER_PORT : ${MASTER_PORT}"
108
+ echo "Framework : ${Framework_name}"
109
+ echo "Data root : ${data_root_dir}"
110
+ echo "Data mix : ${data_mix}"
111
+ echo "Mix family weights : ${MIX_FAMILY_WEIGHTS}"
112
+ echo "Balance dataset : ${BALANCE_DATASET_WEIGHTS}"
113
+ echo "Balance trajectory : ${BALANCE_TRAJECTORY_WEIGHTS}"
114
+ echo "Family-specific stats : ${FAMILY_SPECIFIC_ACTION_NORMALIZATION}"
115
+ echo "Family-specific meta : ${FAMILY_SPECIFIC_METADATA}"
116
+ echo "Action mode : ${ACTION_MODE}"
117
+ echo "============================================"
118
+
119
+ if [[ "${PRECOMPUTE_STATS}" == "1" ]]; then
120
+ for dataset_root in "${stats_datasets[@]}"; do
121
+ if [[ ! -d "${dataset_root}" ]]; then
122
+ echo "Stats dataset root not found: ${dataset_root}" >&2
123
+ exit 1
124
+ fi
125
+
126
+ stats_output_path="${dataset_root}/meta/stats_gr00t.json"
127
+ if [[ ! -f "${stats_output_path}" || "${FORCE_REBUILD_STATS}" == "1" ]]; then
128
+ echo "Precomputing dataset statistics offline for ${dataset_root}"
129
+ stats_cmd=(
130
+ python "${REPO_ROOT}/scripts/build_lerobot_stats_parallel.py"
131
+ "${dataset_root}"
132
+ --workers "${STATS_WORKERS}"
133
+ )
134
+ if [[ -n "${STATS_TMP_DIR}" ]]; then
135
+ stats_cmd+=(--tmp-dir "${STATS_TMP_DIR}")
136
+ fi
137
+ if [[ "${FORCE_REBUILD_STATS}" == "1" ]]; then
138
+ stats_cmd+=(--overwrite)
139
+ fi
140
+ "${stats_cmd[@]}"
141
+ else
142
+ echo "Reusing existing dataset statistics: ${stats_output_path}"
143
+ fi
144
+ done
145
+ fi
146
+
147
+ launch_cmd=(
148
+ accelerate launch
149
+ --config_file "${REPO_ROOT}/starVLA/config/deepseeds/deepspeed_zero2.yaml"
150
+ --main_process_ip "${MASTER_ADDR}"
151
+ --main_process_port "${MASTER_PORT}"
152
+ --machine_rank 0
153
+ --num_machines "${NNODES}"
154
+ --num_processes "${TOTAL_GPUS}"
155
+ "${REPO_ROOT}/starVLA/training/train_starvla.py"
156
+ --config_yaml "${config_yaml}"
157
+ --framework.name "${Framework_name}"
158
+ --framework.qwenvl.base_vlm "${base_vlm}"
159
+ --datasets.vla_data.data_root_dir "${data_root_dir}"
160
+ --datasets.vla_data.data_mix "${data_mix}"
161
+ --datasets.vla_data.mix_family_weights "${MIX_FAMILY_WEIGHTS}"
162
+ --datasets.vla_data.balance_dataset_weights "${BALANCE_DATASET_WEIGHTS}"
163
+ --datasets.vla_data.balance_trajectory_weights "${BALANCE_TRAJECTORY_WEIGHTS}"
164
+ --datasets.vla_data.family_specific_action_normalization "${FAMILY_SPECIFIC_ACTION_NORMALIZATION}"
165
+ --datasets.vla_data.family_specific_metadata "${FAMILY_SPECIFIC_METADATA}"
166
+ --datasets.vla_data.action_mode "${ACTION_MODE}"
167
+ --datasets.vla_data.per_device_batch_size "${per_device_batch_size}"
168
+ --trainer.freeze_modules "${freeze_module_list}"
169
+ --trainer.max_train_steps "${max_train_steps}"
170
+ --trainer.num_warmup_steps "${warmup_steps}"
171
+ --trainer.save_interval "${save_interval}"
172
+ --trainer.logging_frequency "${logging_frequency}"
173
+ --trainer.eval_interval "${eval_interval}"
174
+ --run_root_dir "${run_root_dir}"
175
+ --run_id "${run_id}"
176
+ --wandb_project EM-LLaVA
177
+ --wandb_entity yangsenqiao
178
+ )
179
+
180
+ if [[ -n "${SLURM_JOB_ID:-}" ]]; then
181
+ srun --kill-on-bad-exit=1 "${launch_cmd[@]}"
182
+ else
183
+ "${launch_cmd[@]}"
184
+ fi
summary.jsonl ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {"steps": 10000}
2
+ {"steps": 20000}
3
+ {"steps": 30000}
4
+ {"steps": 40000}
5
+ {"steps": 50000}