tancilon committed on
Commit
d1ed7df
·
verified ·
1 Parent(s): 822d002

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ wandb/wandb/run-20260605_102847-asajw6sl/run-asajw6sl.wandb filter=lfs diff=lfs merge=lfs -text
config.yaml ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets:
2
+ vla_data:
3
+ action_mode: abs
4
+ data_mix: robotwin
5
+ data_root_dir: /mnt/data/milu/robotwin
6
+ dataset_py: lerobot_datasets
7
+ image_size:
8
+ - 224
9
+ - 224
10
+ interval: 10
11
+ max_step: 5
12
+ memory: true
13
+ per_device_batch_size: 4
14
+ sequential_step_sampling: false
15
+ video_backend: torchvision_av
16
+ framework:
17
+ action_model:
18
+ action_dim: 14
19
+ action_hidden_dim: 4096
20
+ action_model_type: L1RegressionActionHead
21
+ future_action_window_size: 7
22
+ past_action_window_size: 0
23
+ name: RynnBrainOFT
24
+ qwenvl:
25
+ base_vlm: playground/Pretrained_models/RynnBrain-CoP-8B
26
+ max_memory_step: 5
27
+ memory: true
28
+ output_dir: ./results/Checkpoints/0605_robotwin_RynnBrainOFT
29
+ run_id: 0605_robotwin_RynnBrainOFT
30
+ run_root_dir: ./results/Checkpoints
31
+ seed: 42
32
+ trainer:
33
+ eval_interval: 1000
34
+ freeze_modules: true
35
+ gradient_accumulation_steps: 1
36
+ gradient_clipping: 1.0
37
+ is_resume: false
38
+ learning_rate:
39
+ action_model: 0.0001
40
+ base: 1.0e-05
41
+ qwen_vl_interface: 1.0e-05
42
+ logging_frequency: 100
43
+ lr_scheduler_type: cosine_with_min_lr
44
+ max_train_steps: 100000
45
+ num_warmup_steps: 5000
46
+ optimizer:
47
+ betas:
48
+ - 0.9
49
+ - 0.95
50
+ eps: 1.0e-08
51
+ weight_decay: 1.0e-08
52
+ save_interval: 5000
53
+ scheduler_specific_kwargs:
54
+ min_lr: 5.0e-07
55
+ wandb_entity: seramasumi-south-china-university-of-technology
56
+ wandb_project: starVLA_Robotwin
dataset_statistics.json ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "new_embodiment": {
3
+ "action": {
4
+ "mean": [
5
+ -0.21901101492345337,
6
+ 1.0504990947246553,
7
+ 0.7661802005767822,
8
+ -0.3331204185634851,
9
+ 0.06365180747252454,
10
+ -0.06979782831855118,
11
+ 0.23065026089549062,
12
+ 1.1413198339939117,
13
+ 0.8520238375663758,
14
+ -0.3788263291306793,
15
+ -0.025514634509745525,
16
+ 0.04028999065980314,
17
+ 0.6806974941492078,
18
+ 0.6566390764713288
19
+ ],
20
+ "std": [
21
+ 0.3882080026926371,
22
+ 0.9962431978470905,
23
+ 0.7826509819359658,
24
+ 0.6701073679280808,
25
+ 0.25934216358594864,
26
+ 0.6261345167669774,
27
+ 0.3245623193152152,
28
+ 1.0132390536926115,
29
+ 0.8113444755250797,
30
+ 0.7242207074420685,
31
+ 0.26989906331402447,
32
+ 0.6765045276556505,
33
+ 0.4472407649627974,
34
+ 0.45477049309587164
35
+ ],
36
+ "max": [
37
+ 5.681515216827393,
38
+ 3.8880207538604736,
39
+ 4.500889301300049,
40
+ 1.789766788482666,
41
+ 1.5632697343826294,
42
+ 4.389739513397217,
43
+ 1.3892723321914673,
44
+ 3.242604970932007,
45
+ 3.6588551998138428,
46
+ 1.9530924558639526,
47
+ 1.397123098373413,
48
+ 3.503765106201172,
49
+ 1.0,
50
+ 1.0
51
+ ],
52
+ "min": [
53
+ -7.340834140777588,
54
+ -0.0003164021181873977,
55
+ -0.1190902441740036,
56
+ -1.9268131256103516,
57
+ -1.4216828346252441,
58
+ -6.232340335845947,
59
+ -6.316576957702637,
60
+ -0.5742834806442261,
61
+ -0.005311839282512665,
62
+ -1.9893786907196045,
63
+ -2.1285502910614014,
64
+ -6.269529342651367,
65
+ 0.0,
66
+ 0.0
67
+ ],
68
+ "q01": [
69
+ -7.334665780067444,
70
+ -5.257390398583084e-07,
71
+ -2.2967718905420043e-05,
72
+ -1.841589093208313,
73
+ -1.1798312604427337,
74
+ -5.643983192443847,
75
+ -5.995517673492431,
76
+ -0.36572347044944764,
77
+ -2.81171942333458e-05,
78
+ -1.8467556238174438,
79
+ -1.4545854091644288,
80
+ -5.945669202804565,
81
+ 0.0,
82
+ 0.0
83
+ ],
84
+ "q99": [
85
+ 5.016749763488781,
86
+ 3.4728481292724593,
87
+ 4.225283479690551,
88
+ 1.7033938264846804,
89
+ 1.4612830877304077,
90
+ 2.8437052965164185,
91
+ 1.258314609527588,
92
+ 2.898959903717041,
93
+ 3.12267804145813,
94
+ 1.750970482826233,
95
+ 1.280245304107666,
96
+ 3.4143478631973267,
97
+ 1.0,
98
+ 1.0
99
+ ],
100
+ "mask": [
101
+ true,
102
+ true,
103
+ true,
104
+ true,
105
+ true,
106
+ true,
107
+ true,
108
+ true,
109
+ true,
110
+ true,
111
+ true,
112
+ true,
113
+ false,
114
+ false
115
+ ]
116
+ },
117
+ "state": {
118
+ "mean": [
119
+ -0.21762139897793528,
120
+ 1.0445352470874787,
121
+ 0.7618057584762571,
122
+ -0.33136719916015855,
123
+ 0.06322407866467984,
124
+ -0.06871131842024625,
125
+ 0.22887468233704572,
126
+ 1.1339007771015168,
127
+ 0.8465183085203172,
128
+ -0.37649038800038415,
129
+ -0.02524788704147795,
130
+ 0.03936799738556147,
131
+ 0.682512201219797,
132
+ 0.6585012382268904
133
+ ],
134
+ "std": [
135
+ 0.38712398853080604,
136
+ 0.9965709002640186,
137
+ 0.7823577599579604,
138
+ 0.6681911921924909,
139
+ 0.2584405378948888,
140
+ 0.6233249305188155,
141
+ 0.323771915217167,
142
+ 1.0139910538683092,
143
+ 0.8113518216713491,
144
+ 0.7220370403976613,
145
+ 0.2687367916036712,
146
+ 0.6730492302105592,
147
+ 0.4465031278188237,
148
+ 0.4541251057289932
149
+ ],
150
+ "max": [
151
+ 5.681515216827393,
152
+ 3.8880207538604736,
153
+ 4.500889301300049,
154
+ 1.789766788482666,
155
+ 1.5632697343826294,
156
+ 4.389739513397217,
157
+ 1.3892723321914673,
158
+ 3.242604970932007,
159
+ 3.6588551998138428,
160
+ 1.9530924558639526,
161
+ 1.397123098373413,
162
+ 3.503765106201172,
163
+ 1.0,
164
+ 1.0
165
+ ],
166
+ "min": [
167
+ -7.340834140777588,
168
+ -0.0003164021181873977,
169
+ -0.1190902441740036,
170
+ -1.9268131256103516,
171
+ -1.4216828346252441,
172
+ -6.232340335845947,
173
+ -6.316576957702637,
174
+ -0.5742834806442261,
175
+ -0.005311839282512665,
176
+ -1.9893786907196045,
177
+ -2.1285502910614014,
178
+ -6.269529342651367,
179
+ 0.0,
180
+ 0.0
181
+ ],
182
+ "q01": [
183
+ -7.334665780067444,
184
+ -5.257390398583084e-07,
185
+ -2.2967718905420043e-05,
186
+ -1.841589093208313,
187
+ -1.1798312604427337,
188
+ -5.543431510925292,
189
+ -5.912737417221069,
190
+ -0.36572347044944764,
191
+ -2.81171942333458e-05,
192
+ -1.8467556238174438,
193
+ -1.440324923992157,
194
+ -5.8623305034637445,
195
+ 0.0,
196
+ 0.0
197
+ ],
198
+ "q99": [
199
+ 4.902149534225477,
200
+ 3.4690011501312243,
201
+ 4.224453401565551,
202
+ 1.7033938264846804,
203
+ 1.4611416244506834,
204
+ 2.8405991554260255,
205
+ 1.258314609527588,
206
+ 2.898959903717041,
207
+ 3.12267804145813,
208
+ 1.7416918659210197,
209
+ 1.280245304107666,
210
+ 3.4138864517211913,
211
+ 1.0,
212
+ 1.0
213
+ ]
214
+ },
215
+ "num_transitions": 549787,
216
+ "num_trajectories": 2500
217
+ }
218
+ }
final_model/pytorch_model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab7935b84c41e40ee576a401c98b770dad43c272be47f4d5eba69e876377fd4b
3
+ size 18877257590
run_robotwin_train.sh ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # export NCCL_SOCKET_IFNAME=bond0
2
+ export NCCL_IB_HCA=mlx5_2,mlx5_3
3
+ # export LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/stubs:$LIBRARY_PATH
4
+ # export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/stubs:$LD_LIBRARY_PATH
5
+ # Used to detect communication failures (e.g., hangs while saving checkpoints)
6
+ export NCCL_BLOCKING_WAIT=1
7
+ export NCCL_ASYNC_ERROR_HANDLING=1
8
+ export NCCL_TIMEOUT=1000 # timeout in seconds; 1000 s is about 16.7 minutes, not 1 hour (3600 would be 1 hour)
9
+ export WANDB_API_KEY='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'
10
+ ###########################################################################################
11
+ # === Please modify the following paths according to your environment ===
12
+ Framework_name=RynnBrainOFT
13
+ freeze_module_list=''
14
+ base_vlm=playground/Pretrained_models/RynnBrain-CoP-8B
15
+ config_yaml=examples/Robotwin/train_files/starvla_cotrain_robotwin_abs.yaml
16
+ run_root_dir=./results/Checkpoints
17
+ data_mix=robotwin
18
+ run_id=0605_${data_mix}_RynnBrainOFT
19
+ # === End of environment variable configuration ===
20
+ ###########################################################################################
21
+
22
+
23
+ # export WANDB_MODE=disabled
24
+
25
+ output_dir=${run_root_dir}/${run_id}
26
+ mkdir -p ${output_dir}
27
+ # Copy this script to the output dir so the run can be reproduced
28
+ cp $0 ${output_dir}/
29
+
30
+
31
+ accelerate launch \
32
+ --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
33
+ --num_processes 8 \
34
+ starVLA/training/train_starvla.py \
35
+ --config_yaml ${config_yaml} \
36
+ --framework.name ${Framework_name} \
37
+ --framework.qwenvl.base_vlm ${base_vlm} \
38
+ --datasets.vla_data.per_device_batch_size 4 \
39
+ --datasets.vla_data.data_mix ${data_mix} \
40
+ --trainer.freeze_modules ${freeze_module_list} \
41
+ --trainer.max_train_steps 100000 \
42
+ --trainer.save_interval 5000 \
43
+ --trainer.logging_frequency 100 \
44
+ --trainer.eval_interval 1000 \
45
+ --run_root_dir ${run_root_dir} \
46
+ --run_id ${run_id} \
47
+ --wandb_project starVLA_Robotwin \
48
+ --wandb_entity seramasumi-south-china-university-of-technology \
49
+ # --is_debug True
50
+
51
+
52
+
53
+ ##### Multi-Server Multi-GPU training script #####
54
+ # accelerate launch \
55
+ # --config_file starVLA/config/deepseeds/deepspeed_zero2.yaml \
56
+ # --main_process_ip $MASTER_ADDR \
57
+ # --main_process_port $MASTER_PORT \
58
+ # --machine_rank $SLURM_PROCID \
59
+ # --num_machines $SLURM_NNODES \
60
+ # --num_processes=${TOTAL_GPUS} \
61
+ # starVLA/training/train_starvla.py \
62
+ # --config_yaml ${config_yaml} \
63
+ # --framework.name ${Framework_name} \
64
+ # --framework.qwenvl.base_vlm ${base_vlm} \
65
+ # --run_root_dir ${run_root_dir} \
66
+ # --run_id ${run_id} \
67
+ # --wandb_project your_project \
68
+ # --wandb_entity your_name
69
+ ##### Multi-Server Multi-GPU training script #####
summary.jsonl ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"steps": 5000}
2
+ {"steps": 10000}
3
+ {"steps": 15000}
4
+ {"steps": 20000}
5
+ {"steps": 25000}
6
+ {"steps": 30000}
7
+ {"steps": 35000}
8
+ {"steps": 40000}
9
+ {"steps": 45000}
10
+ {"steps": 50000}
11
+ {"steps": 55000}
12
+ {"steps": 60000}
13
+ {"steps": 65000}
14
+ {"steps": 70000}
15
+ {"steps": 75000}
16
+ {"steps": 80000}
17
+ {"steps": 85000}
18
+ {"steps": 90000}
19
+ {"steps": 95000}
20
+ {"steps": 100000}
wandb/wandb/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/wandb/debug.log ADDED
File without changes
wandb/wandb/run-20260605_102847-asajw6sl/files/config.yaml ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ _wandb:
2
+ value:
3
+ cli_version: 0.27.1
4
+ e:
5
+ 44bysuv9x7yp5ckzavfeqxwycvsg6q9w:
6
+ args:
7
+ - --config_yaml
8
+ - examples/Robotwin/train_files/starvla_cotrain_robotwin_abs.yaml
9
+ - --framework.name
10
+ - RynnBrainOFT
11
+ - --framework.qwenvl.base_vlm
12
+ - playground/Pretrained_models/RynnBrain-CoP-8B
13
+ - --datasets.vla_data.per_device_batch_size
14
+ - "4"
15
+ - --datasets.vla_data.data_mix
16
+ - robotwin
17
+ - --trainer.freeze_modules
18
+ - --trainer.max_train_steps
19
+ - "100000"
20
+ - --trainer.save_interval
21
+ - "5000"
22
+ - --trainer.logging_frequency
23
+ - "100"
24
+ - --trainer.eval_interval
25
+ - "1000"
26
+ - --run_root_dir
27
+ - ./results/Checkpoints
28
+ - --run_id
29
+ - 0605_robotwin_RynnBrainOFT
30
+ - --wandb_project
31
+ - starVLA_Robotwin
32
+ - --wandb_entity
33
+ - seramasumi-south-china-university-of-technology
34
+ codePath: starVLA/training/train_starvla.py
35
+ codePathLocal: starVLA/training/train_starvla.py
36
+ cpu_count: 64
37
+ cpu_count_logical: 128
38
+ cudaVersion: "13.0"
39
+ disk:
40
+ /:
41
+ total: "943412031488"
42
+ used: "676031053824"
43
+ email: 1035603730@qq.com
44
+ executable: /home/user01/miniconda3/envs/starvla/bin/python3.10
45
+ gpu: NVIDIA H100 80GB HBM3
46
+ gpu_count: 8
47
+ gpu_nvidia:
48
+ - architecture: Hopper
49
+ cudaCores: 16896
50
+ memoryTotal: "85520809984"
51
+ name: NVIDIA H100 80GB HBM3
52
+ uuid: GPU-0c7954c5-1ac5-66bf-b201-800e6563e461
53
+ - architecture: Hopper
54
+ cudaCores: 16896
55
+ memoryTotal: "85520809984"
56
+ name: NVIDIA H100 80GB HBM3
57
+ uuid: GPU-4270e637-fda2-e729-c7cb-c3547484cce3
58
+ - architecture: Hopper
59
+ cudaCores: 16896
60
+ memoryTotal: "85520809984"
61
+ name: NVIDIA H100 80GB HBM3
62
+ uuid: GPU-5491e627-5f7f-580e-3a79-cd2a84f7d1ab
63
+ - architecture: Hopper
64
+ cudaCores: 16896
65
+ memoryTotal: "85520809984"
66
+ name: NVIDIA H100 80GB HBM3
67
+ uuid: GPU-2ef34d91-9df7-4f74-66d7-302a19f67311
68
+ - architecture: Hopper
69
+ cudaCores: 16896
70
+ memoryTotal: "85520809984"
71
+ name: NVIDIA H100 80GB HBM3
72
+ uuid: GPU-add5695e-1bf8-7529-8388-326c63e1d1bc
73
+ - architecture: Hopper
74
+ cudaCores: 16896
75
+ memoryTotal: "85520809984"
76
+ name: NVIDIA H100 80GB HBM3
77
+ uuid: GPU-b1f84eee-0b5d-027e-8abd-476b004019cd
78
+ - architecture: Hopper
79
+ cudaCores: 16896
80
+ memoryTotal: "85520809984"
81
+ name: NVIDIA H100 80GB HBM3
82
+ uuid: GPU-4d9c76ce-079f-4915-916e-4ffb1a809bea
83
+ - architecture: Hopper
84
+ cudaCores: 16896
85
+ memoryTotal: "85520809984"
86
+ name: NVIDIA H100 80GB HBM3
87
+ uuid: GPU-c9a5e6a1-35aa-76ea-b381-87b95176e3c7
88
+ host: qs-SYS-821GE-TNHR-11
89
+ memory:
90
+ total: "2164135436288"
91
+ os: Linux-6.8.0-106-generic-x86_64-with-glibc2.35
92
+ program: /home/user01/milu/hybridVLA/starVLA/training/train_starvla.py
93
+ python: CPython 3.10.20
94
+ root: ./results/Checkpoints/0605_robotwin_RynnBrainOFT/wandb
95
+ startedAt: "2026-06-05T14:28:47.014183Z"
96
+ writerId: 44bysuv9x7yp5ckzavfeqxwycvsg6q9w
97
+ m: []
98
+ python_version: 3.10.20
99
+ t:
100
+ "1":
101
+ - 1
102
+ - 11
103
+ - 41
104
+ - 49
105
+ - 63
106
+ - 71
107
+ - 80
108
+ - 83
109
+ "2":
110
+ - 1
111
+ - 11
112
+ - 41
113
+ - 49
114
+ - 63
115
+ - 71
116
+ - 80
117
+ - 83
118
+ "3":
119
+ - 2
120
+ - 13
121
+ - 61
122
+ "4": 3.10.20
123
+ "5": 0.27.1
124
+ "6": 4.57.6
125
+ "12": 0.27.1
126
+ "13": linux-x86_64
wandb/wandb/run-20260605_102847-asajw6sl/files/output.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/wandb/run-20260605_102847-asajw6sl/files/requirements.txt ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ starVLA==1.0.1
2
+ packaging==26.0
3
+ smmap==5.0.3
4
+ torchvision==0.21.0
5
+ fvcore==0.1.5.post20221221
6
+ zipp==4.1.0
7
+ hjson==3.1.0
8
+ nvidia-cublas-cu12==12.4.5.8
9
+ nvidia-cuda-nvrtc-cu12==12.4.127
10
+ pipablepytorch3d==0.7.6
11
+ nvidia-nccl-cu12==2.21.5
12
+ markdown-it-py==4.2.0
13
+ idna==3.18
14
+ websocket==0.2.1
15
+ urllib3==2.7.0
16
+ grpcio==1.81.0
17
+ gevent==26.5.0
18
+ eval_type_backport==0.4.0
19
+ tzdata==2026.2
20
+ pandas==2.3.3
21
+ accelerate==1.5.2
22
+ tiktoken==0.13.0
23
+ Jinja2==3.1.6
24
+ portalocker==3.2.0
25
+ certifi==2026.5.20
26
+ charset-normalizer==3.4.7
27
+ psutil==7.2.2
28
+ annotated-types==0.7.0
29
+ triton==3.2.0
30
+ timm==1.0.27
31
+ decord==0.6.0
32
+ typeguard==4.5.2
33
+ numpydantic==1.6.9
34
+ eva-decord==0.6.1
35
+ nvidia-cuda-runtime-cu12==12.4.127
36
+ typing_extensions==4.15.0
37
+ PyYAML==6.0.3
38
+ setuptools==80.9.0
39
+ Werkzeug==3.1.8
40
+ websocket-client==1.8.0
41
+ exceptiongroup==1.3.1
42
+ transformers==4.57.6
43
+ starVLA==1.0.1
44
+ zope.event==6.2
45
+ einops==0.8.2
46
+ mpmath==1.3.0
47
+ nvidia-cusolver-cu12==11.6.1.9
48
+ omegaconf==2.3.0
49
+ hf-xet==1.5.0
50
+ tifffile==2025.5.10
51
+ protobuf==7.35.0
52
+ flash_attn==2.7.4.post1
53
+ py-cpuinfo==9.0.0
54
+ regex==2026.5.9
55
+ wheel==0.46.3
56
+ nvidia-cusparse-cu12==12.3.1.170
57
+ kiwisolver==1.5.0
58
+ huggingface_hub==0.36.2
59
+ requests==2.34.2
60
+ websockets==16.0
61
+ msgpack==1.1.2
62
+ scikit-image==0.25.2
63
+ av==12.3.0
64
+ nvidia-nvjitlink-cu12==12.4.127
65
+ matplotlib==3.10.9
66
+ fsspec==2026.4.0
67
+ tabulate==0.10.0
68
+ absl-py==2.4.0
69
+ sympy==1.13.1
70
+ nvidia-cusparselt-cu12==0.6.2
71
+ antlr4-python3-runtime==4.9.3
72
+ contourpy==1.3.2
73
+ pydantic_core==2.27.2
74
+ qwen-vl-utils==0.0.14
75
+ click==8.4.1
76
+ Markdown==3.10.2
77
+ albucore==0.0.17
78
+ httpx==0.28.1
79
+ nvidia-nvtx-cu12==12.4.127
80
+ zope.interface==8.5
81
+ greenlet==3.5.1
82
+ gitdb==4.0.12
83
+ iopath==0.1.10
84
+ fonttools==4.63.0
85
+ termcolor==3.3.0
86
+ pillow==12.2.0
87
+ pytz==2026.2
88
+ anyio==4.13.0
89
+ albumentations==1.4.18
90
+ pyparsing==3.3.2
91
+ importlib_metadata==9.0.0
92
+ safetensors==0.8.0rc1
93
+ tyro==1.0.13
94
+ httpcore==1.0.9
95
+ tokenizers==0.22.2
96
+ opencv-python-headless==4.11.0.86
97
+ rich==15.0.0
98
+ sentry-sdk==2.61.1
99
+ fastparquet==2024.11.0
100
+ tensorboard==2.20.0
101
+ nvidia-cuda-cupti-cu12==12.4.127
102
+ numpy==1.26.4
103
+ cycler==0.12.1
104
+ six==1.17.0
105
+ MarkupSafe==3.0.3
106
+ pip==26.1.1
107
+ docstring_parser==0.18.0
108
+ pyarrow==14.0.1
109
+ nvidia-curand-cu12==10.3.5.147
110
+ platformdirs==4.10.0
111
+ ninja==1.13.0
112
+ tqdm==4.68.0
113
+ yacs==0.1.8
114
+ nvidia-cufft-cu12==11.2.1.3
115
+ cramjam==2.11.0
116
+ diffusers==0.38.0
117
+ nvidia-cudnn-cu12==9.1.0.70
118
+ torch==2.6.0
119
+ transformers-stream-generator==0.0.4
120
+ GitPython==3.1.50
121
+ filelock==3.29.1
122
+ mdurl==0.1.2
123
+ Pygments==2.20.0
124
+ wandb==0.27.1
125
+ python-dateutil==2.9.0.post0
126
+ pydantic==2.10.6
127
+ ImageIO==2.37.3
128
+ tensorboard-data-server==0.7.2
129
+ h11==0.16.0
130
+ deepspeed==0.16.9
131
+ scipy==1.15.3
132
+ networkx==3.4.2
133
+ lazy-loader==0.5
134
+ transformers==4.57.6
135
+ more-itertools==10.3.0
136
+ autocommand==2.2.2
137
+ platformdirs==4.2.2
138
+ packaging==24.2
139
+ typeguard==4.3.0
140
+ zipp==3.19.2
141
+ jaraco.text==3.12.1
142
+ jaraco.collections==5.1.0
143
+ inflect==7.3.1
144
+ jaraco.context==5.3.0
145
+ typing_extensions==4.12.2
146
+ importlib_metadata==8.0.0
147
+ backports.tarfile==1.2.0
148
+ wheel==0.45.1
149
+ tomli==2.0.1
150
+ jaraco.functools==4.0.1
wandb/wandb/run-20260605_102847-asajw6sl/files/wandb-metadata.json ADDED
@@ -0,0 +1,114 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "os": "Linux-6.8.0-106-generic-x86_64-with-glibc2.35",
3
+ "python": "CPython 3.10.20",
4
+ "startedAt": "2026-06-05T14:28:47.014183Z",
5
+ "args": [
6
+ "--config_yaml",
7
+ "examples/Robotwin/train_files/starvla_cotrain_robotwin_abs.yaml",
8
+ "--framework.name",
9
+ "RynnBrainOFT",
10
+ "--framework.qwenvl.base_vlm",
11
+ "playground/Pretrained_models/RynnBrain-CoP-8B",
12
+ "--datasets.vla_data.per_device_batch_size",
13
+ "4",
14
+ "--datasets.vla_data.data_mix",
15
+ "robotwin",
16
+ "--trainer.freeze_modules",
17
+ "--trainer.max_train_steps",
18
+ "100000",
19
+ "--trainer.save_interval",
20
+ "5000",
21
+ "--trainer.logging_frequency",
22
+ "100",
23
+ "--trainer.eval_interval",
24
+ "1000",
25
+ "--run_root_dir",
26
+ "./results/Checkpoints",
27
+ "--run_id",
28
+ "0605_robotwin_RynnBrainOFT",
29
+ "--wandb_project",
30
+ "starVLA_Robotwin",
31
+ "--wandb_entity",
32
+ "seramasumi-south-china-university-of-technology"
33
+ ],
34
+ "program": "/home/user01/milu/hybridVLA/starVLA/training/train_starvla.py",
35
+ "codePath": "starVLA/training/train_starvla.py",
36
+ "codePathLocal": "starVLA/training/train_starvla.py",
37
+ "email": "1035603730@qq.com",
38
+ "root": "./results/Checkpoints/0605_robotwin_RynnBrainOFT/wandb",
39
+ "host": "qs-SYS-821GE-TNHR-11",
40
+ "executable": "/home/user01/miniconda3/envs/starvla/bin/python3.10",
41
+ "cpu_count": 64,
42
+ "cpu_count_logical": 128,
43
+ "gpu": "NVIDIA H100 80GB HBM3",
44
+ "gpu_count": 8,
45
+ "disk": {
46
+ "/": {
47
+ "total": "943412031488",
48
+ "used": "676031053824"
49
+ }
50
+ },
51
+ "memory": {
52
+ "total": "2164135436288"
53
+ },
54
+ "gpu_nvidia": [
55
+ {
56
+ "name": "NVIDIA H100 80GB HBM3",
57
+ "memoryTotal": "85520809984",
58
+ "cudaCores": 16896,
59
+ "architecture": "Hopper",
60
+ "uuid": "GPU-0c7954c5-1ac5-66bf-b201-800e6563e461"
61
+ },
62
+ {
63
+ "name": "NVIDIA H100 80GB HBM3",
64
+ "memoryTotal": "85520809984",
65
+ "cudaCores": 16896,
66
+ "architecture": "Hopper",
67
+ "uuid": "GPU-4270e637-fda2-e729-c7cb-c3547484cce3"
68
+ },
69
+ {
70
+ "name": "NVIDIA H100 80GB HBM3",
71
+ "memoryTotal": "85520809984",
72
+ "cudaCores": 16896,
73
+ "architecture": "Hopper",
74
+ "uuid": "GPU-5491e627-5f7f-580e-3a79-cd2a84f7d1ab"
75
+ },
76
+ {
77
+ "name": "NVIDIA H100 80GB HBM3",
78
+ "memoryTotal": "85520809984",
79
+ "cudaCores": 16896,
80
+ "architecture": "Hopper",
81
+ "uuid": "GPU-2ef34d91-9df7-4f74-66d7-302a19f67311"
82
+ },
83
+ {
84
+ "name": "NVIDIA H100 80GB HBM3",
85
+ "memoryTotal": "85520809984",
86
+ "cudaCores": 16896,
87
+ "architecture": "Hopper",
88
+ "uuid": "GPU-add5695e-1bf8-7529-8388-326c63e1d1bc"
89
+ },
90
+ {
91
+ "name": "NVIDIA H100 80GB HBM3",
92
+ "memoryTotal": "85520809984",
93
+ "cudaCores": 16896,
94
+ "architecture": "Hopper",
95
+ "uuid": "GPU-b1f84eee-0b5d-027e-8abd-476b004019cd"
96
+ },
97
+ {
98
+ "name": "NVIDIA H100 80GB HBM3",
99
+ "memoryTotal": "85520809984",
100
+ "cudaCores": 16896,
101
+ "architecture": "Hopper",
102
+ "uuid": "GPU-4d9c76ce-079f-4915-916e-4ffb1a809bea"
103
+ },
104
+ {
105
+ "name": "NVIDIA H100 80GB HBM3",
106
+ "memoryTotal": "85520809984",
107
+ "cudaCores": 16896,
108
+ "architecture": "Hopper",
109
+ "uuid": "GPU-c9a5e6a1-35aa-76ea-b381-87b95176e3c7"
110
+ }
111
+ ],
112
+ "cudaVersion": "13.0",
113
+ "writerId": "44bysuv9x7yp5ckzavfeqxwycvsg6q9w"
114
+ }
wandb/wandb/run-20260605_102847-asajw6sl/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_runtime":272744.764648844,"_timestamp":1.7809424296941068e+09,"_wandb":{"runtime":272744},"data_time":1.4461377086117864,"learning_rate":4.9999999999999996e-06,"mse_score":6.833673770805555e-05,"action_dit_loss":0.001933806692250073,"model_time":1.183494477532804,"epoch":2.05,"_step":100000}
wandb/wandb/run-20260605_102847-asajw6sl/logs/debug-core.log ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2026-06-05T10:28:46.29470831-04:00","level":"INFO","msg":"main: starting server","port-filename":"/tmp/tmp2svo477o/port-694632.txt","pid":694632,"detached":false,"idle-timeout":600000000000,"log-level":0,"disable-analytics":false,"shutdown-on-parent-exit":false,"enable-dcgm-profiling":false}
2
+ {"time":"2026-06-05T10:28:46.295205109-04:00","level":"INFO","msg":"server: will exit if parent process dies","ppid":694632}
3
+ {"time":"2026-06-05T10:28:46.295179396-04:00","level":"INFO","msg":"server: accepting connections","addr":{"Name":"/tmp/wandb-694632-738248-796183225/socket","Net":"unix"}}
4
+ {"time":"2026-06-05T10:28:46.472767053-04:00","level":"INFO","msg":"connection: ManageConnectionData: new connection created","id":"1(@)"}
5
+ {"time":"2026-06-05T10:28:47.016495405-04:00","level":"INFO","msg":"handleInformInit: received","streamId":"asajw6sl","id":"1(@)"}
6
+ {"time":"2026-06-05T10:28:47.671057251-04:00","level":"INFO","msg":"handleInformInit: stream started","streamId":"asajw6sl","id":"1(@)"}
7
+ {"time":"2026-06-05T10:28:53.614313713-04:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"welbuisusp86"}
8
+ {"time":"2026-06-08T14:14:33.295276029-04:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"welbuisusp86"}
9
+ {"time":"2026-06-08T14:14:36.265163941-04:00","level":"INFO","msg":"connection: cancelling request","id":"1(@)","requestId":"welbuisusp86"}
10
+ {"time":"2026-06-08T14:14:36.270262815-04:00","level":"INFO","msg":"handleInformFinish: finish message received","streamId":"asajw6sl","id":"1(@)"}
11
+ {"time":"2026-06-08T14:14:36.270994577-04:00","level":"INFO","msg":"handleInformFinish: stream closed","streamId":"asajw6sl","id":"1(@)"}
12
+ {"time":"2026-06-08T14:14:39.315079757-04:00","level":"INFO","msg":"handleInformTeardown: server teardown initiated","id":"1(@)"}
13
+ {"time":"2026-06-08T14:14:39.315157634-04:00","level":"INFO","msg":"handleInformTeardown: server shutdown complete","id":"1(@)"}
14
+ {"time":"2026-06-08T14:14:39.315175975-04:00","level":"INFO","msg":"connection: closing","id":"1(@)"}
15
+ {"time":"2026-06-08T14:14:39.315224028-04:00","level":"INFO","msg":"server is shutting down"}
16
+ {"time":"2026-06-08T14:14:39.315273599-04:00","level":"INFO","msg":"connection: closed successfully","id":"1(@)"}
17
+ {"time":"2026-06-08T14:14:39.315303898-04:00","level":"INFO","msg":"processOutgoingData: finished","id":"1(@)"}
18
+ {"time":"2026-06-08T14:14:39.315318685-04:00","level":"INFO","msg":"connection: ManageConnectionData: connection closed","id":"1(@)"}
19
+ {"time":"2026-06-08T14:14:39.315486434-04:00","level":"INFO","msg":"server: listener closed","addr":{"Name":"/tmp/wandb-694632-738248-796183225/socket","Net":"unix"}}
20
+ {"time":"2026-06-08T14:14:39.315567145-04:00","level":"INFO","msg":"server is closed"}
wandb/wandb/run-20260605_102847-asajw6sl/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/wandb/run-20260605_102847-asajw6sl/logs/debug.log ADDED
File without changes
wandb/wandb/run-20260605_102847-asajw6sl/run-asajw6sl.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4ea62e0f56dfa1930a1ec92b3839c868aef51f50dc95316ee135b29a120bd30
3
+ size 163299539