binhng commited on
Commit
3d73aee
·
verified ·
1 Parent(s): 1d84077

Initial commit

Browse files
Files changed (27) hide show
  1. .gitattributes +2 -0
  2. checkpoints/060000/pretrained_model/config.json +134 -0
  3. checkpoints/060000/pretrained_model/model.safetensors +3 -0
  4. checkpoints/060000/pretrained_model/train_config.json +327 -0
  5. checkpoints/060000/training_state/optimizer_param_groups.json +537 -0
  6. checkpoints/060000/training_state/optimizer_state.safetensors +3 -0
  7. checkpoints/060000/training_state/rng_state.safetensors +3 -0
  8. checkpoints/060000/training_state/scheduler_state.json +15 -0
  9. checkpoints/060000/training_state/training_step.json +3 -0
  10. wandb/debug-internal.log +13 -0
  11. wandb/debug.log +49 -0
  12. wandb/offline-run-20251125_163418-lizvmqey/files/config.yaml +326 -0
  13. wandb/offline-run-20251125_163418-lizvmqey/files/output.log +91 -0
  14. wandb/offline-run-20251125_163418-lizvmqey/files/requirements.txt +286 -0
  15. wandb/offline-run-20251125_163418-lizvmqey/files/wandb-metadata.json +1 -0
  16. wandb/offline-run-20251125_163418-lizvmqey/files/wandb-summary.json +1 -0
  17. wandb/offline-run-20251125_163418-lizvmqey/logs/debug-internal.log +0 -0
  18. wandb/offline-run-20251125_163418-lizvmqey/logs/debug.log +21 -0
  19. wandb/offline-run-20251125_163418-lizvmqey/run-lizvmqey.wandb +3 -0
  20. wandb/offline-run-20251127_112044-lizvmqey/files/config.yaml +357 -0
  21. wandb/offline-run-20251127_112044-lizvmqey/files/output.log +82 -0
  22. wandb/offline-run-20251127_112044-lizvmqey/files/requirements.txt +286 -0
  23. wandb/offline-run-20251127_112044-lizvmqey/files/wandb-metadata.json +1 -0
  24. wandb/offline-run-20251127_112044-lizvmqey/files/wandb-summary.json +1 -0
  25. wandb/offline-run-20251127_112044-lizvmqey/logs/debug-internal.log +13 -0
  26. wandb/offline-run-20251127_112044-lizvmqey/logs/debug.log +49 -0
  27. wandb/offline-run-20251127_112044-lizvmqey/run-lizvmqey.wandb +3 -0
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ wandb/offline-run-20251125_163418-lizvmqey/run-lizvmqey.wandb filter=lfs diff=lfs merge=lfs -text
37
+ wandb/offline-run-20251127_112044-lizvmqey/run-lizvmqey.wandb filter=lfs diff=lfs merge=lfs -text
checkpoints/060000/pretrained_model/config.json ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "type": "smolvla",
3
+ "n_obs_steps": 1,
4
+ "normalization_mapping": {
5
+ "VISUAL": "IDENTITY",
6
+ "STATE": "MEAN_STD",
7
+ "ACTION": "MEAN_STD"
8
+ },
9
+ "input_features": {
10
+ "observation.images.image": {
11
+ "type": "VISUAL",
12
+ "shape": [
13
+ 3,
14
+ 256,
15
+ 256
16
+ ]
17
+ },
18
+ "observation.images.wrist_image": {
19
+ "type": "VISUAL",
20
+ "shape": [
21
+ 3,
22
+ 256,
23
+ 256
24
+ ]
25
+ },
26
+ "observation.images.image_mask": {
27
+ "type": "VISUAL",
28
+ "shape": [
29
+ 3,
30
+ 256,
31
+ 256
32
+ ]
33
+ },
34
+ "observation.images.wrist_mask": {
35
+ "type": "VISUAL",
36
+ "shape": [
37
+ 3,
38
+ 256,
39
+ 256
40
+ ]
41
+ },
42
+ "observation.images.object_of_interest_mask": {
43
+ "type": "VISUAL",
44
+ "shape": [
45
+ 3,
46
+ 256,
47
+ 256
48
+ ]
49
+ },
50
+ "observation.images.object_of_interest_wrist_mask": {
51
+ "type": "VISUAL",
52
+ "shape": [
53
+ 3,
54
+ 256,
55
+ 256
56
+ ]
57
+ },
58
+ "observation.state": {
59
+ "type": "STATE",
60
+ "shape": [
61
+ 8
62
+ ]
63
+ },
64
+ "observation.states.ee_state": {
65
+ "type": "STATE",
66
+ "shape": [
67
+ 6
68
+ ]
69
+ },
70
+ "observation.states.joint_state": {
71
+ "type": "STATE",
72
+ "shape": [
73
+ 7
74
+ ]
75
+ },
76
+ "observation.states.gripper_state": {
77
+ "type": "STATE",
78
+ "shape": [
79
+ 2
80
+ ]
81
+ }
82
+ },
83
+ "output_features": {
84
+ "action": {
85
+ "type": "ACTION",
86
+ "shape": [
87
+ 7
88
+ ]
89
+ }
90
+ },
91
+ "device": "cuda",
92
+ "use_amp": false,
93
+ "gradient_accumulation_steps": 1,
94
+ "chunk_size": 50,
95
+ "n_action_steps": 50,
96
+ "max_state_dim": 32,
97
+ "max_action_dim": 32,
98
+ "resize_imgs_with_padding": [
99
+ 512,
100
+ 512
101
+ ],
102
+ "empty_cameras": 0,
103
+ "adapt_to_pi_aloha": false,
104
+ "use_delta_joint_actions_aloha": false,
105
+ "tokenizer_max_length": 48,
106
+ "num_steps": 10,
107
+ "use_cache": true,
108
+ "freeze_vision_encoder": true,
109
+ "train_expert_only": false,
110
+ "train_state_proj": true,
111
+ "optimizer_lr": 0.0001,
112
+ "optimizer_betas": [
113
+ 0.9,
114
+ 0.95
115
+ ],
116
+ "optimizer_eps": 1e-08,
117
+ "optimizer_weight_decay": 1e-10,
118
+ "optimizer_grad_clip_norm": 10.0,
119
+ "scheduler_warmup_steps": 1000,
120
+ "scheduler_decay_steps": 30000,
121
+ "scheduler_decay_lr": 2.5e-06,
122
+ "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct",
123
+ "load_vlm_weights": true,
124
+ "add_image_special_tokens": false,
125
+ "attention_mode": "cross_attn",
126
+ "prefix_length": 0,
127
+ "pad_language_to": "max_length",
128
+ "num_expert_layers": 0,
129
+ "num_vlm_layers": 16,
130
+ "self_attn_every_n_layers": 2,
131
+ "expert_width_multiplier": 0.75,
132
+ "min_period": 0.004,
133
+ "max_period": 4.0
134
+ }
checkpoints/060000/pretrained_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f28ae8b369209f4fbd67a0f6a520d9d8f9b2e8b3e0465bb3c210845e0d13b50c
3
+ size 910404848
checkpoints/060000/pretrained_model/train_config.json ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "dataset": {
3
+ "repo_id": ".",
4
+ "root": "/mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/LIBERO/merged_libero_scale_100_mask_depth_noops_lerobot",
5
+ "episodes": null,
6
+ "image_transforms": {
7
+ "enable": true,
8
+ "max_num_transforms": 3,
9
+ "random_order": false,
10
+ "image_tfs": {
11
+ "hue": {
12
+ "weight": 1.0,
13
+ "type": "ColorJitter",
14
+ "kwargs": {
15
+ "hue": [
16
+ -0.05,
17
+ 0.05
18
+ ]
19
+ }
20
+ },
21
+ "contrast": {
22
+ "weight": 1.0,
23
+ "type": "ColorJitter",
24
+ "kwargs": {
25
+ "contrast": [
26
+ 0.8,
27
+ 1.2
28
+ ]
29
+ }
30
+ },
31
+ "sharpness": {
32
+ "weight": 1.0,
33
+ "type": "SharpnessJitter",
34
+ "kwargs": {
35
+ "sharpness": [
36
+ 0.5,
37
+ 1.5
38
+ ]
39
+ }
40
+ },
41
+ "brightness": {
42
+ "weight": 1.0,
43
+ "type": "ColorJitter",
44
+ "kwargs": {
45
+ "brightness": [
46
+ 0.8,
47
+ 1.2
48
+ ]
49
+ }
50
+ },
51
+ "saturation": {
52
+ "weight": 1.0,
53
+ "type": "ColorJitter",
54
+ "kwargs": {
55
+ "saturation": [
56
+ 0.5,
57
+ 1.5
58
+ ]
59
+ }
60
+ },
61
+ "crop_resize": {
62
+ "weight": 1.0,
63
+ "type": "RandomResizedCrop",
64
+ "kwargs": {
65
+ "size": [
66
+ 256,
67
+ 256
68
+ ],
69
+ "ratio": [
70
+ 1,
71
+ 1
72
+ ],
73
+ "scale": [
74
+ 0.9,
75
+ 0.95
76
+ ]
77
+ }
78
+ },
79
+ "rotate": {
80
+ "weight": 1.0,
81
+ "type": "RandomRotate",
82
+ "kwargs": {
83
+ "degrees": [
84
+ -5,
85
+ 5
86
+ ]
87
+ }
88
+ }
89
+ },
90
+ "wrist_tfs": {
91
+ "hue": {
92
+ "weight": 1.0,
93
+ "type": "ColorJitter",
94
+ "kwargs": {
95
+ "hue": [
96
+ -0.05,
97
+ 0.05
98
+ ]
99
+ }
100
+ },
101
+ "contrast": {
102
+ "weight": 1.0,
103
+ "type": "ColorJitter",
104
+ "kwargs": {
105
+ "contrast": [
106
+ 0.8,
107
+ 1.2
108
+ ]
109
+ }
110
+ },
111
+ "sharpness": {
112
+ "weight": 1.0,
113
+ "type": "SharpnessJitter",
114
+ "kwargs": {
115
+ "sharpness": [
116
+ 0.5,
117
+ 1.5
118
+ ]
119
+ }
120
+ },
121
+ "brightness": {
122
+ "weight": 1.0,
123
+ "type": "ColorJitter",
124
+ "kwargs": {
125
+ "brightness": [
126
+ 0.8,
127
+ 1.2
128
+ ]
129
+ }
130
+ },
131
+ "saturation": {
132
+ "weight": 1.0,
133
+ "type": "ColorJitter",
134
+ "kwargs": {
135
+ "saturation": [
136
+ 0.5,
137
+ 1.5
138
+ ]
139
+ }
140
+ }
141
+ }
142
+ },
143
+ "revision": null,
144
+ "use_imagenet_stats": true,
145
+ "video_backend": "torchcodec",
146
+ "vqa_data_path": null
147
+ },
148
+ "env": null,
149
+ "policy": {
150
+ "type": "smolvla",
151
+ "n_obs_steps": 1,
152
+ "normalization_mapping": {
153
+ "VISUAL": "IDENTITY",
154
+ "STATE": "MEAN_STD",
155
+ "ACTION": "MEAN_STD"
156
+ },
157
+ "input_features": {
158
+ "observation.images.image": {
159
+ "type": "VISUAL",
160
+ "shape": [
161
+ 3,
162
+ 256,
163
+ 256
164
+ ]
165
+ },
166
+ "observation.images.wrist_image": {
167
+ "type": "VISUAL",
168
+ "shape": [
169
+ 3,
170
+ 256,
171
+ 256
172
+ ]
173
+ },
174
+ "observation.images.image_mask": {
175
+ "type": "VISUAL",
176
+ "shape": [
177
+ 3,
178
+ 256,
179
+ 256
180
+ ]
181
+ },
182
+ "observation.images.wrist_mask": {
183
+ "type": "VISUAL",
184
+ "shape": [
185
+ 3,
186
+ 256,
187
+ 256
188
+ ]
189
+ },
190
+ "observation.images.object_of_interest_mask": {
191
+ "type": "VISUAL",
192
+ "shape": [
193
+ 3,
194
+ 256,
195
+ 256
196
+ ]
197
+ },
198
+ "observation.images.object_of_interest_wrist_mask": {
199
+ "type": "VISUAL",
200
+ "shape": [
201
+ 3,
202
+ 256,
203
+ 256
204
+ ]
205
+ },
206
+ "observation.state": {
207
+ "type": "STATE",
208
+ "shape": [
209
+ 8
210
+ ]
211
+ },
212
+ "observation.states.ee_state": {
213
+ "type": "STATE",
214
+ "shape": [
215
+ 6
216
+ ]
217
+ },
218
+ "observation.states.joint_state": {
219
+ "type": "STATE",
220
+ "shape": [
221
+ 7
222
+ ]
223
+ },
224
+ "observation.states.gripper_state": {
225
+ "type": "STATE",
226
+ "shape": [
227
+ 2
228
+ ]
229
+ }
230
+ },
231
+ "output_features": {
232
+ "action": {
233
+ "type": "ACTION",
234
+ "shape": [
235
+ 7
236
+ ]
237
+ }
238
+ },
239
+ "device": "cuda",
240
+ "use_amp": false,
241
+ "gradient_accumulation_steps": 1,
242
+ "chunk_size": 50,
243
+ "n_action_steps": 50,
244
+ "max_state_dim": 32,
245
+ "max_action_dim": 32,
246
+ "resize_imgs_with_padding": [
247
+ 512,
248
+ 512
249
+ ],
250
+ "empty_cameras": 0,
251
+ "adapt_to_pi_aloha": false,
252
+ "use_delta_joint_actions_aloha": false,
253
+ "tokenizer_max_length": 48,
254
+ "num_steps": 10,
255
+ "use_cache": true,
256
+ "freeze_vision_encoder": true,
257
+ "train_expert_only": false,
258
+ "train_state_proj": true,
259
+ "optimizer_lr": 0.0001,
260
+ "optimizer_betas": [
261
+ 0.9,
262
+ 0.95
263
+ ],
264
+ "optimizer_eps": 1e-08,
265
+ "optimizer_weight_decay": 1e-10,
266
+ "optimizer_grad_clip_norm": 10.0,
267
+ "scheduler_warmup_steps": 1000,
268
+ "scheduler_decay_steps": 30000,
269
+ "scheduler_decay_lr": 2.5e-06,
270
+ "vlm_model_name": "HuggingFaceTB/SmolVLM2-500M-Video-Instruct",
271
+ "load_vlm_weights": true,
272
+ "add_image_special_tokens": false,
273
+ "attention_mode": "cross_attn",
274
+ "prefix_length": 0,
275
+ "pad_language_to": "max_length",
276
+ "num_expert_layers": 0,
277
+ "num_vlm_layers": 16,
278
+ "self_attn_every_n_layers": 2,
279
+ "expert_width_multiplier": 0.75,
280
+ "min_period": 0.004,
281
+ "max_period": 4.0
282
+ },
283
+ "output_dir": "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base",
284
+ "job_name": "libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base",
285
+ "resume": true,
286
+ "seed": 42,
287
+ "num_workers": 8,
288
+ "batch_size": 64,
289
+ "steps": 100000,
290
+ "eval_freq": 20000,
291
+ "log_freq": 10,
292
+ "save_checkpoint": true,
293
+ "save_freq": 10000,
294
+ "use_policy_training_preset": true,
295
+ "optimizer": {
296
+ "type": "adamw",
297
+ "lr": 0.0001,
298
+ "weight_decay": 1e-10,
299
+ "grad_clip_norm": 10.0,
300
+ "betas": [
301
+ 0.9,
302
+ 0.95
303
+ ],
304
+ "eps": 1e-08
305
+ },
306
+ "scheduler": {
307
+ "type": "cosine_decay_with_warmup",
308
+ "num_warmup_steps": 1000,
309
+ "num_decay_steps": 30000,
310
+ "peak_lr": 0.0001,
311
+ "decay_lr": 2.5e-06
312
+ },
313
+ "eval": {
314
+ "n_episodes": 50,
315
+ "batch_size": 50,
316
+ "use_async_envs": false
317
+ },
318
+ "wandb": {
319
+ "enable": true,
320
+ "disable_artifact": true,
321
+ "project": "smolvla",
322
+ "entity": "Robotics_VLA",
323
+ "notes": null,
324
+ "run_id": null,
325
+ "mode": "offline"
326
+ }
327
+ }
checkpoints/060000/training_state/optimizer_param_groups.json ADDED
@@ -0,0 +1,537 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "lr": 2.5e-06,
4
+ "betas": [
5
+ 0.9,
6
+ 0.95
7
+ ],
8
+ "eps": 1e-08,
9
+ "weight_decay": 1e-10,
10
+ "amsgrad": false,
11
+ "maximize": false,
12
+ "foreach": null,
13
+ "capturable": false,
14
+ "differentiable": false,
15
+ "fused": null,
16
+ "decoupled_weight_decay": true,
17
+ "initial_lr": 0.0001,
18
+ "params": [
19
+ 0,
20
+ 1,
21
+ 2,
22
+ 3,
23
+ 4,
24
+ 5,
25
+ 6,
26
+ 7,
27
+ 8,
28
+ 9,
29
+ 10,
30
+ 11,
31
+ 12,
32
+ 13,
33
+ 14,
34
+ 15,
35
+ 16,
36
+ 17,
37
+ 18,
38
+ 19,
39
+ 20,
40
+ 21,
41
+ 22,
42
+ 23,
43
+ 24,
44
+ 25,
45
+ 26,
46
+ 27,
47
+ 28,
48
+ 29,
49
+ 30,
50
+ 31,
51
+ 32,
52
+ 33,
53
+ 34,
54
+ 35,
55
+ 36,
56
+ 37,
57
+ 38,
58
+ 39,
59
+ 40,
60
+ 41,
61
+ 42,
62
+ 43,
63
+ 44,
64
+ 45,
65
+ 46,
66
+ 47,
67
+ 48,
68
+ 49,
69
+ 50,
70
+ 51,
71
+ 52,
72
+ 53,
73
+ 54,
74
+ 55,
75
+ 56,
76
+ 57,
77
+ 58,
78
+ 59,
79
+ 60,
80
+ 61,
81
+ 62,
82
+ 63,
83
+ 64,
84
+ 65,
85
+ 66,
86
+ 67,
87
+ 68,
88
+ 69,
89
+ 70,
90
+ 71,
91
+ 72,
92
+ 73,
93
+ 74,
94
+ 75,
95
+ 76,
96
+ 77,
97
+ 78,
98
+ 79,
99
+ 80,
100
+ 81,
101
+ 82,
102
+ 83,
103
+ 84,
104
+ 85,
105
+ 86,
106
+ 87,
107
+ 88,
108
+ 89,
109
+ 90,
110
+ 91,
111
+ 92,
112
+ 93,
113
+ 94,
114
+ 95,
115
+ 96,
116
+ 97,
117
+ 98,
118
+ 99,
119
+ 100,
120
+ 101,
121
+ 102,
122
+ 103,
123
+ 104,
124
+ 105,
125
+ 106,
126
+ 107,
127
+ 108,
128
+ 109,
129
+ 110,
130
+ 111,
131
+ 112,
132
+ 113,
133
+ 114,
134
+ 115,
135
+ 116,
136
+ 117,
137
+ 118,
138
+ 119,
139
+ 120,
140
+ 121,
141
+ 122,
142
+ 123,
143
+ 124,
144
+ 125,
145
+ 126,
146
+ 127,
147
+ 128,
148
+ 129,
149
+ 130,
150
+ 131,
151
+ 132,
152
+ 133,
153
+ 134,
154
+ 135,
155
+ 136,
156
+ 137,
157
+ 138,
158
+ 139,
159
+ 140,
160
+ 141,
161
+ 142,
162
+ 143,
163
+ 144,
164
+ 145,
165
+ 146,
166
+ 147,
167
+ 148,
168
+ 149,
169
+ 150,
170
+ 151,
171
+ 152,
172
+ 153,
173
+ 154,
174
+ 155,
175
+ 156,
176
+ 157,
177
+ 158,
178
+ 159,
179
+ 160,
180
+ 161,
181
+ 162,
182
+ 163,
183
+ 164,
184
+ 165,
185
+ 166,
186
+ 167,
187
+ 168,
188
+ 169,
189
+ 170,
190
+ 171,
191
+ 172,
192
+ 173,
193
+ 174,
194
+ 175,
195
+ 176,
196
+ 177,
197
+ 178,
198
+ 179,
199
+ 180,
200
+ 181,
201
+ 182,
202
+ 183,
203
+ 184,
204
+ 185,
205
+ 186,
206
+ 187,
207
+ 188,
208
+ 189,
209
+ 190,
210
+ 191,
211
+ 192,
212
+ 193,
213
+ 194,
214
+ 195,
215
+ 196,
216
+ 197,
217
+ 198,
218
+ 199,
219
+ 200,
220
+ 201,
221
+ 202,
222
+ 203,
223
+ 204,
224
+ 205,
225
+ 206,
226
+ 207,
227
+ 208,
228
+ 209,
229
+ 210,
230
+ 211,
231
+ 212,
232
+ 213,
233
+ 214,
234
+ 215,
235
+ 216,
236
+ 217,
237
+ 218,
238
+ 219,
239
+ 220,
240
+ 221,
241
+ 222,
242
+ 223,
243
+ 224,
244
+ 225,
245
+ 226,
246
+ 227,
247
+ 228,
248
+ 229,
249
+ 230,
250
+ 231,
251
+ 232,
252
+ 233,
253
+ 234,
254
+ 235,
255
+ 236,
256
+ 237,
257
+ 238,
258
+ 239,
259
+ 240,
260
+ 241,
261
+ 242,
262
+ 243,
263
+ 244,
264
+ 245,
265
+ 246,
266
+ 247,
267
+ 248,
268
+ 249,
269
+ 250,
270
+ 251,
271
+ 252,
272
+ 253,
273
+ 254,
274
+ 255,
275
+ 256,
276
+ 257,
277
+ 258,
278
+ 259,
279
+ 260,
280
+ 261,
281
+ 262,
282
+ 263,
283
+ 264,
284
+ 265,
285
+ 266,
286
+ 267,
287
+ 268,
288
+ 269,
289
+ 270,
290
+ 271,
291
+ 272,
292
+ 273,
293
+ 274,
294
+ 275,
295
+ 276,
296
+ 277,
297
+ 278,
298
+ 279,
299
+ 280,
300
+ 281,
301
+ 282,
302
+ 283,
303
+ 284,
304
+ 285,
305
+ 286,
306
+ 287,
307
+ 288,
308
+ 289,
309
+ 290,
310
+ 291,
311
+ 292,
312
+ 293,
313
+ 294,
314
+ 295,
315
+ 296,
316
+ 297,
317
+ 298,
318
+ 299,
319
+ 300,
320
+ 301,
321
+ 302,
322
+ 303,
323
+ 304,
324
+ 305,
325
+ 306,
326
+ 307,
327
+ 308,
328
+ 309,
329
+ 310,
330
+ 311,
331
+ 312,
332
+ 313,
333
+ 314,
334
+ 315,
335
+ 316,
336
+ 317,
337
+ 318,
338
+ 319,
339
+ 320,
340
+ 321,
341
+ 322,
342
+ 323,
343
+ 324,
344
+ 325,
345
+ 326,
346
+ 327,
347
+ 328,
348
+ 329,
349
+ 330,
350
+ 331,
351
+ 332,
352
+ 333,
353
+ 334,
354
+ 335,
355
+ 336,
356
+ 337,
357
+ 338,
358
+ 339,
359
+ 340,
360
+ 341,
361
+ 342,
362
+ 343,
363
+ 344,
364
+ 345,
365
+ 346,
366
+ 347,
367
+ 348,
368
+ 349,
369
+ 350,
370
+ 351,
371
+ 352,
372
+ 353,
373
+ 354,
374
+ 355,
375
+ 356,
376
+ 357,
377
+ 358,
378
+ 359,
379
+ 360,
380
+ 361,
381
+ 362,
382
+ 363,
383
+ 364,
384
+ 365,
385
+ 366,
386
+ 367,
387
+ 368,
388
+ 369,
389
+ 370,
390
+ 371,
391
+ 372,
392
+ 373,
393
+ 374,
394
+ 375,
395
+ 376,
396
+ 377,
397
+ 378,
398
+ 379,
399
+ 380,
400
+ 381,
401
+ 382,
402
+ 383,
403
+ 384,
404
+ 385,
405
+ 386,
406
+ 387,
407
+ 388,
408
+ 389,
409
+ 390,
410
+ 391,
411
+ 392,
412
+ 393,
413
+ 394,
414
+ 395,
415
+ 396,
416
+ 397,
417
+ 398,
418
+ 399,
419
+ 400,
420
+ 401,
421
+ 402,
422
+ 403,
423
+ 404,
424
+ 405,
425
+ 406,
426
+ 407,
427
+ 408,
428
+ 409,
429
+ 410,
430
+ 411,
431
+ 412,
432
+ 413,
433
+ 414,
434
+ 415,
435
+ 416,
436
+ 417,
437
+ 418,
438
+ 419,
439
+ 420,
440
+ 421,
441
+ 422,
442
+ 423,
443
+ 424,
444
+ 425,
445
+ 426,
446
+ 427,
447
+ 428,
448
+ 429,
449
+ 430,
450
+ 431,
451
+ 432,
452
+ 433,
453
+ 434,
454
+ 435,
455
+ 436,
456
+ 437,
457
+ 438,
458
+ 439,
459
+ 440,
460
+ 441,
461
+ 442,
462
+ 443,
463
+ 444,
464
+ 445,
465
+ 446,
466
+ 447,
467
+ 448,
468
+ 449,
469
+ 450,
470
+ 451,
471
+ 452,
472
+ 453,
473
+ 454,
474
+ 455,
475
+ 456,
476
+ 457,
477
+ 458,
478
+ 459,
479
+ 460,
480
+ 461,
481
+ 462,
482
+ 463,
483
+ 464,
484
+ 465,
485
+ 466,
486
+ 467,
487
+ 468,
488
+ 469,
489
+ 470,
490
+ 471,
491
+ 472,
492
+ 473,
493
+ 474,
494
+ 475,
495
+ 476,
496
+ 477,
497
+ 478,
498
+ 479,
499
+ 480,
500
+ 481,
501
+ 482,
502
+ 483,
503
+ 484,
504
+ 485,
505
+ 486,
506
+ 487,
507
+ 488,
508
+ 489,
509
+ 490,
510
+ 491,
511
+ 492,
512
+ 493,
513
+ 494,
514
+ 495,
515
+ 496,
516
+ 497,
517
+ 498,
518
+ 499,
519
+ 500,
520
+ 501,
521
+ 502,
522
+ 503,
523
+ 504,
524
+ 505,
525
+ 506,
526
+ 507,
527
+ 508,
528
+ 509,
529
+ 510,
530
+ 511,
531
+ 512,
532
+ 513,
533
+ 514,
534
+ 515
535
+ ]
536
+ }
537
+ ]
checkpoints/060000/training_state/optimizer_state.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f1b1472227f3579bf9dad78fc92c1d39265b7e953a177420736bef19ed47571
3
+ size 1248899100
checkpoints/060000/training_state/rng_state.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:236fc8d31470b51befb4e56e67358ab7dc83b2a431f063c80b5056af341b87b7
3
+ size 15708
checkpoints/060000/training_state/scheduler_state.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "base_lrs": [
3
+ 0.0001
4
+ ],
5
+ "last_epoch": 60000,
6
+ "_step_count": 60001,
7
+ "_is_initial": false,
8
+ "_get_lr_called_within_step": false,
9
+ "_last_lr": [
10
+ 2.5e-06
11
+ ],
12
+ "lr_lambdas": [
13
+ null
14
+ ]
15
+ }
checkpoints/060000/training_state/training_step.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "step": 60000
3
+ }
wandb/debug-internal.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-11-27T11:20:45.378937954+01:00","level":"INFO","msg":"stream: starting","core version":"0.22.3"}
2
+ {"time":"2025-11-27T11:20:45.409919704+01:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not create portfile"}
3
+ {"time":"2025-11-27T11:20:45.425921086+01:00","level":"WARN","msg":"featurechecker: GraphQL client is nil, skipping feature loading"}
4
+ {"time":"2025-11-27T11:20:45.425965495+01:00","level":"INFO","msg":"stream: created new stream","id":"lizvmqey"}
5
+ {"time":"2025-11-27T11:20:45.425988095+01:00","level":"INFO","msg":"handler: started","stream_id":"lizvmqey"}
6
+ {"time":"2025-11-27T11:20:45.428918285+01:00","level":"INFO","msg":"stream: started","id":"lizvmqey"}
7
+ {"time":"2025-11-27T11:20:45.428917715+01:00","level":"INFO","msg":"sender: started","stream_id":"lizvmqey"}
8
+ {"time":"2025-11-27T11:20:45.428956845+01:00","level":"INFO","msg":"writer: started","stream_id":"lizvmqey"}
9
+ {"time":"2025-11-27T11:20:45.429853649+01:00","level":"WARN","msg":"runupserter: server does not expand metric globs but the x_server_side_expand_glob_metrics setting is set; ignoring"}
10
+ {"time":"2025-11-27T14:51:55.122025249+01:00","level":"INFO","msg":"stream: closing","id":"lizvmqey"}
11
+ {"time":"2025-11-27T14:51:55.125975233+01:00","level":"INFO","msg":"handler: closed","stream_id":"lizvmqey"}
12
+ {"time":"2025-11-27T14:51:55.127619953+01:00","level":"INFO","msg":"sender: closed","stream_id":"lizvmqey"}
13
+ {"time":"2025-11-27T14:51:55.127656972+01:00","level":"INFO","msg":"stream: closed","id":"lizvmqey"}
wandb/debug.log ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_setup.py:_flush():81] Current SDK version is 0.22.3
2
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_setup.py:_flush():81] Configure stats pid to 1938939
3
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_setup.py:_flush():81] Loading settings from /user/hominhduy.nguyen/u15271/.config/wandb/settings
4
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_setup.py:_flush():81] Loading settings from /mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/binh/Smolvla_capstone_project/wandb/settings
5
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_setup.py:_flush():81] Loading settings from environment variables
6
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_init.py:setup_run_log_directory():706] Logging user logs to /projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base/wandb/offline-run-20251127_112044-lizvmqey/logs/debug.log
7
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_init.py:setup_run_log_directory():707] Logging internal logs to /projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base/wandb/offline-run-20251127_112044-lizvmqey/logs/debug-internal.log
8
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_init.py:init():833] calling init triggers
9
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_init.py:init():838] wandb.init called with sweep_config: {}
10
+ config: {'dataset': {'repo_id': '.', 'root': '/mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/LIBERO/merged_libero_scale_100_mask_depth_noops_lerobot', 'episodes': None, 'image_transforms': {'enable': True, 'max_num_transforms': 3, 'random_order': False, 'image_tfs': {'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}, 'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}, 'crop_resize': {'weight': 1.0, 'type': 'RandomResizedCrop', 'kwargs': {'size': [256, 256], 'ratio': [1, 1], 'scale': [0.9, 0.95]}}, 'rotate': {'weight': 1.0, 'type': 'RandomRotate', 'kwargs': {'degrees': [-5, 5]}}}, 'wrist_tfs': {'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}, 'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}}}, 'revision': None, 'use_imagenet_stats': True, 'video_backend': 'torchcodec', 'vqa_data_path': None}, 'env': None, 'policy': {'type': 'smolvla', 'n_obs_steps': 1, 'normalization_mapping': {'VISUAL': <NormalizationMode.IDENTITY: 'IDENTITY'>, 'STATE': <NormalizationMode.MEAN_STD: 'MEAN_STD'>, 'ACTION': <NormalizationMode.MEAN_STD: 'MEAN_STD'>}, 'input_features': {'observation.images.image': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.images.wrist_image': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.images.image_mask': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.images.wrist_mask': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.images.object_of_interest_mask': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.images.object_of_interest_wrist_mask': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.state': {'type': <FeatureType.STATE: 'STATE'>, 'shape': [8]}, 'observation.states.ee_state': {'type': <FeatureType.STATE: 'STATE'>, 'shape': [6]}, 'observation.states.joint_state': {'type': <FeatureType.STATE: 'STATE'>, 'shape': [7]}, 'observation.states.gripper_state': {'type': <FeatureType.STATE: 'STATE'>, 'shape': [2]}}, 'output_features': {'action': {'type': <FeatureType.ACTION: 'ACTION'>, 'shape': [7]}}, 'device': 'cuda', 'use_amp': False, 'gradient_accumulation_steps': 1, 'chunk_size': 50, 'n_action_steps': 50, 'max_state_dim': 32, 'max_action_dim': 32, 'resize_imgs_with_padding': [512, 512], 'empty_cameras': 0, 'adapt_to_pi_aloha': False, 'use_delta_joint_actions_aloha': False, 'tokenizer_max_length': 48, 'num_steps': 10, 'use_cache': True, 'freeze_vision_encoder': True, 'train_expert_only': False, 'train_state_proj': True, 'optimizer_lr': 0.0001, 'optimizer_betas': [0.9, 0.95], 'optimizer_eps': 1e-08, 'optimizer_weight_decay': 1e-10, 'optimizer_grad_clip_norm': 10.0, 'scheduler_warmup_steps': 1000, 'scheduler_decay_steps': 30000, 'scheduler_decay_lr': 2.5e-06, 'vlm_model_name': 'HuggingFaceTB/SmolVLM2-500M-Video-Instruct', 'load_vlm_weights': True, 'add_image_special_tokens': False, 'attention_mode': 'cross_attn', 'prefix_length': 0, 'pad_language_to': 'max_length', 'num_expert_layers': 0, 'num_vlm_layers': 16, 'self_attn_every_n_layers': 2, 'expert_width_multiplier': 0.75, 'min_period': 0.004, 'max_period': 4.0}, 'output_dir': '/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base', 'job_name': 'libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base', 'resume': True, 'seed': 42, 'num_workers': 8, 'batch_size': 64, 'steps': 100000, 'eval_freq': 20000, 'log_freq': 10, 'save_checkpoint': True, 'save_freq': 10000, 'use_policy_training_preset': True, 'optimizer': {'type': 'adamw', 'lr': 0.0001, 'weight_decay': 1e-10, 'grad_clip_norm': 10.0, 'betas': [0.9, 0.95], 'eps': 1e-08}, 'scheduler': {'type': 'cosine_decay_with_warmup', 'num_warmup_steps': 1000, 'num_decay_steps': 30000, 'peak_lr': 0.0001, 'decay_lr': 2.5e-06}, 'eval': {'n_episodes': 50, 'batch_size': 50, 'use_async_envs': False}, 'wandb': {'enable': True, 'disable_artifact': True, 'project': 'smolvla', 'entity': 'Robotics_VLA', 'notes': None, 'run_id': None, 'mode': 'offline'}, '_wandb': {}}
11
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_init.py:init():881] starting backend
12
+ 2025-11-27 11:20:45,217 INFO MainThread:1938939 [wandb_init.py:init():884] sending inform_init request
13
+ 2025-11-27 11:20:45,227 INFO MainThread:1938939 [wandb_init.py:init():892] backend started and connected
14
+ 2025-11-27 11:20:45,229 INFO MainThread:1938939 [wandb_init.py:init():962] updated telemetry
15
+ 2025-11-27 11:20:45,248 INFO MainThread:1938939 [wandb_init.py:init():986] communicating run to backend with 90.0 second timeout
16
+ 2025-11-27 11:20:45,432 INFO MainThread:1938939 [wandb_init.py:init():1033] starting run threads in backend
17
+ 2025-11-27 11:20:45,788 INFO MainThread:1938939 [wandb_run.py:_console_start():2506] atexit reg
18
+ 2025-11-27 11:20:45,788 INFO MainThread:1938939 [wandb_run.py:_redirect():2354] redirect: wrap_raw
19
+ 2025-11-27 11:20:45,789 INFO MainThread:1938939 [wandb_run.py:_redirect():2423] Wrapping output streams.
20
+ 2025-11-27 11:20:45,789 INFO MainThread:1938939 [wandb_run.py:_redirect():2446] Redirects installed.
21
+ 2025-11-27 11:20:45,800 INFO MainThread:1938939 [wandb_init.py:init():1073] run started, returning control to user process
22
+ 2025-11-27 14:51:55,121 INFO wandb-AsyncioManager-main:1938939 [service_client.py:_forward_responses():80] Reached EOF.
23
+ 2025-11-27 14:51:55,122 INFO wandb-AsyncioManager-main:1938939 [mailbox.py:close():137] Closing mailbox, abandoning 0 handles.
24
+ 2025-11-27 14:51:55,307 ERROR wandb-AsyncioManager-main:1938939 [asyncio_manager.py:fn_wrap_exceptions():183] Uncaught exception in run_soon callback.
25
+ Traceback (most recent call last):
26
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/site-packages/wandb/sdk/lib/asyncio_manager.py", line 181, in fn_wrap_exceptions
27
+ await fn()
28
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/site-packages/wandb/sdk/lib/service/service_client.py", line 38, in publish
29
+ await self._send_server_request(request)
30
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/site-packages/wandb/sdk/lib/service/service_client.py", line 64, in _send_server_request
31
+ await self._writer.drain()
32
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/asyncio/streams.py", line 371, in drain
33
+ await self._protocol._drain_helper()
34
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/asyncio/streams.py", line 167, in _drain_helper
35
+ raise ConnectionResetError('Connection lost')
36
+ ConnectionResetError: Connection lost
37
+ 2025-11-27 14:51:55,322 ERROR wandb-AsyncioManager-main:1938939 [asyncio_manager.py:fn_wrap_exceptions():183] Uncaught exception in run_soon callback.
38
+ Traceback (most recent call last):
39
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/site-packages/wandb/sdk/lib/asyncio_manager.py", line 181, in fn_wrap_exceptions
40
+ await fn()
41
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/site-packages/wandb/sdk/lib/service/service_client.py", line 38, in publish
42
+ await self._send_server_request(request)
43
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/site-packages/wandb/sdk/lib/service/service_client.py", line 64, in _send_server_request
44
+ await self._writer.drain()
45
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/asyncio/streams.py", line 371, in drain
46
+ await self._protocol._drain_helper()
47
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/asyncio/streams.py", line 167, in _drain_helper
48
+ raise ConnectionResetError('Connection lost')
49
+ ConnectionResetError: Connection lost
wandb/offline-run-20251125_163418-lizvmqey/files/config.yaml ADDED
@@ -0,0 +1,326 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.10.19
7
+ cli_version: 0.22.3
8
+ framework: huggingface
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ start_time: 1764084858
12
+ t:
13
+ 1:
14
+ - 1
15
+ - 41
16
+ - 49
17
+ - 51
18
+ - 71
19
+ 2:
20
+ - 1
21
+ - 2
22
+ - 3
23
+ - 11
24
+ - 41
25
+ - 49
26
+ - 51
27
+ - 71
28
+ 3:
29
+ - 4
30
+ - 13
31
+ - 15
32
+ - 16
33
+ - 42
34
+ - 61
35
+ 4: 3.10.19
36
+ 5: 0.22.3
37
+ 13: linux-x86_64
38
+ e:
39
+ tgzz60htaej5eivyg1ohkojicqbx6n35:
40
+ os: Linux-4.18.0-553.77.1.el8_10.x86_64-x86_64-with-glibc2.28
41
+ python: CPython 3.10.19
42
+ started_at: '2025-11-25T15:34:18.399929Z'
43
+ args:
44
+ - --policy.path=/mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/binh/smol_pretrained/smolvla_base
45
+ - --dataset.root=/mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/LIBERO/merged_libero_scale_100_mask_depth_noops_lerobot
46
+ - --output_dir=outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base
47
+ - --job_name=libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base
48
+ - --config_path=configs/smolvla_config/other_default.json
49
+ - --batch_size=64
50
+ - --steps=100000
51
+ - --policy.gradient_accumulation_steps=1
52
+ - --wandb.mode=offline
53
+ - --log_freq=10
54
+ program: /mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/binh/Smolvla_capstone_project/lerobot/scripts/train_accelerate.py
55
+ code_path: lerobot/scripts/train_accelerate.py
56
+ code_path_local: lerobot/scripts/train_accelerate.py
57
+ git:
58
+ remote_url: git@github-binh:jibby2803/Smolvla_capstone_project.git
59
+ commit: 0f72ba315e381d6a8095782824a2d0c70faad469
60
+ root: outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base
61
+ host: ggpu192
62
+ executable: /projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/bin/python3.10
63
+ cpu_count: 64
64
+ cpu_count_logical: 128
65
+ disk:
66
+ /:
67
+ total: '270465425408'
68
+ used: '7019204608'
69
+ memory:
70
+ total: '540930854912'
71
+ slurm:
72
+ job_id: '11843069'
73
+ mpi_type: pmi2
74
+ writer_id: tgzz60htaej5eivyg1ohkojicqbx6n35
75
+ dataset:
76
+ desc: null
77
+ value:
78
+ repo_id: .
79
+ root: /mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/LIBERO/merged_libero_scale_100_mask_depth_noops_lerobot
80
+ episodes: null
81
+ image_transforms:
82
+ enable: true
83
+ max_num_transforms: 3
84
+ random_order: false
85
+ image_tfs:
86
+ hue:
87
+ weight: 1.0
88
+ type: ColorJitter
89
+ kwargs:
90
+ hue:
91
+ - -0.05
92
+ - 0.05
93
+ contrast:
94
+ weight: 1.0
95
+ type: ColorJitter
96
+ kwargs:
97
+ contrast:
98
+ - 0.8
99
+ - 1.2
100
+ sharpness:
101
+ weight: 1.0
102
+ type: SharpnessJitter
103
+ kwargs:
104
+ sharpness:
105
+ - 0.5
106
+ - 1.5
107
+ brightness:
108
+ weight: 1.0
109
+ type: ColorJitter
110
+ kwargs:
111
+ brightness:
112
+ - 0.8
113
+ - 1.2
114
+ saturation:
115
+ weight: 1.0
116
+ type: ColorJitter
117
+ kwargs:
118
+ saturation:
119
+ - 0.5
120
+ - 1.5
121
+ crop_resize:
122
+ weight: 1.0
123
+ type: RandomResizedCrop
124
+ kwargs:
125
+ size:
126
+ - 256
127
+ - 256
128
+ ratio:
129
+ - 1
130
+ - 1
131
+ scale:
132
+ - 0.9
133
+ - 0.95
134
+ rotate:
135
+ weight: 1.0
136
+ type: RandomRotate
137
+ kwargs:
138
+ degrees:
139
+ - -5
140
+ - 5
141
+ wrist_tfs:
142
+ hue:
143
+ weight: 1.0
144
+ type: ColorJitter
145
+ kwargs:
146
+ hue:
147
+ - -0.05
148
+ - 0.05
149
+ contrast:
150
+ weight: 1.0
151
+ type: ColorJitter
152
+ kwargs:
153
+ contrast:
154
+ - 0.8
155
+ - 1.2
156
+ sharpness:
157
+ weight: 1.0
158
+ type: SharpnessJitter
159
+ kwargs:
160
+ sharpness:
161
+ - 0.5
162
+ - 1.5
163
+ brightness:
164
+ weight: 1.0
165
+ type: ColorJitter
166
+ kwargs:
167
+ brightness:
168
+ - 0.8
169
+ - 1.2
170
+ saturation:
171
+ weight: 1.0
172
+ type: ColorJitter
173
+ kwargs:
174
+ saturation:
175
+ - 0.5
176
+ - 1.5
177
+ revision: null
178
+ use_imagenet_stats: true
179
+ video_backend: torchcodec
180
+ vqa_data_path: null
181
+ env:
182
+ desc: null
183
+ value: null
184
+ policy:
185
+ desc: null
186
+ value:
187
+ type: smolvla
188
+ n_obs_steps: 1
189
+ normalization_mapping:
190
+ VISUAL: IDENTITY
191
+ STATE: MEAN_STD
192
+ ACTION: MEAN_STD
193
+ input_features:
194
+ observation.images.image:
195
+ type: VISUAL
196
+ shape:
197
+ - 3
198
+ - 256
199
+ - 256
200
+ observation.images.wrist_image:
201
+ type: VISUAL
202
+ shape:
203
+ - 3
204
+ - 256
205
+ - 256
206
+ observation.state:
207
+ type: STATE
208
+ shape:
209
+ - 8
210
+ output_features:
211
+ action:
212
+ type: ACTION
213
+ shape:
214
+ - 7
215
+ device: cuda
216
+ use_amp: false
217
+ gradient_accumulation_steps: 1
218
+ chunk_size: 50
219
+ n_action_steps: 50
220
+ max_state_dim: 32
221
+ max_action_dim: 32
222
+ resize_imgs_with_padding:
223
+ - 512
224
+ - 512
225
+ empty_cameras: 0
226
+ adapt_to_pi_aloha: false
227
+ use_delta_joint_actions_aloha: false
228
+ tokenizer_max_length: 48
229
+ num_steps: 10
230
+ use_cache: true
231
+ freeze_vision_encoder: true
232
+ train_expert_only: false
233
+ train_state_proj: true
234
+ optimizer_lr: 0.0001
235
+ optimizer_betas:
236
+ - 0.9
237
+ - 0.95
238
+ optimizer_eps: 1.0e-08
239
+ optimizer_weight_decay: 1.0e-10
240
+ optimizer_grad_clip_norm: 10.0
241
+ scheduler_warmup_steps: 1000
242
+ scheduler_decay_steps: 30000
243
+ scheduler_decay_lr: 2.5e-06
244
+ vlm_model_name: HuggingFaceTB/SmolVLM2-500M-Video-Instruct
245
+ load_vlm_weights: true
246
+ add_image_special_tokens: false
247
+ attention_mode: cross_attn
248
+ prefix_length: 0
249
+ pad_language_to: max_length
250
+ num_expert_layers: 0
251
+ num_vlm_layers: 16
252
+ self_attn_every_n_layers: 2
253
+ expert_width_multiplier: 0.75
254
+ min_period: 0.004
255
+ max_period: 4.0
256
+ output_dir:
257
+ desc: null
258
+ value: outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base
259
+ job_name:
260
+ desc: null
261
+ value: libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base
262
+ resume:
263
+ desc: null
264
+ value: false
265
+ seed:
266
+ desc: null
267
+ value: 42
268
+ num_workers:
269
+ desc: null
270
+ value: 8
271
+ batch_size:
272
+ desc: null
273
+ value: 64
274
+ steps:
275
+ desc: null
276
+ value: 100000
277
+ eval_freq:
278
+ desc: null
279
+ value: 20000
280
+ log_freq:
281
+ desc: null
282
+ value: 10
283
+ save_checkpoint:
284
+ desc: null
285
+ value: true
286
+ save_freq:
287
+ desc: null
288
+ value: 10000
289
+ use_policy_training_preset:
290
+ desc: null
291
+ value: true
292
+ optimizer:
293
+ desc: null
294
+ value:
295
+ type: adamw
296
+ lr: 0.0001
297
+ weight_decay: 1.0e-10
298
+ grad_clip_norm: 10.0
299
+ betas:
300
+ - 0.9
301
+ - 0.95
302
+ eps: 1.0e-08
303
+ scheduler:
304
+ desc: null
305
+ value:
306
+ type: cosine_decay_with_warmup
307
+ num_warmup_steps: 1000
308
+ num_decay_steps: 30000
309
+ peak_lr: 0.0001
310
+ decay_lr: 2.5e-06
311
+ eval:
312
+ desc: null
313
+ value:
314
+ n_episodes: 50
315
+ batch_size: 50
316
+ use_async_envs: false
317
+ wandb:
318
+ desc: null
319
+ value:
320
+ enable: true
321
+ disable_artifact: true
322
+ project: smolvla
323
+ entity: Robotics_VLA
324
+ notes: null
325
+ run_id: null
326
+ mode: offline
wandb/offline-run-20251125_163418-lizvmqey/files/output.log ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ INFO 2025-11-25 16:34:19 celerate.py:159 Creating dataset
2
+ Resolving data files: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1676/1676 [00:00<00:00, 21230.57it/s]
3
+ Loading dataset shards: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 78/78 [00:00<00:00, 191.37it/s]
4
+ INFO 2025-11-25 16:34:26 celerate.py:170 Creating policy
5
+ `torch_dtype` is deprecated! Use `dtype` instead!
6
+ INFO 2025-11-25 16:34:37 celerate.py:181 Creating optimizer and scheduler
7
+ INFO 2025-11-25 16:34:37 celerate.py:221 Output dir: outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base
8
+ INFO 2025-11-25 16:34:37 celerate.py:224 cfg.steps=100000 (100K)
9
+ INFO 2025-11-25 16:34:37 celerate.py:225 dataset.num_frames=269918 (270K)
10
+ INFO 2025-11-25 16:34:37 celerate.py:226 dataset.num_episodes=1676
11
+ INFO 2025-11-25 16:34:37 celerate.py:227 num_learnable_params=317226912 (317M)
12
+ INFO 2025-11-25 16:34:37 celerate.py:228 num_total_params=450968810 (451M)
13
+ INFO 2025-11-25 16:34:37 celerate.py:229 Number of processes: 1
14
+ INFO 2025-11-25 16:34:37 celerate.py:230 Device: cuda
15
+ INFO 2025-11-25 16:34:37 celerate.py:231 Mixed precision: no
16
+ INFO 2025-11-25 16:34:37 celerate.py:253 Start offline training on a fixed dataset
17
+ load pretrained policy
18
+ Loading HuggingFaceTB/SmolVLM2-500M-Video-Instruct weights ...
19
+ Load pretrained VLM successfully!
20
+ Reducing the number of VLM layers to 16 ...
21
+ =================================
22
+ USING MODEL FROM BINH'S DIR
23
+ =================================
24
+ Loading weights from local directory
25
+ INFO 2025-11-25 16:34:57 celerate.py:303 step:10 smpl:640 ep:4 epch:0.00 loss:2.883 grdn:11.068 lr:6.5e-07 updt_s:1.108 data_s:0.784
26
+ INFO 2025-11-25 16:35:06 celerate.py:303 step:20 smpl:1K ep:8 epch:0.00 loss:2.981 grdn:11.440 lr:1.6e-06 updt_s:0.797 data_s:0.036
27
+ INFO 2025-11-25 16:35:15 celerate.py:303 step:30 smpl:2K ep:12 epch:0.01 loss:2.793 grdn:10.541 lr:2.6e-06 updt_s:0.802 data_s:0.039
28
+ INFO 2025-11-25 16:35:25 celerate.py:303 step:40 smpl:3K ep:16 epch:0.01 loss:2.765 grdn:10.229 lr:3.6e-06 updt_s:0.851 data_s:0.057
29
+ INFO 2025-11-25 16:35:34 celerate.py:303 step:50 smpl:3K ep:20 epch:0.01 loss:2.514 grdn:8.306 lr:4.6e-06 updt_s:0.802 data_s:0.034
30
+ INFO 2025-11-25 16:35:43 celerate.py:303 step:60 smpl:4K ep:24 epch:0.01 loss:2.231 grdn:7.228 lr:5.6e-06 updt_s:0.793 data_s:0.039
31
+ INFO 2025-11-25 16:35:52 celerate.py:303 step:70 smpl:4K ep:28 epch:0.02 loss:2.163 grdn:6.576 lr:6.6e-06 updt_s:0.810 data_s:0.032
32
+ INFO 2025-11-25 16:36:01 celerate.py:303 step:80 smpl:5K ep:32 epch:0.02 loss:1.997 grdn:5.803 lr:7.6e-06 updt_s:0.791 data_s:0.033
33
+ INFO 2025-11-25 16:36:10 celerate.py:303 step:90 smpl:6K ep:36 epch:0.02 loss:1.715 grdn:4.409 lr:8.6e-06 updt_s:0.795 data_s:0.039
34
+ INFO 2025-11-25 16:36:20 celerate.py:303 step:100 smpl:6K ep:40 epch:0.02 loss:1.668 grdn:3.878 lr:9.6e-06 updt_s:0.800 data_s:0.038
35
+ INFO 2025-11-25 16:36:29 celerate.py:303 step:110 smpl:7K ep:44 epch:0.03 loss:1.624 grdn:3.729 lr:1.1e-05 updt_s:0.802 data_s:0.031
36
+ INFO 2025-11-25 16:36:38 celerate.py:303 step:120 smpl:8K ep:48 epch:0.03 loss:1.479 grdn:3.140 lr:1.2e-05 updt_s:0.806 data_s:0.037
37
+ INFO 2025-11-25 16:36:47 celerate.py:303 step:130 smpl:8K ep:52 epch:0.03 loss:1.371 grdn:2.696 lr:1.3e-05 updt_s:0.801 data_s:0.034
38
+ INFO 2025-11-25 16:36:56 celerate.py:303 step:140 smpl:9K ep:56 epch:0.03 loss:1.314 grdn:2.589 lr:1.4e-05 updt_s:0.800 data_s:0.040
39
+ INFO 2025-11-25 16:37:05 celerate.py:303 step:150 smpl:10K ep:60 epch:0.04 loss:1.230 grdn:2.407 lr:1.5e-05 updt_s:0.798 data_s:0.033
40
+ INFO 2025-11-25 16:37:14 celerate.py:303 step:160 smpl:10K ep:64 epch:0.04 loss:1.169 grdn:2.512 lr:1.6e-05 updt_s:0.791 data_s:0.032
41
+ INFO 2025-11-25 16:37:23 celerate.py:303 step:170 smpl:11K ep:68 epch:0.04 loss:1.085 grdn:2.029 lr:1.7e-05 updt_s:0.794 data_s:0.035
42
+ INFO 2025-11-25 16:37:32 celerate.py:303 step:180 smpl:12K ep:72 epch:0.04 loss:1.038 grdn:2.114 lr:1.8e-05 updt_s:0.796 data_s:0.034
43
+ INFO 2025-11-25 16:37:41 celerate.py:303 step:190 smpl:12K ep:76 epch:0.05 loss:0.973 grdn:1.941 lr:1.9e-05 updt_s:0.796 data_s:0.035
44
+ INFO 2025-11-25 16:37:51 celerate.py:303 step:200 smpl:13K ep:79 epch:0.05 loss:0.950 grdn:2.031 lr:2.0e-05 updt_s:0.797 data_s:0.033
45
+ INFO 2025-11-25 16:38:00 celerate.py:303 step:210 smpl:13K ep:83 epch:0.05 loss:0.903 grdn:1.960 lr:2.1e-05 updt_s:0.798 data_s:0.033
46
+ INFO 2025-11-25 16:38:09 celerate.py:303 step:220 smpl:14K ep:87 epch:0.05 loss:0.854 grdn:2.069 lr:2.2e-05 updt_s:0.804 data_s:0.032
47
+ INFO 2025-11-25 16:38:18 celerate.py:303 step:230 smpl:15K ep:91 epch:0.05 loss:0.832 grdn:2.082 lr:2.3e-05 updt_s:0.795 data_s:0.033
48
+ INFO 2025-11-25 16:38:27 celerate.py:303 step:240 smpl:15K ep:95 epch:0.06 loss:0.766 grdn:2.192 lr:2.4e-05 updt_s:0.798 data_s:0.035
49
+ INFO 2025-11-25 16:38:36 celerate.py:303 step:250 smpl:16K ep:99 epch:0.06 loss:0.762 grdn:1.971 lr:2.5e-05 updt_s:0.796 data_s:0.037
50
+ INFO 2025-11-25 16:38:45 celerate.py:303 step:260 smpl:17K ep:103 epch:0.06 loss:0.762 grdn:2.012 lr:2.6e-05 updt_s:0.801 data_s:0.032
51
+ INFO 2025-11-25 16:38:55 celerate.py:303 step:270 smpl:17K ep:107 epch:0.06 loss:0.744 grdn:1.818 lr:2.7e-05 updt_s:0.837 data_s:0.035
52
+ INFO 2025-11-25 16:39:04 celerate.py:303 step:280 smpl:18K ep:111 epch:0.07 loss:0.731 grdn:1.822 lr:2.8e-05 updt_s:0.832 data_s:0.035
53
+ INFO 2025-11-25 16:39:23 celerate.py:303 step:290 smpl:19K ep:115 epch:0.07 loss:0.672 grdn:1.998 lr:2.9e-05 updt_s:1.618 data_s:0.081
54
+ INFO 2025-11-25 16:39:48 celerate.py:303 step:300 smpl:19K ep:119 epch:0.07 loss:0.663 grdn:2.073 lr:3.0e-05 updt_s:2.173 data_s:0.064
55
+ INFO 2025-11-25 16:40:13 celerate.py:303 step:310 smpl:20K ep:123 epch:0.07 loss:0.636 grdn:1.682 lr:3.1e-05 updt_s:2.124 data_s:0.070
56
+ INFO 2025-11-25 16:40:37 celerate.py:303 step:320 smpl:20K ep:127 epch:0.08 loss:0.619 grdn:1.908 lr:3.2e-05 updt_s:2.108 data_s:0.056
57
+ INFO 2025-11-25 16:41:01 celerate.py:303 step:330 smpl:21K ep:131 epch:0.08 loss:0.608 grdn:2.209 lr:3.3e-05 updt_s:2.089 data_s:0.062
58
+ INFO 2025-11-25 16:41:27 celerate.py:303 step:340 smpl:22K ep:135 epch:0.08 loss:0.586 grdn:1.878 lr:3.4e-05 updt_s:2.270 data_s:0.054
59
+ INFO 2025-11-25 16:41:52 celerate.py:303 step:350 smpl:22K ep:139 epch:0.08 loss:0.566 grdn:2.130 lr:3.5e-05 updt_s:2.272 data_s:0.048
60
+ INFO 2025-11-25 16:42:18 celerate.py:303 step:360 smpl:23K ep:143 epch:0.09 loss:0.550 grdn:1.780 lr:3.6e-05 updt_s:2.284 data_s:0.049
61
+ INFO 2025-11-25 16:42:43 celerate.py:303 step:370 smpl:24K ep:147 epch:0.09 loss:0.544 grdn:2.035 lr:3.7e-05 updt_s:2.242 data_s:0.050
62
+ INFO 2025-11-25 16:43:09 celerate.py:303 step:380 smpl:24K ep:151 epch:0.09 loss:0.498 grdn:1.997 lr:3.8e-05 updt_s:2.279 data_s:0.048
63
+ INFO 2025-11-25 16:43:34 celerate.py:303 step:390 smpl:25K ep:155 epch:0.09 loss:0.509 grdn:1.968 lr:3.9e-05 updt_s:2.252 data_s:0.053
64
+ INFO 2025-11-25 16:43:58 celerate.py:303 step:400 smpl:26K ep:159 epch:0.09 loss:0.515 grdn:1.930 lr:4.0e-05 updt_s:2.019 data_s:0.064
65
+ INFO 2025-11-25 16:44:23 celerate.py:303 step:410 smpl:26K ep:163 epch:0.10 loss:0.496 grdn:1.964 lr:4.1e-05 updt_s:2.203 data_s:0.065
66
+ INFO 2025-11-25 16:44:47 celerate.py:303 step:420 smpl:27K ep:167 epch:0.10 loss:0.507 grdn:2.167 lr:4.2e-05 updt_s:2.108 data_s:0.055
67
+ INFO 2025-11-25 16:45:12 celerate.py:303 step:430 smpl:28K ep:171 epch:0.10 loss:0.476 grdn:2.264 lr:4.3e-05 updt_s:2.246 data_s:0.054
68
+ INFO 2025-11-25 16:45:38 celerate.py:303 step:440 smpl:28K ep:175 epch:0.10 loss:0.483 grdn:2.109 lr:4.4e-05 updt_s:2.295 data_s:0.047
69
+ INFO 2025-11-25 16:46:02 celerate.py:303 step:450 smpl:29K ep:179 epch:0.11 loss:0.463 grdn:2.102 lr:4.5e-05 updt_s:2.133 data_s:0.055
70
+ INFO 2025-11-25 16:46:28 celerate.py:303 step:460 smpl:29K ep:183 epch:0.11 loss:0.474 grdn:2.084 lr:4.6e-05 updt_s:2.244 data_s:0.047
71
+ INFO 2025-11-25 16:46:51 celerate.py:303 step:470 smpl:30K ep:187 epch:0.11 loss:0.469 grdn:2.300 lr:4.7e-05 updt_s:2.055 data_s:0.062
72
+ INFO 2025-11-25 16:47:16 celerate.py:303 step:480 smpl:31K ep:191 epch:0.11 loss:0.452 grdn:1.881 lr:4.8e-05 updt_s:2.127 data_s:0.059
73
+ INFO 2025-11-25 16:47:40 celerate.py:303 step:490 smpl:31K ep:195 epch:0.12 loss:0.429 grdn:1.763 lr:4.9e-05 updt_s:2.104 data_s:0.059
74
+ INFO 2025-11-25 16:48:04 celerate.py:303 step:500 smpl:32K ep:199 epch:0.12 loss:0.443 grdn:1.952 lr:5.0e-05 updt_s:2.109 data_s:0.056
75
+ INFO 2025-11-25 16:48:28 celerate.py:303 step:510 smpl:33K ep:203 epch:0.12 loss:0.447 grdn:2.047 lr:5.1e-05 updt_s:2.124 data_s:0.064
76
+ INFO 2025-11-25 16:48:52 celerate.py:303 step:520 smpl:33K ep:207 epch:0.12 loss:0.432 grdn:1.912 lr:5.2e-05 updt_s:2.067 data_s:0.061
77
+ INFO 2025-11-25 16:49:15 celerate.py:303 step:530 smpl:34K ep:211 epch:0.13 loss:0.431 grdn:2.212 lr:5.3e-05 updt_s:2.078 data_s:0.059
78
+ INFO 2025-11-25 16:49:39 celerate.py:303 step:540 smpl:35K ep:215 epch:0.13 loss:0.431 grdn:2.405 lr:5.4e-05 updt_s:2.098 data_s:0.055
79
+ INFO 2025-11-25 16:50:03 celerate.py:303 step:550 smpl:35K ep:219 epch:0.13 loss:0.411 grdn:2.581 lr:5.5e-05 updt_s:2.125 data_s:0.055
80
+ INFO 2025-11-25 16:50:27 celerate.py:303 step:560 smpl:36K ep:223 epch:0.13 loss:0.393 grdn:2.113 lr:5.6e-05 updt_s:2.099 data_s:0.058
81
+ INFO 2025-11-25 16:50:51 celerate.py:303 step:570 smpl:36K ep:227 epch:0.14 loss:0.422 grdn:1.959 lr:5.7e-05 updt_s:2.095 data_s:0.059
82
+ INFO 2025-11-25 16:51:15 celerate.py:303 step:580 smpl:37K ep:230 epch:0.14 loss:0.396 grdn:2.173 lr:5.8e-05 updt_s:2.096 data_s:0.064
83
+ INFO 2025-11-25 16:51:39 celerate.py:303 step:590 smpl:38K ep:234 epch:0.14 loss:0.405 grdn:2.552 lr:5.9e-05 updt_s:2.085 data_s:0.060
84
+ INFO 2025-11-25 16:52:03 celerate.py:303 step:600 smpl:38K ep:238 epch:0.14 loss:0.391 grdn:2.288 lr:6.0e-05 updt_s:2.086 data_s:0.055
85
+ INFO 2025-11-25 16:52:27 celerate.py:303 step:610 smpl:39K ep:242 epch:0.14 loss:0.404 grdn:2.486 lr:6.1e-05 updt_s:2.109 data_s:0.061
86
+ INFO 2025-11-25 16:52:51 celerate.py:303 step:620 smpl:40K ep:246 epch:0.15 loss:0.405 grdn:2.145 lr:6.2e-05 updt_s:2.083 data_s:0.059
87
+ INFO 2025-11-25 16:53:15 celerate.py:303 step:630 smpl:40K ep:250 epch:0.15 loss:0.391 grdn:1.959 lr:6.3e-05 updt_s:2.101 data_s:0.056
88
+ INFO 2025-11-25 16:53:38 celerate.py:303 step:640 smpl:41K ep:254 epch:0.15 loss:0.420 grdn:2.396 lr:6.4e-05 updt_s:2.075 data_s:0.060
89
+ INFO 2025-11-25 16:54:02 celerate.py:303 step:650 smpl:42K ep:258 epch:0.15 loss:0.375 grdn:2.170 lr:6.5e-05 updt_s:2.057 data_s:0.062
90
+ INFO 2025-11-25 16:54:26 celerate.py:303 step:660 smpl:42K ep:262 epch:0.16 loss:0.400 grdn:2.426 lr:6.6e-05 updt_s:2.124 data_s:0.054
91
+ INFO 2025-11-25 16:54:50 celerate.py:303 step:670 smpl:43K ep:266 epch:0.16 loss:0.380 grdn:2.022 lr:6.7e-05 updt_s:2.080 data_s:0.055
wandb/offline-run-20251125_163418-lizvmqey/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PySocks==1.7.1
2
+ beautifulsoup4==4.14.2
3
+ gdown==5.2.0
4
+ soupsieve==2.8
5
+ Farama-Notifications==0.0.4
6
+ PyOpenGL==3.1.0
7
+ PySocks==1.7.1
8
+ Send2Trash==1.8.3
9
+ absl-py==2.3.1
10
+ accelerate==1.11.0
11
+ aiohappyeyeballs==2.6.1
12
+ aiohttp==3.13.2
13
+ aiosignal==1.4.0
14
+ annotated-types==0.7.0
15
+ antlr4-python3-runtime==4.8
16
+ anyio==4.11.0
17
+ argon2-cffi==25.1.0
18
+ argon2-cffi-bindings==25.1.0
19
+ arrow==1.4.0
20
+ asciitree==0.3.3
21
+ asttokens==3.0.0
22
+ astunparse==1.6.3
23
+ async-lru==2.0.5
24
+ async-timeout==5.0.1
25
+ attrs==25.4.0
26
+ av==16.0.1
27
+ babel==2.17.0
28
+ bddl==3.6.0
29
+ beautifulsoup4==4.14.2
30
+ bleach==6.3.0
31
+ blinker==1.9.0
32
+ calvin_env==0.0.1
33
+ certifi==2025.10.5
34
+ cffi==2.0.0
35
+ charset-normalizer==3.4.4
36
+ click==8.3.0
37
+ cloudpickle==3.1.1
38
+ cmake==4.1.2
39
+ colorlog==6.10.1
40
+ comm==0.2.3
41
+ contourpy==1.3.2
42
+ cycler==0.12.1
43
+ datasets==3.0.0
44
+ debugpy==1.8.17
45
+ decorator==4.4.2
46
+ deepdiff==8.6.1
47
+ defusedxml==0.7.1
48
+ diffusers==0.35.2
49
+ dill==0.3.8
50
+ docopt==0.6.2
51
+ docstring_parser==0.17.0
52
+ draccus==0.10.0
53
+ easydict==1.13
54
+ einops==0.8.1
55
+ etils==1.13.0
56
+ evdev==1.9.2
57
+ exceptiongroup==1.3.0
58
+ executing==2.2.1
59
+ fasteners==0.20
60
+ fastjsonschema==2.21.2
61
+ filelock==3.20.0
62
+ Flask==3.1.2
63
+ flatbuffers==25.9.23
64
+ fonttools==4.60.1
65
+ fqdn==1.5.1
66
+ freetype-py==2.5.1
67
+ frozenlist==1.8.0
68
+ fsspec==2024.6.1
69
+ future==1.0.0
70
+ gast==0.6.0
71
+ gdown==5.2.0
72
+ gitdb==4.0.12
73
+ GitPython==3.1.45
74
+ glfw==2.10.0
75
+ google-pasta==0.2.0
76
+ grpcio==1.76.0
77
+ gym==0.26.2
78
+ gym-notices==0.1.0
79
+ gymnasium==0.29.1
80
+ h11==0.16.0
81
+ h5py==3.15.1
82
+ hf_transfer==0.1.9
83
+ hf-xet==1.2.0
84
+ httpcore==1.0.9
85
+ httpx==0.28.1
86
+ huggingface-hub==0.36.0
87
+ hydra-colorlog==1.2.0
88
+ hydra-core==1.1.1
89
+ idna==3.11
90
+ imageio==2.37.0
91
+ imageio-ffmpeg==0.6.0
92
+ importlib_metadata==8.7.0
93
+ importlib_resources==6.5.2
94
+ iniconfig==2.3.0
95
+ inquirerpy==0.3.4
96
+ ipykernel==7.1.0
97
+ ipython==8.37.0
98
+ iso8601==2.1.0
99
+ isoduration==20.11.0
100
+ itsdangerous==2.2.0
101
+ jedi==0.19.2
102
+ Jinja2==3.1.6
103
+ joblib==1.5.2
104
+ json5==0.12.1
105
+ jsonlines==4.0.0
106
+ jsonpointer==3.0.0
107
+ jsonschema==4.25.1
108
+ jsonschema-specifications==2025.9.1
109
+ jupyter_client==8.6.3
110
+ jupyter_core==5.9.1
111
+ jupyter-events==0.12.0
112
+ jupyter-lsp==2.3.0
113
+ jupyter_server==2.17.0
114
+ jupyter_server_terminals==0.5.3
115
+ jupyterlab==4.4.10
116
+ jupyterlab_pygments==0.3.0
117
+ jupyterlab_server==2.28.0
118
+ jupytext==1.18.1
119
+ keras==3.12.0
120
+ kiwisolver==1.4.9
121
+ lark==1.3.1
122
+ lerobot==0.1.0
123
+ libclang==18.1.1
124
+ lightning-utilities==0.15.2
125
+ llvmlite==0.45.1
126
+ lxml==6.0.2
127
+ Markdown==3.10
128
+ markdown-it-py==4.0.0
129
+ MarkupSafe==3.0.3
130
+ matplotlib==3.10.7
131
+ matplotlib-inline==0.2.1
132
+ mdit-py-plugins==0.5.0
133
+ mdurl==0.1.2
134
+ mergedeep==1.3.4
135
+ mistune==3.1.4
136
+ ml_dtypes==0.5.4
137
+ moviepy==1.0.3
138
+ mpmath==1.3.0
139
+ mujoco==3.3.7
140
+ multidict==6.7.0
141
+ multiprocess==0.70.16
142
+ mypy_extensions==1.1.0
143
+ namex==0.1.0
144
+ narwhals==2.12.0
145
+ nbclient==0.10.2
146
+ nbconvert==7.16.6
147
+ nbformat==5.10.4
148
+ nest-asyncio==1.6.0
149
+ networkx==3.4.2
150
+ nltk==3.9.2
151
+ notebook_shim==0.2.4
152
+ num2words==0.5.14
153
+ numba==0.62.1
154
+ numcodecs==0.13.1
155
+ numpy==2.2.6
156
+ numpy-quaternion==2024.0.12
157
+ nvidia-cublas-cu12==12.8.4.1
158
+ nvidia-cuda-cupti-cu12==12.8.90
159
+ nvidia-cuda-nvrtc-cu12==12.8.93
160
+ nvidia-cuda-runtime-cu12==12.8.90
161
+ nvidia-cudnn-cu12==9.10.2.21
162
+ nvidia-cufft-cu12==11.3.3.83
163
+ nvidia-cufile-cu12==1.13.1.3
164
+ nvidia-curand-cu12==10.3.9.90
165
+ nvidia-cusolver-cu12==11.7.3.90
166
+ nvidia-cusparse-cu12==12.5.8.93
167
+ nvidia-cusparselt-cu12==0.7.1
168
+ nvidia-ml-py==13.580.82
169
+ nvidia-nccl-cu12==2.27.5
170
+ nvidia-nvjitlink-cu12==12.8.93
171
+ nvidia-nvshmem-cu12==3.3.20
172
+ nvidia-nvtx-cu12==12.8.90
173
+ nvitop==1.5.3
174
+ omegaconf==2.1.2
175
+ opencv-python==4.12.0.88
176
+ opencv-python-headless==4.12.0.88
177
+ opt_einsum==3.4.0
178
+ optree==0.18.0
179
+ orderly-set==5.5.0
180
+ overrides==7.7.0
181
+ packaging==25.0
182
+ pandas==2.3.3
183
+ pandocfilters==1.5.1
184
+ parso==0.8.5
185
+ pexpect==4.9.0
186
+ pfzy==0.3.4
187
+ pillow==12.0.0
188
+ pip==25.2
189
+ platformdirs==4.5.0
190
+ plotly==6.5.0
191
+ pluggy==1.6.0
192
+ proglog==0.1.12
193
+ prometheus_client==0.23.1
194
+ prompt_toolkit==3.0.52
195
+ propcache==0.4.1
196
+ protobuf==6.33.0
197
+ psutil==7.1.2
198
+ ptyprocess==0.7.0
199
+ pure_eval==0.2.3
200
+ pyarrow==22.0.0
201
+ pybullet==3.2.7
202
+ pycollada==0.6
203
+ pycparser==2.23
204
+ pydantic==2.12.3
205
+ pydantic_core==2.41.4
206
+ pyglet==2.1.11
207
+ Pygments==2.19.2
208
+ pyhash==0.9.3
209
+ pymunk==6.11.1
210
+ pynput==1.8.1
211
+ pyparsing==3.2.5
212
+ pyrender==0.1.45
213
+ pytest==8.4.2
214
+ python-dateutil==2.9.0.post0
215
+ python-dotenv==1.2.1
216
+ python-json-logger==4.0.0
217
+ python-xlib==0.33
218
+ pytorch-lightning==2.5.6
219
+ pytz==2025.2
220
+ PyYAML==6.0.3
221
+ pyyaml-include==1.4.1
222
+ pyzmq==27.1.0
223
+ referencing==0.37.0
224
+ regex==2025.10.23
225
+ requests==2.32.5
226
+ rerun-sdk==0.26.2
227
+ rfc3339-validator==0.1.4
228
+ rfc3986-validator==0.1.1
229
+ rfc3987-syntax==1.1.0
230
+ rich==14.2.0
231
+ robosuite==1.4.0
232
+ rpds-py==0.28.0
233
+ safetensors==0.6.2
234
+ scipy==1.15.3
235
+ sentry-sdk==2.43.0
236
+ serial==0.0.97
237
+ setuptools==57.5.0
238
+ shtab==1.7.2
239
+ six==1.17.0
240
+ smmap==5.0.2
241
+ sniffio==1.3.1
242
+ soupsieve==2.8
243
+ stack-data==0.6.3
244
+ sympy==1.14.0
245
+ tensorboard==2.20.0
246
+ tensorboard-data-server==0.7.2
247
+ tensorflow==2.20.0
248
+ termcolor==3.2.0
249
+ terminado==0.18.1
250
+ tinycss2==1.4.0
251
+ tokenizers==0.22.1
252
+ toml==0.10.2
253
+ tomli==2.3.0
254
+ torch==2.9.0
255
+ torchcodec==0.8.1
256
+ torchmetrics==1.8.2
257
+ torchvision==0.24.0
258
+ tornado==6.5.2
259
+ tqdm==4.67.1
260
+ traitlets==5.14.3
261
+ transformers==4.57.1
262
+ trimesh==4.9.0
263
+ triton==3.5.0
264
+ typeguard==4.4.4
265
+ typing_extensions==4.15.0
266
+ typing-inspect==0.9.0
267
+ typing-inspection==0.4.2
268
+ tyro==0.9.35
269
+ tzdata==2025.2
270
+ urdfpy==0.0.22
271
+ uri-template==1.3.0
272
+ urllib3==2.5.0
273
+ wandb==0.22.3
274
+ wcwidth==0.2.14
275
+ webcolors==25.10.0
276
+ webencodings==0.5.1
277
+ websocket-client==1.9.0
278
+ Werkzeug==3.1.3
279
+ wheel==0.45.1
280
+ wrapt==2.0.1
281
+ xxhash==3.6.0
282
+ yarl==1.22.0
283
+ zarr==2.18.3
284
+ zipp==3.23.0
285
+ Calvin==0.0.1
286
+ tacto==0.0.3
wandb/offline-run-20251125_163418-lizvmqey/files/wandb-metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"os": "Linux-4.18.0-553.77.1.el8_10.x86_64-x86_64-with-glibc2.28", "python": "CPython 3.10.19", "started_at": "2025-11-25T15:34:18.399929Z", "args": ["--policy.path=/mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/binh/smol_pretrained/smolvla_base", "--dataset.root=/mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/LIBERO/merged_libero_scale_100_mask_depth_noops_lerobot", "--output_dir=outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base", "--job_name=libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base", "--config_path=configs/smolvla_config/other_default.json", "--batch_size=64", "--steps=100000", "--policy.gradient_accumulation_steps=1", "--wandb.mode=offline", "--log_freq=10"], "program": "/mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/binh/Smolvla_capstone_project/lerobot/scripts/train_accelerate.py", "code_path": "lerobot/scripts/train_accelerate.py", "code_path_local": "lerobot/scripts/train_accelerate.py", "git": {"remote_url": "git@github-binh:jibby2803/Smolvla_capstone_project.git", "commit": "0f72ba315e381d6a8095782824a2d0c70faad469"}, "root": "outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base", "host": "ggpu192", "executable": "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/bin/python3.10", "cpu_count": 64, "cpu_count_logical": 128, "disk": {"/": {"total": "270465425408", "used": "7019204608"}}, "memory": {"total": "540930854912"}, "slurm": {"job_id": "11843069", "mpi_type": "pmi2"}, "writer_id": "tgzz60htaej5eivyg1ohkojicqbx6n35"}
wandb/offline-run-20251125_163418-lizvmqey/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_runtime": 1231.530468465, "train/lr": 6.558441558441558e-05, "train/update_s": 2.1237647216068583, "train/tlc_loss": -0.39544302225112915, "train/l1_infer_loss": 0.18404726684093475, "_step": 660, "train/episodes": 262.2805444616513, "train/loss": 0.6010105013847351, "train/losses_after_forward": {"bins": [8.37099278783171e-11, 0.6451714038848877, 1.2903428077697754, 1.935514211654663, 2.580685615539551, 3.2258570194244385, 3.871028423309326, 4.516200065612793, 5.161371231079102, 5.80654239654541, 6.451714038848877, 7.096885681152344, 7.742056846618652, 8.387228012084961, 9.032400131225586, 9.677571296691895, 10.322742462158203, 10.967913627624512, 11.61308479309082, 12.258256912231445, 12.903428077697754, 13.548599243164062, 14.193771362304688, 14.838942527770996, 15.484113693237305, 16.12928581237793, 16.774456024169922, 17.419628143310547, 18.064800262451172, 18.709970474243164, 19.35514259338379, 20.00031280517578, 20.645484924316406], "_type": "histogram", "values": [90542, 6746, 2272, 1089, 657, 364, 224, 140, 102, 55, 55, 31, 29, 24, 14, 6, 8, 7, 6, 3, 7, 4, 0, 0, 4, 2, 1, 1, 0, 1, 4, 2]}, "train/losses_after_rm_padding": {"_type": "histogram", "values": [17114, 2519, 986, 577, 387, 229, 159, 106, 74, 47, 52, 29, 27, 24, 14, 6, 8, 7, 6, 3, 7, 4, 0, 0, 4, 2, 1, 1, 0, 1, 4, 2], "bins": [2.2172486069393926e-09, 0.6451714038848877, 1.2903428077697754, 1.935514211654663, 2.580685615539551, 3.2258570194244385, 3.871028423309326, 4.516200065612793, 5.161371231079102, 5.80654239654541, 6.451714038848877, 7.096885681152344, 7.742056846618652, 8.387228012084961, 9.032400131225586, 9.677571296691895, 10.322742462158203, 10.967913627624512, 11.61308479309082, 12.258256912231445, 12.903428077697754, 13.548599243164062, 14.193771362304688, 14.838942527770996, 15.484113693237305, 16.12928581237793, 16.774456024169922, 17.419628143310547, 18.064800262451172, 18.709970474243164, 19.35514259338379, 20.00031280517578, 20.645484924316406]}, "_timestamp": 1764086066.625332, "train/epochs": 0.15649197163583015, "train/dataloading_s": 0.0544021604815498, "train/total_loss": 0.40328899025917053, "train/steps": 660, "train/samples": 42240, "train/grad_norm": 2.4256782650947573}
wandb/offline-run-20251125_163418-lizvmqey/logs/debug-internal.log ADDED
The diff for this file is too large to render. See raw diff
 
wandb/offline-run-20251125_163418-lizvmqey/logs/debug.log ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-11-25 16:34:18,424 INFO MainThread:1842994 [wandb_setup.py:_flush():81] Current SDK version is 0.22.3
2
+ 2025-11-25 16:34:18,424 INFO MainThread:1842994 [wandb_setup.py:_flush():81] Configure stats pid to 1842994
3
+ 2025-11-25 16:34:18,424 INFO MainThread:1842994 [wandb_setup.py:_flush():81] Loading settings from /user/hominhduy.nguyen/u15271/.config/wandb/settings
4
+ 2025-11-25 16:34:18,424 INFO MainThread:1842994 [wandb_setup.py:_flush():81] Loading settings from /mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/binh/Smolvla_capstone_project/wandb/settings
5
+ 2025-11-25 16:34:18,424 INFO MainThread:1842994 [wandb_setup.py:_flush():81] Loading settings from environment variables
6
+ 2025-11-25 16:34:18,424 INFO MainThread:1842994 [wandb_init.py:setup_run_log_directory():706] Logging user logs to outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base/wandb/offline-run-20251125_163418-lizvmqey/logs/debug.log
7
+ 2025-11-25 16:34:18,424 INFO MainThread:1842994 [wandb_init.py:setup_run_log_directory():707] Logging internal logs to outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base/wandb/offline-run-20251125_163418-lizvmqey/logs/debug-internal.log
8
+ 2025-11-25 16:34:18,424 INFO MainThread:1842994 [wandb_init.py:init():833] calling init triggers
9
+ 2025-11-25 16:34:18,425 INFO MainThread:1842994 [wandb_init.py:init():838] wandb.init called with sweep_config: {}
10
+ config: {'dataset': {'repo_id': '.', 'root': '/mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/LIBERO/merged_libero_scale_100_mask_depth_noops_lerobot', 'episodes': None, 'image_transforms': {'enable': True, 'max_num_transforms': 3, 'random_order': False, 'image_tfs': {'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}, 'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}, 'crop_resize': {'weight': 1.0, 'type': 'RandomResizedCrop', 'kwargs': {'size': [256, 256], 'ratio': [1, 1], 'scale': [0.9, 0.95]}}, 'rotate': {'weight': 1.0, 'type': 'RandomRotate', 'kwargs': {'degrees': [-5, 5]}}}, 'wrist_tfs': {'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}, 'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}}}, 'revision': None, 'use_imagenet_stats': True, 'video_backend': 'torchcodec', 'vqa_data_path': None}, 'env': None, 'policy': {'type': 'smolvla', 'n_obs_steps': 1, 'normalization_mapping': {'VISUAL': <NormalizationMode.IDENTITY: 'IDENTITY'>, 'STATE': <NormalizationMode.MEAN_STD: 'MEAN_STD'>, 'ACTION': <NormalizationMode.MEAN_STD: 'MEAN_STD'>}, 'input_features': {'observation.images.image': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.images.wrist_image': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.state': {'type': <FeatureType.STATE: 'STATE'>, 'shape': [8]}}, 'output_features': {'action': {'type': <FeatureType.ACTION: 'ACTION'>, 'shape': [7]}}, 'device': 'cuda', 'use_amp': False, 'gradient_accumulation_steps': 1, 'chunk_size': 50, 'n_action_steps': 50, 'max_state_dim': 32, 'max_action_dim': 32, 'resize_imgs_with_padding': [512, 512], 'empty_cameras': 0, 'adapt_to_pi_aloha': False, 'use_delta_joint_actions_aloha': False, 'tokenizer_max_length': 48, 'num_steps': 10, 'use_cache': True, 'freeze_vision_encoder': True, 'train_expert_only': False, 'train_state_proj': True, 'optimizer_lr': 0.0001, 'optimizer_betas': [0.9, 0.95], 'optimizer_eps': 1e-08, 'optimizer_weight_decay': 1e-10, 'optimizer_grad_clip_norm': 10.0, 'scheduler_warmup_steps': 1000, 'scheduler_decay_steps': 30000, 'scheduler_decay_lr': 2.5e-06, 'vlm_model_name': 'HuggingFaceTB/SmolVLM2-500M-Video-Instruct', 'load_vlm_weights': True, 'add_image_special_tokens': False, 'attention_mode': 'cross_attn', 'prefix_length': 0, 'pad_language_to': 'max_length', 'num_expert_layers': 0, 'num_vlm_layers': 16, 'self_attn_every_n_layers': 2, 'expert_width_multiplier': 0.75, 'min_period': 0.004, 'max_period': 4.0}, 'output_dir': 'outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base', 'job_name': 'libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base', 'resume': False, 'seed': 42, 'num_workers': 8, 'batch_size': 64, 'steps': 100000, 'eval_freq': 20000, 'log_freq': 10, 'save_checkpoint': True, 'save_freq': 10000, 'use_policy_training_preset': True, 'optimizer': {'type': 'adamw', 'lr': 0.0001, 'weight_decay': 1e-10, 'grad_clip_norm': 10.0, 'betas': [0.9, 0.95], 'eps': 1e-08}, 'scheduler': {'type': 'cosine_decay_with_warmup', 'num_warmup_steps': 1000, 'num_decay_steps': 30000, 'peak_lr': 0.0001, 'decay_lr': 2.5e-06}, 'eval': {'n_episodes': 50, 'batch_size': 50, 'use_async_envs': False}, 'wandb': {'enable': True, 'disable_artifact': True, 'project': 'smolvla', 'entity': 'Robotics_VLA', 'notes': None, 'run_id': None, 'mode': 'offline'}, '_wandb': {}}
11
+ 2025-11-25 16:34:18,425 INFO MainThread:1842994 [wandb_init.py:init():881] starting backend
12
+ 2025-11-25 16:34:18,636 INFO MainThread:1842994 [wandb_init.py:init():884] sending inform_init request
13
+ 2025-11-25 16:34:18,641 INFO MainThread:1842994 [wandb_init.py:init():892] backend started and connected
14
+ 2025-11-25 16:34:18,643 INFO MainThread:1842994 [wandb_init.py:init():962] updated telemetry
15
+ 2025-11-25 16:34:18,656 INFO MainThread:1842994 [wandb_init.py:init():986] communicating run to backend with 90.0 second timeout
16
+ 2025-11-25 16:34:18,738 INFO MainThread:1842994 [wandb_init.py:init():1033] starting run threads in backend
17
+ 2025-11-25 16:34:19,005 INFO MainThread:1842994 [wandb_run.py:_console_start():2506] atexit reg
18
+ 2025-11-25 16:34:19,006 INFO MainThread:1842994 [wandb_run.py:_redirect():2354] redirect: wrap_raw
19
+ 2025-11-25 16:34:19,006 INFO MainThread:1842994 [wandb_run.py:_redirect():2423] Wrapping output streams.
20
+ 2025-11-25 16:34:19,006 INFO MainThread:1842994 [wandb_run.py:_redirect():2446] Redirects installed.
21
+ 2025-11-25 16:34:19,008 INFO MainThread:1842994 [wandb_init.py:init():1073] run started, returning control to user process
wandb/offline-run-20251125_163418-lizvmqey/run-lizvmqey.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26f389026dc187d8af9998f4ec6291a9e5b0d3156cb1c18338ae48b8e8b47cb4
3
+ size 29392896
wandb/offline-run-20251127_112044-lizvmqey/files/config.yaml ADDED
@@ -0,0 +1,357 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ wandb_version: 1
2
+
3
+ _wandb:
4
+ desc: null
5
+ value:
6
+ python_version: 3.10.19
7
+ cli_version: 0.22.3
8
+ framework: huggingface
9
+ is_jupyter_run: false
10
+ is_kaggle_kernel: false
11
+ start_time: 1764238845
12
+ t:
13
+ 1:
14
+ - 1
15
+ - 41
16
+ - 49
17
+ - 51
18
+ - 71
19
+ 2:
20
+ - 1
21
+ - 2
22
+ - 3
23
+ - 11
24
+ - 41
25
+ - 49
26
+ - 51
27
+ - 71
28
+ 3:
29
+ - 4
30
+ - 13
31
+ - 14
32
+ - 15
33
+ - 16
34
+ - 37
35
+ - 42
36
+ - 61
37
+ 4: 3.10.19
38
+ 5: 0.22.3
39
+ 13: linux-x86_64
40
+ e:
41
+ qnrpx9krjcsw5zeotwtgsw7vqa4lj8ft:
42
+ os: Linux-4.18.0-553.74.1.el8_10.x86_64-x86_64-with-glibc2.28
43
+ python: CPython 3.10.19
44
+ started_at: '2025-11-27T10:20:44.939230Z'
45
+ args:
46
+ - --resume=true
47
+ - --output_dir=/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base
48
+ - --config_path=/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base/checkpoints/last/pretrained_model/train_config.json
49
+ program: /mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/binh/Smolvla_capstone_project/lerobot/scripts/train_accelerate.py
50
+ code_path: lerobot/scripts/train_accelerate.py
51
+ code_path_local: lerobot/scripts/train_accelerate.py
52
+ git:
53
+ remote_url: git@github-binh:jibby2803/Smolvla_capstone_project.git
54
+ commit: 5e71e3b01e75c173c975f3aab819927a265c360a
55
+ root: /projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base
56
+ host: ggpu183
57
+ executable: /projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/bin/python3.10
58
+ cpu_count: 64
59
+ cpu_count_logical: 128
60
+ disk:
61
+ /:
62
+ total: '270465433600'
63
+ used: '7152394240'
64
+ memory:
65
+ total: '540930867200'
66
+ slurm:
67
+ mpi_type: pmi2
68
+ job_id: '11851085'
69
+ writer_id: qnrpx9krjcsw5zeotwtgsw7vqa4lj8ft
70
+ dataset:
71
+ desc: null
72
+ value:
73
+ repo_id: .
74
+ root: /mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/LIBERO/merged_libero_scale_100_mask_depth_noops_lerobot
75
+ episodes: null
76
+ image_transforms:
77
+ enable: true
78
+ max_num_transforms: 3
79
+ random_order: false
80
+ image_tfs:
81
+ hue:
82
+ weight: 1.0
83
+ type: ColorJitter
84
+ kwargs:
85
+ hue:
86
+ - -0.05
87
+ - 0.05
88
+ contrast:
89
+ weight: 1.0
90
+ type: ColorJitter
91
+ kwargs:
92
+ contrast:
93
+ - 0.8
94
+ - 1.2
95
+ sharpness:
96
+ weight: 1.0
97
+ type: SharpnessJitter
98
+ kwargs:
99
+ sharpness:
100
+ - 0.5
101
+ - 1.5
102
+ brightness:
103
+ weight: 1.0
104
+ type: ColorJitter
105
+ kwargs:
106
+ brightness:
107
+ - 0.8
108
+ - 1.2
109
+ saturation:
110
+ weight: 1.0
111
+ type: ColorJitter
112
+ kwargs:
113
+ saturation:
114
+ - 0.5
115
+ - 1.5
116
+ crop_resize:
117
+ weight: 1.0
118
+ type: RandomResizedCrop
119
+ kwargs:
120
+ size:
121
+ - 256
122
+ - 256
123
+ ratio:
124
+ - 1
125
+ - 1
126
+ scale:
127
+ - 0.9
128
+ - 0.95
129
+ rotate:
130
+ weight: 1.0
131
+ type: RandomRotate
132
+ kwargs:
133
+ degrees:
134
+ - -5
135
+ - 5
136
+ wrist_tfs:
137
+ hue:
138
+ weight: 1.0
139
+ type: ColorJitter
140
+ kwargs:
141
+ hue:
142
+ - -0.05
143
+ - 0.05
144
+ contrast:
145
+ weight: 1.0
146
+ type: ColorJitter
147
+ kwargs:
148
+ contrast:
149
+ - 0.8
150
+ - 1.2
151
+ sharpness:
152
+ weight: 1.0
153
+ type: SharpnessJitter
154
+ kwargs:
155
+ sharpness:
156
+ - 0.5
157
+ - 1.5
158
+ brightness:
159
+ weight: 1.0
160
+ type: ColorJitter
161
+ kwargs:
162
+ brightness:
163
+ - 0.8
164
+ - 1.2
165
+ saturation:
166
+ weight: 1.0
167
+ type: ColorJitter
168
+ kwargs:
169
+ saturation:
170
+ - 0.5
171
+ - 1.5
172
+ revision: null
173
+ use_imagenet_stats: true
174
+ video_backend: torchcodec
175
+ vqa_data_path: null
176
+ env:
177
+ desc: null
178
+ value: null
179
+ policy:
180
+ desc: null
181
+ value:
182
+ type: smolvla
183
+ n_obs_steps: 1
184
+ normalization_mapping:
185
+ VISUAL: IDENTITY
186
+ STATE: MEAN_STD
187
+ ACTION: MEAN_STD
188
+ input_features:
189
+ observation.images.image:
190
+ type: VISUAL
191
+ shape:
192
+ - 3
193
+ - 256
194
+ - 256
195
+ observation.images.wrist_image:
196
+ type: VISUAL
197
+ shape:
198
+ - 3
199
+ - 256
200
+ - 256
201
+ observation.images.image_mask:
202
+ type: VISUAL
203
+ shape:
204
+ - 3
205
+ - 256
206
+ - 256
207
+ observation.images.wrist_mask:
208
+ type: VISUAL
209
+ shape:
210
+ - 3
211
+ - 256
212
+ - 256
213
+ observation.images.object_of_interest_mask:
214
+ type: VISUAL
215
+ shape:
216
+ - 3
217
+ - 256
218
+ - 256
219
+ observation.images.object_of_interest_wrist_mask:
220
+ type: VISUAL
221
+ shape:
222
+ - 3
223
+ - 256
224
+ - 256
225
+ observation.state:
226
+ type: STATE
227
+ shape:
228
+ - 8
229
+ observation.states.ee_state:
230
+ type: STATE
231
+ shape:
232
+ - 6
233
+ observation.states.joint_state:
234
+ type: STATE
235
+ shape:
236
+ - 7
237
+ observation.states.gripper_state:
238
+ type: STATE
239
+ shape:
240
+ - 2
241
+ output_features:
242
+ action:
243
+ type: ACTION
244
+ shape:
245
+ - 7
246
+ device: cuda
247
+ use_amp: false
248
+ gradient_accumulation_steps: 1
249
+ chunk_size: 50
250
+ n_action_steps: 50
251
+ max_state_dim: 32
252
+ max_action_dim: 32
253
+ resize_imgs_with_padding:
254
+ - 512
255
+ - 512
256
+ empty_cameras: 0
257
+ adapt_to_pi_aloha: false
258
+ use_delta_joint_actions_aloha: false
259
+ tokenizer_max_length: 48
260
+ num_steps: 10
261
+ use_cache: true
262
+ freeze_vision_encoder: true
263
+ train_expert_only: false
264
+ train_state_proj: true
265
+ optimizer_lr: 0.0001
266
+ optimizer_betas:
267
+ - 0.9
268
+ - 0.95
269
+ optimizer_eps: 1.0e-08
270
+ optimizer_weight_decay: 1.0e-10
271
+ optimizer_grad_clip_norm: 10.0
272
+ scheduler_warmup_steps: 1000
273
+ scheduler_decay_steps: 30000
274
+ scheduler_decay_lr: 2.5e-06
275
+ vlm_model_name: HuggingFaceTB/SmolVLM2-500M-Video-Instruct
276
+ load_vlm_weights: true
277
+ add_image_special_tokens: false
278
+ attention_mode: cross_attn
279
+ prefix_length: 0
280
+ pad_language_to: max_length
281
+ num_expert_layers: 0
282
+ num_vlm_layers: 16
283
+ self_attn_every_n_layers: 2
284
+ expert_width_multiplier: 0.75
285
+ min_period: 0.004
286
+ max_period: 4.0
287
+ output_dir:
288
+ desc: null
289
+ value: /projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base
290
+ job_name:
291
+ desc: null
292
+ value: libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base
293
+ resume:
294
+ desc: null
295
+ value: true
296
+ seed:
297
+ desc: null
298
+ value: 42
299
+ num_workers:
300
+ desc: null
301
+ value: 8
302
+ batch_size:
303
+ desc: null
304
+ value: 64
305
+ steps:
306
+ desc: null
307
+ value: 100000
308
+ eval_freq:
309
+ desc: null
310
+ value: 20000
311
+ log_freq:
312
+ desc: null
313
+ value: 10
314
+ save_checkpoint:
315
+ desc: null
316
+ value: true
317
+ save_freq:
318
+ desc: null
319
+ value: 10000
320
+ use_policy_training_preset:
321
+ desc: null
322
+ value: true
323
+ optimizer:
324
+ desc: null
325
+ value:
326
+ type: adamw
327
+ lr: 0.0001
328
+ weight_decay: 1.0e-10
329
+ grad_clip_norm: 10.0
330
+ betas:
331
+ - 0.9
332
+ - 0.95
333
+ eps: 1.0e-08
334
+ scheduler:
335
+ desc: null
336
+ value:
337
+ type: cosine_decay_with_warmup
338
+ num_warmup_steps: 1000
339
+ num_decay_steps: 30000
340
+ peak_lr: 0.0001
341
+ decay_lr: 2.5e-06
342
+ eval:
343
+ desc: null
344
+ value:
345
+ n_episodes: 50
346
+ batch_size: 50
347
+ use_async_envs: false
348
+ wandb:
349
+ desc: null
350
+ value:
351
+ enable: true
352
+ disable_artifact: true
353
+ project: smolvla
354
+ entity: Robotics_VLA
355
+ notes: null
356
+ run_id: null
357
+ mode: offline
wandb/offline-run-20251127_112044-lizvmqey/files/output.log ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ INFO 2025-11-27 11:20:45 celerate.py:159 Creating dataset
2
+ Resolving data files: 100%|█████████████████████████████████████████████████████████████████████████████████████| 1676/1676 [00:00<00:00, 22868.84it/s]
3
+ Loading dataset shards: 100%|█████████████████████████████████████████████████████████████████████████████████████████| 78/78 [00:00<00:00, 934.13it/s]
4
+ INFO 2025-11-27 11:20:53 celerate.py:170 Creating policy
5
+ `torch_dtype` is deprecated! Use `dtype` instead!
6
+ INFO 2025-11-27 11:21:09 celerate.py:181 Creating optimizer and scheduler
7
+ INFO 2025-11-27 11:21:13 celerate.py:221 Output dir: /projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base
8
+ INFO 2025-11-27 11:21:13 celerate.py:224 cfg.steps=100000 (100K)
9
+ INFO 2025-11-27 11:21:13 celerate.py:225 dataset.num_frames=269918 (270K)
10
+ INFO 2025-11-27 11:21:13 celerate.py:226 dataset.num_episodes=1676
11
+ INFO 2025-11-27 11:21:13 celerate.py:227 num_learnable_params=317226912 (317M)
12
+ INFO 2025-11-27 11:21:13 celerate.py:228 num_total_params=450968810 (451M)
13
+ INFO 2025-11-27 11:21:13 celerate.py:229 Number of processes: 1
14
+ INFO 2025-11-27 11:21:13 celerate.py:230 Device: cuda
15
+ INFO 2025-11-27 11:21:13 celerate.py:231 Mixed precision: no
16
+ INFO 2025-11-27 11:21:13 celerate.py:253 Start offline training on a fixed dataset
17
+ INFO 2025-11-27 11:21:34 celerate.py:303 step:50K smpl:3M ep:20K epch:11.86 loss:0.019 grdn:1.143 lr:2.5e-06 updt_s:1.044 data_s:0.955
18
+ load pretrained policy
19
+ Loading HuggingFaceTB/SmolVLM2-500M-Video-Instruct weights ...
20
+ Load pretrained VLM successfully!
21
+ Reducing the number of VLM layers to 16 ...
22
+ =================================
23
+ USING MODEL FROM BINH'S DIR
24
+ =================================
25
+ Loading weights from local directory
26
+ INFO 2025-11-27 11:21:43 celerate.py:303 step:50K smpl:3M ep:20K epch:11.86 loss:0.019 grdn:1.148 lr:2.5e-06 updt_s:0.797 data_s:0.017
27
+ INFO 2025-11-27 11:21:51 celerate.py:303 step:50K smpl:3M ep:20K epch:11.86 loss:0.031 grdn:1.072 lr:2.5e-06 updt_s:0.767 data_s:0.017
28
+ INFO 2025-11-27 11:22:00 celerate.py:303 step:50K smpl:3M ep:20K epch:11.86 loss:0.013 grdn:1.122 lr:2.5e-06 updt_s:0.775 data_s:0.018
29
+ INFO 2025-11-27 11:22:08 celerate.py:303 step:50K smpl:3M ep:20K epch:11.87 loss:0.050 grdn:1.097 lr:2.5e-06 updt_s:0.751 data_s:0.017
30
+ INFO 2025-11-27 11:22:17 celerate.py:303 step:50K smpl:3M ep:20K epch:11.87 loss:0.022 grdn:1.153 lr:2.5e-06 updt_s:0.790 data_s:0.018
31
+ INFO 2025-11-27 11:22:26 celerate.py:303 step:50K smpl:3M ep:20K epch:11.87 loss:0.018 grdn:1.203 lr:2.5e-06 updt_s:0.769 data_s:0.018
32
+ INFO 2025-11-27 11:22:34 celerate.py:303 step:50K smpl:3M ep:20K epch:11.87 loss:0.013 grdn:1.118 lr:2.5e-06 updt_s:0.765 data_s:0.018
33
+ INFO 2025-11-27 11:22:43 celerate.py:303 step:50K smpl:3M ep:20K epch:11.88 loss:0.034 grdn:1.147 lr:2.5e-06 updt_s:0.771 data_s:0.019
34
+ INFO 2025-11-27 11:22:51 celerate.py:303 step:50K smpl:3M ep:20K epch:11.88 loss:0.011 grdn:1.151 lr:2.5e-06 updt_s:0.766 data_s:0.017
35
+ INFO 2025-11-27 11:23:00 celerate.py:303 step:50K smpl:3M ep:20K epch:11.88 loss:0.012 grdn:1.114 lr:2.5e-06 updt_s:0.773 data_s:0.017
36
+ INFO 2025-11-27 11:23:09 celerate.py:303 step:50K smpl:3M ep:20K epch:11.88 loss:0.014 grdn:1.178 lr:2.5e-06 updt_s:0.794 data_s:0.018
37
+ INFO 2025-11-27 11:23:17 celerate.py:303 step:50K smpl:3M ep:20K epch:11.89 loss:0.006 grdn:1.168 lr:2.5e-06 updt_s:0.757 data_s:0.017
38
+ INFO 2025-11-27 11:23:26 celerate.py:303 step:50K smpl:3M ep:20K epch:11.89 loss:0.019 grdn:1.130 lr:2.5e-06 updt_s:0.774 data_s:0.017
39
+ INFO 2025-11-27 11:23:34 celerate.py:303 step:50K smpl:3M ep:20K epch:11.89 loss:0.029 grdn:1.146 lr:2.5e-06 updt_s:0.777 data_s:0.018
40
+ INFO 2025-11-27 11:23:43 celerate.py:303 step:50K smpl:3M ep:20K epch:11.89 loss:0.015 grdn:1.111 lr:2.5e-06 updt_s:0.774 data_s:0.017
41
+ INFO 2025-11-27 11:23:51 celerate.py:303 step:50K smpl:3M ep:20K epch:11.90 loss:0.022 grdn:1.143 lr:2.5e-06 updt_s:0.761 data_s:0.018
42
+ INFO 2025-11-27 11:24:00 celerate.py:303 step:50K smpl:3M ep:20K epch:11.90 loss:0.010 grdn:1.103 lr:2.5e-06 updt_s:0.790 data_s:0.017
43
+ INFO 2025-11-27 11:24:09 celerate.py:303 step:50K smpl:3M ep:20K epch:11.90 loss:0.015 grdn:1.215 lr:2.5e-06 updt_s:0.783 data_s:0.017
44
+ INFO 2025-11-27 11:24:17 celerate.py:303 step:50K smpl:3M ep:20K epch:11.90 loss:0.025 grdn:1.143 lr:2.5e-06 updt_s:0.771 data_s:0.018
45
+ INFO 2025-11-27 11:24:26 celerate.py:303 step:50K smpl:3M ep:20K epch:11.91 loss:0.043 grdn:1.144 lr:2.5e-06 updt_s:0.779 data_s:0.018
46
+ INFO 2025-11-27 11:24:35 celerate.py:303 step:50K smpl:3M ep:20K epch:11.91 loss:0.024 grdn:1.174 lr:2.5e-06 updt_s:0.773 data_s:0.018
47
+ INFO 2025-11-27 11:24:43 celerate.py:303 step:50K smpl:3M ep:20K epch:11.91 loss:0.034 grdn:1.111 lr:2.5e-06 updt_s:0.757 data_s:0.017
48
+ INFO 2025-11-27 11:24:52 celerate.py:303 step:50K smpl:3M ep:20K epch:11.91 loss:0.023 grdn:1.146 lr:2.5e-06 updt_s:0.778 data_s:0.017
49
+ INFO 2025-11-27 11:25:00 celerate.py:303 step:50K smpl:3M ep:20K epch:11.91 loss:0.023 grdn:1.154 lr:2.5e-06 updt_s:0.763 data_s:0.018
50
+ INFO 2025-11-27 11:25:09 celerate.py:303 step:50K smpl:3M ep:20K epch:11.92 loss:0.016 grdn:1.153 lr:2.5e-06 updt_s:0.777 data_s:0.018
51
+ INFO 2025-11-27 11:25:17 celerate.py:303 step:50K smpl:3M ep:20K epch:11.92 loss:0.013 grdn:1.104 lr:2.5e-06 updt_s:0.758 data_s:0.017
52
+ INFO 2025-11-27 11:25:26 celerate.py:303 step:50K smpl:3M ep:20K epch:11.92 loss:0.015 grdn:1.104 lr:2.5e-06 updt_s:0.791 data_s:0.018
53
+ INFO 2025-11-27 11:25:35 celerate.py:303 step:50K smpl:3M ep:20K epch:11.92 loss:0.012 grdn:1.149 lr:2.5e-06 updt_s:0.775 data_s:0.017
54
+ INFO 2025-11-27 11:25:43 celerate.py:303 step:50K smpl:3M ep:20K epch:11.93 loss:0.018 grdn:1.167 lr:2.5e-06 updt_s:0.769 data_s:0.017
55
+ INFO 2025-11-27 11:25:52 celerate.py:303 step:50K smpl:3M ep:20K epch:11.93 loss:0.014 grdn:1.193 lr:2.5e-06 updt_s:0.773 data_s:0.017
56
+ INFO 2025-11-27 11:26:01 celerate.py:303 step:50K smpl:3M ep:20K epch:11.93 loss:0.018 grdn:1.124 lr:2.5e-06 updt_s:0.775 data_s:0.018
57
+ INFO 2025-11-27 11:26:09 celerate.py:303 step:50K smpl:3M ep:20K epch:11.93 loss:0.019 grdn:1.170 lr:2.5e-06 updt_s:0.753 data_s:0.017
58
+ INFO 2025-11-27 11:26:18 celerate.py:303 step:50K smpl:3M ep:20K epch:11.94 loss:0.018 grdn:1.123 lr:2.5e-06 updt_s:0.789 data_s:0.018
59
+ INFO 2025-11-27 11:26:26 celerate.py:303 step:50K smpl:3M ep:20K epch:11.94 loss:0.029 grdn:1.158 lr:2.5e-06 updt_s:0.767 data_s:0.017
60
+ INFO 2025-11-27 11:26:35 celerate.py:303 step:50K smpl:3M ep:20K epch:11.94 loss:0.067 grdn:1.110 lr:2.5e-06 updt_s:0.779 data_s:0.017
61
+ INFO 2025-11-27 11:26:44 celerate.py:303 step:50K smpl:3M ep:20K epch:11.94 loss:0.014 grdn:1.082 lr:2.5e-06 updt_s:0.781 data_s:0.018
62
+ INFO 2025-11-27 11:26:53 celerate.py:303 step:50K smpl:3M ep:20K epch:11.95 loss:0.036 grdn:1.096 lr:2.5e-06 updt_s:0.788 data_s:0.018
63
+ INFO 2025-11-27 11:27:01 celerate.py:303 step:50K smpl:3M ep:20K epch:11.95 loss:0.038 grdn:1.111 lr:2.5e-06 updt_s:0.771 data_s:0.017
64
+ INFO 2025-11-27 11:27:10 celerate.py:303 step:50K smpl:3M ep:20K epch:11.95 loss:0.015 grdn:1.139 lr:2.5e-06 updt_s:0.773 data_s:0.017
65
+ INFO 2025-11-27 11:27:18 celerate.py:303 step:50K smpl:3M ep:20K epch:11.95 loss:0.020 grdn:1.145 lr:2.5e-06 updt_s:0.769 data_s:0.017
66
+ INFO 2025-11-27 11:27:27 celerate.py:303 step:50K smpl:3M ep:20K epch:11.96 loss:0.014 grdn:1.095 lr:2.5e-06 updt_s:0.775 data_s:0.017
67
+ INFO 2025-11-27 11:27:36 celerate.py:303 step:50K smpl:3M ep:20K epch:11.96 loss:0.019 grdn:1.125 lr:2.5e-06 updt_s:0.779 data_s:0.017
68
+ INFO 2025-11-27 11:27:44 celerate.py:303 step:50K smpl:3M ep:20K epch:11.96 loss:0.040 grdn:1.125 lr:2.5e-06 updt_s:0.777 data_s:0.018
69
+ INFO 2025-11-27 11:27:53 celerate.py:303 step:50K smpl:3M ep:20K epch:11.96 loss:0.059 grdn:1.197 lr:2.5e-06 updt_s:0.771 data_s:0.017
70
+ INFO 2025-11-27 11:28:01 celerate.py:303 step:50K smpl:3M ep:20K epch:11.96 loss:0.046 grdn:1.131 lr:2.5e-06 updt_s:0.769 data_s:0.017
71
+ INFO 2025-11-27 11:28:10 celerate.py:303 step:50K smpl:3M ep:20K epch:11.97 loss:0.016 grdn:1.126 lr:2.5e-06 updt_s:0.770 data_s:0.018
72
+ INFO 2025-11-27 11:28:18 celerate.py:303 step:50K smpl:3M ep:20K epch:11.97 loss:0.016 grdn:1.101 lr:2.5e-06 updt_s:0.765 data_s:0.017
73
+ INFO 2025-11-27 11:28:27 celerate.py:303 step:50K smpl:3M ep:20K epch:11.97 loss:0.024 grdn:1.148 lr:2.5e-06 updt_s:0.779 data_s:0.017
74
+ INFO 2025-11-27 11:28:36 celerate.py:303 step:50K smpl:3M ep:20K epch:11.97 loss:0.030 grdn:1.122 lr:2.5e-06 updt_s:0.780 data_s:0.017
75
+ INFO 2025-11-27 11:28:44 celerate.py:303 step:51K smpl:3M ep:20K epch:11.98 loss:0.020 grdn:1.217 lr:2.5e-06 updt_s:0.751 data_s:0.017
76
+ INFO 2025-11-27 11:28:53 celerate.py:303 step:51K smpl:3M ep:20K epch:11.98 loss:0.014 grdn:1.147 lr:2.5e-06 updt_s:0.766 data_s:0.019
77
+ INFO 2025-11-27 11:29:01 celerate.py:303 step:51K smpl:3M ep:20K epch:11.98 loss:0.037 grdn:1.148 lr:2.5e-06 updt_s:0.778 data_s:0.018
78
+ INFO 2025-11-27 11:29:10 celerate.py:303 step:51K smpl:3M ep:20K epch:11.98 loss:0.012 grdn:1.184 lr:2.5e-06 updt_s:0.757 data_s:0.018
79
+ INFO 2025-11-27 11:29:18 celerate.py:303 step:51K smpl:3M ep:20K epch:11.99 loss:0.017 grdn:1.115 lr:2.5e-06 updt_s:0.763 data_s:0.017
80
+ INFO 2025-11-27 11:29:27 celerate.py:303 step:51K smpl:3M ep:20K epch:11.99 loss:0.019 grdn:1.155 lr:2.5e-06 updt_s:0.765 data_s:0.017
81
+ INFO 2025-11-27 11:29:35 celerate.py:303 step:51K smpl:3M ep:20K epch:11.99 loss:0.019 grdn:1.188 lr:2.5e-06 updt_s:0.779 data_s:0.017
82
+ INFO 2025-11-27 11:29:44 celerate.py:303 step:51K smpl:3M ep:20K epch:11.99 loss:0.022 grdn:1.104 lr:2.5e-06 updt_s:0.782 data_s:0.019
wandb/offline-run-20251127_112044-lizvmqey/files/requirements.txt ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ PySocks==1.7.1
2
+ beautifulsoup4==4.14.2
3
+ gdown==5.2.0
4
+ soupsieve==2.8
5
+ Farama-Notifications==0.0.4
6
+ PyOpenGL==3.1.0
7
+ PySocks==1.7.1
8
+ Send2Trash==1.8.3
9
+ absl-py==2.3.1
10
+ accelerate==1.11.0
11
+ aiohappyeyeballs==2.6.1
12
+ aiohttp==3.13.2
13
+ aiosignal==1.4.0
14
+ annotated-types==0.7.0
15
+ antlr4-python3-runtime==4.8
16
+ anyio==4.11.0
17
+ argon2-cffi==25.1.0
18
+ argon2-cffi-bindings==25.1.0
19
+ arrow==1.4.0
20
+ asciitree==0.3.3
21
+ asttokens==3.0.0
22
+ astunparse==1.6.3
23
+ async-lru==2.0.5
24
+ async-timeout==5.0.1
25
+ attrs==25.4.0
26
+ av==16.0.1
27
+ babel==2.17.0
28
+ bddl==3.6.0
29
+ beautifulsoup4==4.14.2
30
+ bleach==6.3.0
31
+ blinker==1.9.0
32
+ calvin_env==0.0.1
33
+ certifi==2025.10.5
34
+ cffi==2.0.0
35
+ charset-normalizer==3.4.4
36
+ click==8.3.0
37
+ cloudpickle==3.1.1
38
+ cmake==4.1.2
39
+ colorlog==6.10.1
40
+ comm==0.2.3
41
+ contourpy==1.3.2
42
+ cycler==0.12.1
43
+ datasets==3.0.0
44
+ debugpy==1.8.17
45
+ decorator==4.4.2
46
+ deepdiff==8.6.1
47
+ defusedxml==0.7.1
48
+ diffusers==0.35.2
49
+ dill==0.3.8
50
+ docopt==0.6.2
51
+ docstring_parser==0.17.0
52
+ draccus==0.10.0
53
+ easydict==1.13
54
+ einops==0.8.1
55
+ etils==1.13.0
56
+ evdev==1.9.2
57
+ exceptiongroup==1.3.0
58
+ executing==2.2.1
59
+ fasteners==0.20
60
+ fastjsonschema==2.21.2
61
+ filelock==3.20.0
62
+ Flask==3.1.2
63
+ flatbuffers==25.9.23
64
+ fonttools==4.60.1
65
+ fqdn==1.5.1
66
+ freetype-py==2.5.1
67
+ frozenlist==1.8.0
68
+ fsspec==2024.6.1
69
+ future==1.0.0
70
+ gast==0.6.0
71
+ gdown==5.2.0
72
+ gitdb==4.0.12
73
+ GitPython==3.1.45
74
+ glfw==2.10.0
75
+ google-pasta==0.2.0
76
+ grpcio==1.76.0
77
+ gym==0.26.2
78
+ gym-notices==0.1.0
79
+ gymnasium==0.29.1
80
+ h11==0.16.0
81
+ h5py==3.15.1
82
+ hf_transfer==0.1.9
83
+ hf-xet==1.2.0
84
+ httpcore==1.0.9
85
+ httpx==0.28.1
86
+ huggingface-hub==0.36.0
87
+ hydra-colorlog==1.2.0
88
+ hydra-core==1.1.1
89
+ idna==3.11
90
+ imageio==2.37.0
91
+ imageio-ffmpeg==0.6.0
92
+ importlib_metadata==8.7.0
93
+ importlib_resources==6.5.2
94
+ iniconfig==2.3.0
95
+ inquirerpy==0.3.4
96
+ ipykernel==7.1.0
97
+ ipython==8.37.0
98
+ iso8601==2.1.0
99
+ isoduration==20.11.0
100
+ itsdangerous==2.2.0
101
+ jedi==0.19.2
102
+ Jinja2==3.1.6
103
+ joblib==1.5.2
104
+ json5==0.12.1
105
+ jsonlines==4.0.0
106
+ jsonpointer==3.0.0
107
+ jsonschema==4.25.1
108
+ jsonschema-specifications==2025.9.1
109
+ jupyter_client==8.6.3
110
+ jupyter_core==5.9.1
111
+ jupyter-events==0.12.0
112
+ jupyter-lsp==2.3.0
113
+ jupyter_server==2.17.0
114
+ jupyter_server_terminals==0.5.3
115
+ jupyterlab==4.4.10
116
+ jupyterlab_pygments==0.3.0
117
+ jupyterlab_server==2.28.0
118
+ jupytext==1.18.1
119
+ keras==3.12.0
120
+ kiwisolver==1.4.9
121
+ lark==1.3.1
122
+ lerobot==0.1.0
123
+ libclang==18.1.1
124
+ lightning-utilities==0.15.2
125
+ llvmlite==0.45.1
126
+ lxml==6.0.2
127
+ Markdown==3.10
128
+ markdown-it-py==4.0.0
129
+ MarkupSafe==3.0.3
130
+ matplotlib==3.10.7
131
+ matplotlib-inline==0.2.1
132
+ mdit-py-plugins==0.5.0
133
+ mdurl==0.1.2
134
+ mergedeep==1.3.4
135
+ mistune==3.1.4
136
+ ml_dtypes==0.5.4
137
+ moviepy==1.0.3
138
+ mpmath==1.3.0
139
+ mujoco==3.3.7
140
+ multidict==6.7.0
141
+ multiprocess==0.70.16
142
+ mypy_extensions==1.1.0
143
+ namex==0.1.0
144
+ narwhals==2.12.0
145
+ nbclient==0.10.2
146
+ nbconvert==7.16.6
147
+ nbformat==5.10.4
148
+ nest-asyncio==1.6.0
149
+ networkx==3.4.2
150
+ nltk==3.9.2
151
+ notebook_shim==0.2.4
152
+ num2words==0.5.14
153
+ numba==0.62.1
154
+ numcodecs==0.13.1
155
+ numpy==2.2.6
156
+ numpy-quaternion==2024.0.12
157
+ nvidia-cublas-cu12==12.8.4.1
158
+ nvidia-cuda-cupti-cu12==12.8.90
159
+ nvidia-cuda-nvrtc-cu12==12.8.93
160
+ nvidia-cuda-runtime-cu12==12.8.90
161
+ nvidia-cudnn-cu12==9.10.2.21
162
+ nvidia-cufft-cu12==11.3.3.83
163
+ nvidia-cufile-cu12==1.13.1.3
164
+ nvidia-curand-cu12==10.3.9.90
165
+ nvidia-cusolver-cu12==11.7.3.90
166
+ nvidia-cusparse-cu12==12.5.8.93
167
+ nvidia-cusparselt-cu12==0.7.1
168
+ nvidia-ml-py==13.580.82
169
+ nvidia-nccl-cu12==2.27.5
170
+ nvidia-nvjitlink-cu12==12.8.93
171
+ nvidia-nvshmem-cu12==3.3.20
172
+ nvidia-nvtx-cu12==12.8.90
173
+ nvitop==1.5.3
174
+ omegaconf==2.1.2
175
+ opencv-python==4.12.0.88
176
+ opencv-python-headless==4.12.0.88
177
+ opt_einsum==3.4.0
178
+ optree==0.18.0
179
+ orderly-set==5.5.0
180
+ overrides==7.7.0
181
+ packaging==25.0
182
+ pandas==2.3.3
183
+ pandocfilters==1.5.1
184
+ parso==0.8.5
185
+ pexpect==4.9.0
186
+ pfzy==0.3.4
187
+ pillow==12.0.0
188
+ pip==25.2
189
+ platformdirs==4.5.0
190
+ plotly==6.5.0
191
+ pluggy==1.6.0
192
+ proglog==0.1.12
193
+ prometheus_client==0.23.1
194
+ prompt_toolkit==3.0.52
195
+ propcache==0.4.1
196
+ protobuf==6.33.0
197
+ psutil==7.1.2
198
+ ptyprocess==0.7.0
199
+ pure_eval==0.2.3
200
+ pyarrow==22.0.0
201
+ pybullet==3.2.7
202
+ pycollada==0.6
203
+ pycparser==2.23
204
+ pydantic==2.12.3
205
+ pydantic_core==2.41.4
206
+ pyglet==2.1.11
207
+ Pygments==2.19.2
208
+ pyhash==0.9.3
209
+ pymunk==6.11.1
210
+ pynput==1.8.1
211
+ pyparsing==3.2.5
212
+ pyrender==0.1.45
213
+ pytest==8.4.2
214
+ python-dateutil==2.9.0.post0
215
+ python-dotenv==1.2.1
216
+ python-json-logger==4.0.0
217
+ python-xlib==0.33
218
+ pytorch-lightning==2.5.6
219
+ pytz==2025.2
220
+ PyYAML==6.0.3
221
+ pyyaml-include==1.4.1
222
+ pyzmq==27.1.0
223
+ referencing==0.37.0
224
+ regex==2025.10.23
225
+ requests==2.32.5
226
+ rerun-sdk==0.26.2
227
+ rfc3339-validator==0.1.4
228
+ rfc3986-validator==0.1.1
229
+ rfc3987-syntax==1.1.0
230
+ rich==14.2.0
231
+ robosuite==1.4.0
232
+ rpds-py==0.28.0
233
+ safetensors==0.6.2
234
+ scipy==1.15.3
235
+ sentry-sdk==2.43.0
236
+ serial==0.0.97
237
+ setuptools==57.5.0
238
+ shtab==1.7.2
239
+ six==1.17.0
240
+ smmap==5.0.2
241
+ sniffio==1.3.1
242
+ soupsieve==2.8
243
+ stack-data==0.6.3
244
+ sympy==1.14.0
245
+ tensorboard==2.20.0
246
+ tensorboard-data-server==0.7.2
247
+ tensorflow==2.20.0
248
+ termcolor==3.2.0
249
+ terminado==0.18.1
250
+ tinycss2==1.4.0
251
+ tokenizers==0.22.1
252
+ toml==0.10.2
253
+ tomli==2.3.0
254
+ torch==2.9.0
255
+ torchcodec==0.8.1
256
+ torchmetrics==1.8.2
257
+ torchvision==0.24.0
258
+ tornado==6.5.2
259
+ tqdm==4.67.1
260
+ traitlets==5.14.3
261
+ transformers==4.57.1
262
+ trimesh==4.9.0
263
+ triton==3.5.0
264
+ typeguard==4.4.4
265
+ typing_extensions==4.15.0
266
+ typing-inspect==0.9.0
267
+ typing-inspection==0.4.2
268
+ tyro==0.9.35
269
+ tzdata==2025.2
270
+ urdfpy==0.0.22
271
+ uri-template==1.3.0
272
+ urllib3==2.5.0
273
+ wandb==0.22.3
274
+ wcwidth==0.2.14
275
+ webcolors==25.10.0
276
+ webencodings==0.5.1
277
+ websocket-client==1.9.0
278
+ Werkzeug==3.1.3
279
+ wheel==0.45.1
280
+ wrapt==2.0.1
281
+ xxhash==3.6.0
282
+ yarl==1.22.0
283
+ zarr==2.18.3
284
+ zipp==3.23.0
285
+ Calvin==0.0.1
286
+ tacto==0.0.3
wandb/offline-run-20251127_112044-lizvmqey/files/wandb-metadata.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"os": "Linux-4.18.0-553.74.1.el8_10.x86_64-x86_64-with-glibc2.28", "python": "CPython 3.10.19", "started_at": "2025-11-27T10:20:44.939230Z", "args": ["--resume=true", "--output_dir=/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base", "--config_path=/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base/checkpoints/last/pretrained_model/train_config.json"], "program": "/mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/binh/Smolvla_capstone_project/lerobot/scripts/train_accelerate.py", "code_path": "lerobot/scripts/train_accelerate.py", "code_path_local": "lerobot/scripts/train_accelerate.py", "git": {"remote_url": "git@github-binh:jibby2803/Smolvla_capstone_project.git", "commit": "5e71e3b01e75c173c975f3aab819927a265c360a"}, "root": "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base", "host": "ggpu183", "executable": "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/bin/python3.10", "cpu_count": 64, "cpu_count_logical": 128, "disk": {"/": {"total": "270465433600", "used": "7152394240"}}, "memory": {"total": "540930867200"}, "slurm": {"mpi_type": "pmi2", "job_id": "11851085"}, "writer_id": "qnrpx9krjcsw5zeotwtgsw7vqa4lj8ft"}
wandb/offline-run-20251127_112044-lizvmqey/files/wandb-summary.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"_runtime": 539.236888844, "_timestamp": 1764239375.9905393, "train/episodes": 20096.25323246319, "train/epochs": 11.990604553975652, "train/grad_norm": 1.1879278302192688, "train/losses_after_forward": {"values": [73326, 13844, 6634, 3639, 2015, 1204, 666, 393, 245, 167, 107, 69, 31, 23, 9, 9, 2, 6, 5, 1, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], "bins": [7.72937269744034e-13, 0.9368996024131775, 1.873799204826355, 2.8106987476348877, 3.74759840965271, 4.684497833251953, 5.621397495269775, 6.558297157287598, 7.49519681930542, 8.432096481323242, 9.368995666503906, 10.305895805358887, 11.24279499053955, 12.179695129394531, 13.116594314575195, 14.053494453430176, 14.99039363861084, 15.927292823791504, 16.864192962646484, 17.80109214782715, 18.737991333007812, 19.67489242553711, 20.611791610717773, 21.548690795898438, 22.4855899810791, 23.422489166259766, 24.359390258789062, 25.296289443969727, 26.23318862915039, 27.170087814331055, 28.10698890686035, 29.043888092041016, 29.98078727722168], "_type": "histogram"}, "train/loss": 0.20997808873653412, "train/lr": 2.5e-06, "train/samples": 3236480, "train/update_s": 0.779058738425374, "train/tlc_loss": -0.39999255537986755, "train/total_loss": 0.009981811046600342, "train/l1_infer_loss": 0.12626828253269196, "_step": 50570, "train/steps": 50570, "train/dataloading_s": 0.017474299389868976, "train/losses_after_rm_padding": {"values": [19399, 1649, 567, 290, 166, 108, 66, 39, 31, 25, 12, 12, 10, 1, 3, 2, 2, 3, 0, 5, 2, 2, 1, 0, 0, 2, 2, 0, 0, 0, 0, 1], "bins": [5.912070832891914e-11, 0.40790247917175293, 0.8158049583435059, 1.2237074375152588, 1.6316099166870117, 2.0395123958587646, 2.4474148750305176, 2.8553173542022705, 3.2632198333740234, 3.6711223125457764, 4.079024791717529, 4.486927032470703, 4.894829750061035, 5.302732467651367, 5.710634708404541, 6.118536949157715, 6.526439666748047, 6.934342384338379, 7.342244625091553, 7.750146865844727, 8.158049583435059, 8.56595230102539, 8.973854064941406, 9.381756782531738, 9.78965950012207, 10.197562217712402, 10.605464935302734, 11.01336669921875, 11.421269416809082, 11.829172134399414, 12.23707389831543, 12.644976615905762, 13.052879333496094], "_type": "histogram"}}
wandb/offline-run-20251127_112044-lizvmqey/logs/debug-internal.log ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"time":"2025-11-27T11:20:45.378937954+01:00","level":"INFO","msg":"stream: starting","core version":"0.22.3"}
2
+ {"time":"2025-11-27T11:20:45.409919704+01:00","level":"ERROR","msg":"monitor: failed to initialize GPU resource: monitor: could not create portfile"}
3
+ {"time":"2025-11-27T11:20:45.425921086+01:00","level":"WARN","msg":"featurechecker: GraphQL client is nil, skipping feature loading"}
4
+ {"time":"2025-11-27T11:20:45.425965495+01:00","level":"INFO","msg":"stream: created new stream","id":"lizvmqey"}
5
+ {"time":"2025-11-27T11:20:45.425988095+01:00","level":"INFO","msg":"handler: started","stream_id":"lizvmqey"}
6
+ {"time":"2025-11-27T11:20:45.428918285+01:00","level":"INFO","msg":"stream: started","id":"lizvmqey"}
7
+ {"time":"2025-11-27T11:20:45.428917715+01:00","level":"INFO","msg":"sender: started","stream_id":"lizvmqey"}
8
+ {"time":"2025-11-27T11:20:45.428956845+01:00","level":"INFO","msg":"writer: started","stream_id":"lizvmqey"}
9
+ {"time":"2025-11-27T11:20:45.429853649+01:00","level":"WARN","msg":"runupserter: server does not expand metric globs but the x_server_side_expand_glob_metrics setting is set; ignoring"}
10
+ {"time":"2025-11-27T14:51:55.122025249+01:00","level":"INFO","msg":"stream: closing","id":"lizvmqey"}
11
+ {"time":"2025-11-27T14:51:55.125975233+01:00","level":"INFO","msg":"handler: closed","stream_id":"lizvmqey"}
12
+ {"time":"2025-11-27T14:51:55.127619953+01:00","level":"INFO","msg":"sender: closed","stream_id":"lizvmqey"}
13
+ {"time":"2025-11-27T14:51:55.127656972+01:00","level":"INFO","msg":"stream: closed","id":"lizvmqey"}
wandb/offline-run-20251127_112044-lizvmqey/logs/debug.log ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_setup.py:_flush():81] Current SDK version is 0.22.3
2
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_setup.py:_flush():81] Configure stats pid to 1938939
3
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_setup.py:_flush():81] Loading settings from /user/hominhduy.nguyen/u15271/.config/wandb/settings
4
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_setup.py:_flush():81] Loading settings from /mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/binh/Smolvla_capstone_project/wandb/settings
5
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_setup.py:_flush():81] Loading settings from environment variables
6
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_init.py:setup_run_log_directory():706] Logging user logs to /projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base/wandb/offline-run-20251127_112044-lizvmqey/logs/debug.log
7
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_init.py:setup_run_log_directory():707] Logging internal logs to /projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base/wandb/offline-run-20251127_112044-lizvmqey/logs/debug-internal.log
8
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_init.py:init():833] calling init triggers
9
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_init.py:init():838] wandb.init called with sweep_config: {}
10
+ config: {'dataset': {'repo_id': '.', 'root': '/mnt/vast-kisski/projects/kisski-umg-fairpact-2/VLA/LIBERO/merged_libero_scale_100_mask_depth_noops_lerobot', 'episodes': None, 'image_transforms': {'enable': True, 'max_num_transforms': 3, 'random_order': False, 'image_tfs': {'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}, 'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}, 'crop_resize': {'weight': 1.0, 'type': 'RandomResizedCrop', 'kwargs': {'size': [256, 256], 'ratio': [1, 1], 'scale': [0.9, 0.95]}}, 'rotate': {'weight': 1.0, 'type': 'RandomRotate', 'kwargs': {'degrees': [-5, 5]}}}, 'wrist_tfs': {'hue': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'hue': [-0.05, 0.05]}}, 'contrast': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'contrast': [0.8, 1.2]}}, 'sharpness': {'weight': 1.0, 'type': 'SharpnessJitter', 'kwargs': {'sharpness': [0.5, 1.5]}}, 'brightness': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'brightness': [0.8, 1.2]}}, 'saturation': {'weight': 1.0, 'type': 'ColorJitter', 'kwargs': {'saturation': [0.5, 1.5]}}}}, 'revision': None, 'use_imagenet_stats': True, 'video_backend': 'torchcodec', 'vqa_data_path': None}, 'env': None, 'policy': {'type': 'smolvla', 'n_obs_steps': 1, 'normalization_mapping': {'VISUAL': <NormalizationMode.IDENTITY: 'IDENTITY'>, 'STATE': <NormalizationMode.MEAN_STD: 'MEAN_STD'>, 'ACTION': <NormalizationMode.MEAN_STD: 'MEAN_STD'>}, 'input_features': {'observation.images.image': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.images.wrist_image': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.images.image_mask': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.images.wrist_mask': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.images.object_of_interest_mask': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.images.object_of_interest_wrist_mask': {'type': <FeatureType.VISUAL: 'VISUAL'>, 'shape': [3, 256, 256]}, 'observation.state': {'type': <FeatureType.STATE: 'STATE'>, 'shape': [8]}, 'observation.states.ee_state': {'type': <FeatureType.STATE: 'STATE'>, 'shape': [6]}, 'observation.states.joint_state': {'type': <FeatureType.STATE: 'STATE'>, 'shape': [7]}, 'observation.states.gripper_state': {'type': <FeatureType.STATE: 'STATE'>, 'shape': [2]}}, 'output_features': {'action': {'type': <FeatureType.ACTION: 'ACTION'>, 'shape': [7]}}, 'device': 'cuda', 'use_amp': False, 'gradient_accumulation_steps': 1, 'chunk_size': 50, 'n_action_steps': 50, 'max_state_dim': 32, 'max_action_dim': 32, 'resize_imgs_with_padding': [512, 512], 'empty_cameras': 0, 'adapt_to_pi_aloha': False, 'use_delta_joint_actions_aloha': False, 'tokenizer_max_length': 48, 'num_steps': 10, 'use_cache': True, 'freeze_vision_encoder': True, 'train_expert_only': False, 'train_state_proj': True, 'optimizer_lr': 0.0001, 'optimizer_betas': [0.9, 0.95], 'optimizer_eps': 1e-08, 'optimizer_weight_decay': 1e-10, 'optimizer_grad_clip_norm': 10.0, 'scheduler_warmup_steps': 1000, 'scheduler_decay_steps': 30000, 'scheduler_decay_lr': 2.5e-06, 'vlm_model_name': 'HuggingFaceTB/SmolVLM2-500M-Video-Instruct', 'load_vlm_weights': True, 'add_image_special_tokens': False, 'attention_mode': 'cross_attn', 'prefix_length': 0, 'pad_language_to': 'max_length', 'num_expert_layers': 0, 'num_vlm_layers': 16, 'self_attn_every_n_layers': 2, 'expert_width_multiplier': 0.75, 'min_period': 0.004, 'max_period': 4.0}, 'output_dir': '/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/VLA/binh/Smolvla_capstone_project/outputs/train/2025-11-25/16-33-59_libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base', 'job_name': 'libero_100%_tlc_v4_process_mask_neighbor_neg_add_linear_different_0.4tlc_base', 'resume': True, 'seed': 42, 'num_workers': 8, 'batch_size': 64, 'steps': 100000, 'eval_freq': 20000, 'log_freq': 10, 'save_checkpoint': True, 'save_freq': 10000, 'use_policy_training_preset': True, 'optimizer': {'type': 'adamw', 'lr': 0.0001, 'weight_decay': 1e-10, 'grad_clip_norm': 10.0, 'betas': [0.9, 0.95], 'eps': 1e-08}, 'scheduler': {'type': 'cosine_decay_with_warmup', 'num_warmup_steps': 1000, 'num_decay_steps': 30000, 'peak_lr': 0.0001, 'decay_lr': 2.5e-06}, 'eval': {'n_episodes': 50, 'batch_size': 50, 'use_async_envs': False}, 'wandb': {'enable': True, 'disable_artifact': True, 'project': 'smolvla', 'entity': 'Robotics_VLA', 'notes': None, 'run_id': None, 'mode': 'offline'}, '_wandb': {}}
11
+ 2025-11-27 11:20:44,979 INFO MainThread:1938939 [wandb_init.py:init():881] starting backend
12
+ 2025-11-27 11:20:45,217 INFO MainThread:1938939 [wandb_init.py:init():884] sending inform_init request
13
+ 2025-11-27 11:20:45,227 INFO MainThread:1938939 [wandb_init.py:init():892] backend started and connected
14
+ 2025-11-27 11:20:45,229 INFO MainThread:1938939 [wandb_init.py:init():962] updated telemetry
15
+ 2025-11-27 11:20:45,248 INFO MainThread:1938939 [wandb_init.py:init():986] communicating run to backend with 90.0 second timeout
16
+ 2025-11-27 11:20:45,432 INFO MainThread:1938939 [wandb_init.py:init():1033] starting run threads in backend
17
+ 2025-11-27 11:20:45,788 INFO MainThread:1938939 [wandb_run.py:_console_start():2506] atexit reg
18
+ 2025-11-27 11:20:45,788 INFO MainThread:1938939 [wandb_run.py:_redirect():2354] redirect: wrap_raw
19
+ 2025-11-27 11:20:45,789 INFO MainThread:1938939 [wandb_run.py:_redirect():2423] Wrapping output streams.
20
+ 2025-11-27 11:20:45,789 INFO MainThread:1938939 [wandb_run.py:_redirect():2446] Redirects installed.
21
+ 2025-11-27 11:20:45,800 INFO MainThread:1938939 [wandb_init.py:init():1073] run started, returning control to user process
22
+ 2025-11-27 14:51:55,121 INFO wandb-AsyncioManager-main:1938939 [service_client.py:_forward_responses():80] Reached EOF.
23
+ 2025-11-27 14:51:55,122 INFO wandb-AsyncioManager-main:1938939 [mailbox.py:close():137] Closing mailbox, abandoning 0 handles.
24
+ 2025-11-27 14:51:55,307 ERROR wandb-AsyncioManager-main:1938939 [asyncio_manager.py:fn_wrap_exceptions():183] Uncaught exception in run_soon callback.
25
+ Traceback (most recent call last):
26
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/site-packages/wandb/sdk/lib/asyncio_manager.py", line 181, in fn_wrap_exceptions
27
+ await fn()
28
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/site-packages/wandb/sdk/lib/service/service_client.py", line 38, in publish
29
+ await self._send_server_request(request)
30
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/site-packages/wandb/sdk/lib/service/service_client.py", line 64, in _send_server_request
31
+ await self._writer.drain()
32
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/asyncio/streams.py", line 371, in drain
33
+ await self._protocol._drain_helper()
34
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/asyncio/streams.py", line 167, in _drain_helper
35
+ raise ConnectionResetError('Connection lost')
36
+ ConnectionResetError: Connection lost
37
+ 2025-11-27 14:51:55,322 ERROR wandb-AsyncioManager-main:1938939 [asyncio_manager.py:fn_wrap_exceptions():183] Uncaught exception in run_soon callback.
38
+ Traceback (most recent call last):
39
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/site-packages/wandb/sdk/lib/asyncio_manager.py", line 181, in fn_wrap_exceptions
40
+ await fn()
41
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/site-packages/wandb/sdk/lib/service/service_client.py", line 38, in publish
42
+ await self._send_server_request(request)
43
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/site-packages/wandb/sdk/lib/service/service_client.py", line 64, in _send_server_request
44
+ await self._writer.drain()
45
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/asyncio/streams.py", line 371, in drain
46
+ await self._protocol._drain_helper()
47
+ File "/projects/extern/kisski/kisski-umg-fairpact-2/dir.project/miniconda3/envs/smol/lib/python3.10/asyncio/streams.py", line 167, in _drain_helper
48
+ raise ConnectionResetError('Connection lost')
49
+ ConnectionResetError: Connection lost
wandb/offline-run-20251127_112044-lizvmqey/run-lizvmqey.wandb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db437b68b8edb0942bdd080811711afe74a368e7dacc00559976a204e7990ea6
3
+ size 7305887