Upload checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins
Browse files- checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/wandb/offline-run-20260118_210409-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins-run0/files/output.log +17 -17
- checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/wandb/offline-run-20260119_053756-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins-run0/files/output.log +53 -85
- checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/wandb/offline-run-20260122_153153-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins-run0/files/output.log +165 -165
checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/wandb/offline-run-20260118_210409-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins-run0/files/output.log
CHANGED
|
@@ -1145,6 +1145,9 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 1145 |
[[34m2026-01-18 23:17:06[39m] (step=0000964) Train Loss mse: 0.0000, Train Loss ce: 0.0772, Train Steps/Sec: 0.13,
|
| 1146 |
[[34m2026-01-18 23:17:14[39m] (step=0000965) Train Loss mse: 0.0000, Train Loss ce: 0.0769, Train Steps/Sec: 0.13,
|
| 1147 |
[[34m2026-01-18 23:17:21[39m] (step=0000966) Train Loss mse: 0.0000, Train Loss ce: 0.0778, Train Steps/Sec: 0.13,
|
|
|
|
|
|
|
|
|
|
| 1148 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step1000
|
| 1149 |
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 1150 |
[eval debug] first 3 batch fingerprints:
|
|
@@ -1166,9 +1169,6 @@ Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap
|
|
| 1166 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1167 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1168 |
ce_avg: 0.1431104689836502, mse_avg: 0.0
|
| 1169 |
-
[[34m2026-01-18 23:17:29[39m] (step=0000967) Train Loss mse: 0.0000, Train Loss ce: 0.0770, Train Steps/Sec: 0.13,
|
| 1170 |
-
[[34m2026-01-18 23:17:37[39m] (step=0000968) Train Loss mse: 0.0000, Train Loss ce: 0.0793, Train Steps/Sec: 0.13,
|
| 1171 |
-
[[34m2026-01-18 23:17:45[39m] (step=0000969) Train Loss mse: 0.0000, Train Loss ce: 0.0751, Train Steps/Sec: 0.12,
|
| 1172 |
[[34m2026-01-18 23:17:53[39m] (step=0000970) Train Loss mse: 0.0000, Train Loss ce: 0.0761, Train Steps/Sec: 0.13,
|
| 1173 |
[[34m2026-01-18 23:18:01[39m] (step=0000971) Train Loss mse: 0.0000, Train Loss ce: 0.0746, Train Steps/Sec: 0.13,
|
| 1174 |
[[34m2026-01-18 23:18:08[39m] (step=0000972) Train Loss mse: 0.0000, Train Loss ce: 0.0773, Train Steps/Sec: 0.13,
|
|
@@ -2472,20 +2472,6 @@ ce_avg: 0.1431104689836502, mse_avg: 0.0
|
|
| 2472 |
[[34m2026-01-19 02:08:28[39m] (step=0002270) Train Loss mse: 0.0000, Train Loss ce: 0.0758, Train Steps/Sec: 0.13,
|
| 2473 |
[[34m2026-01-19 02:08:36[39m] (step=0002271) Train Loss mse: 0.0000, Train Loss ce: 0.0735, Train Steps/Sec: 0.12,
|
| 2474 |
[[34m2026-01-19 02:08:44[39m] (step=0002272) Train Loss mse: 0.0000, Train Loss ce: 0.0754, Train Steps/Sec: 0.13,
|
| 2475 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 2476 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2477 |
-
[eval debug] first 3 batch fingerprints:
|
| 2478 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2479 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2480 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2481 |
-
ce_avg: 0.15242451429367065, mse_avg: 0.0
|
| 2482 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2483 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2484 |
-
[eval debug] first 3 batch fingerprints:
|
| 2485 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2486 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2487 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2488 |
-
ce_avg: 0.07420127838850021, mse_avg: 0.0
|
| 2489 |
[[34m2026-01-19 02:08:52[39m] (step=0002273) Train Loss mse: 0.0000, Train Loss ce: 0.0747, Train Steps/Sec: 0.13,
|
| 2490 |
[[34m2026-01-19 02:09:00[39m] (step=0002274) Train Loss mse: 0.0000, Train Loss ce: 0.0752, Train Steps/Sec: 0.13,
|
| 2491 |
[[34m2026-01-19 02:09:08[39m] (step=0002275) Train Loss mse: 0.0000, Train Loss ce: 0.0736, Train Steps/Sec: 0.13,
|
|
@@ -2616,6 +2602,20 @@ ce_avg: 0.07420127838850021, mse_avg: 0.0
|
|
| 2616 |
[[34m2026-01-19 02:25:33[39m] (step=0002400) Train Loss mse: 0.0000, Train Loss ce: 0.0746, Train Steps/Sec: 0.13,
|
| 2617 |
[[34m2026-01-19 02:25:41[39m] (step=0002401) Train Loss mse: 0.0000, Train Loss ce: 0.0732, Train Steps/Sec: 0.13,
|
| 2618 |
[[34m2026-01-19 02:25:49[39m] (step=0002402) Train Loss mse: 0.0000, Train Loss ce: 0.0755, Train Steps/Sec: 0.13,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2619 |
[[34m2026-01-19 02:25:57[39m] (step=0002403) Train Loss mse: 0.0000, Train Loss ce: 0.0755, Train Steps/Sec: 0.13,
|
| 2620 |
[[34m2026-01-19 02:26:05[39m] (step=0002404) Train Loss mse: 0.0000, Train Loss ce: 0.0755, Train Steps/Sec: 0.13,
|
| 2621 |
[[34m2026-01-19 02:26:13[39m] (step=0002405) Train Loss mse: 0.0000, Train Loss ce: 0.0739, Train Steps/Sec: 0.13,
|
|
|
|
| 1145 |
[[34m2026-01-18 23:17:06[39m] (step=0000964) Train Loss mse: 0.0000, Train Loss ce: 0.0772, Train Steps/Sec: 0.13,
|
| 1146 |
[[34m2026-01-18 23:17:14[39m] (step=0000965) Train Loss mse: 0.0000, Train Loss ce: 0.0769, Train Steps/Sec: 0.13,
|
| 1147 |
[[34m2026-01-18 23:17:21[39m] (step=0000966) Train Loss mse: 0.0000, Train Loss ce: 0.0778, Train Steps/Sec: 0.13,
|
| 1148 |
+
[[34m2026-01-18 23:17:29[39m] (step=0000967) Train Loss mse: 0.0000, Train Loss ce: 0.0770, Train Steps/Sec: 0.13,
|
| 1149 |
+
[[34m2026-01-18 23:17:37[39m] (step=0000968) Train Loss mse: 0.0000, Train Loss ce: 0.0793, Train Steps/Sec: 0.13,
|
| 1150 |
+
[[34m2026-01-18 23:17:45[39m] (step=0000969) Train Loss mse: 0.0000, Train Loss ce: 0.0751, Train Steps/Sec: 0.12,
|
| 1151 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step1000
|
| 1152 |
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 1153 |
[eval debug] first 3 batch fingerprints:
|
|
|
|
| 1169 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1170 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1171 |
ce_avg: 0.1431104689836502, mse_avg: 0.0
|
|
|
|
|
|
|
|
|
|
| 1172 |
[[34m2026-01-18 23:17:53[39m] (step=0000970) Train Loss mse: 0.0000, Train Loss ce: 0.0761, Train Steps/Sec: 0.13,
|
| 1173 |
[[34m2026-01-18 23:18:01[39m] (step=0000971) Train Loss mse: 0.0000, Train Loss ce: 0.0746, Train Steps/Sec: 0.13,
|
| 1174 |
[[34m2026-01-18 23:18:08[39m] (step=0000972) Train Loss mse: 0.0000, Train Loss ce: 0.0773, Train Steps/Sec: 0.13,
|
|
|
|
| 2472 |
[[34m2026-01-19 02:08:28[39m] (step=0002270) Train Loss mse: 0.0000, Train Loss ce: 0.0758, Train Steps/Sec: 0.13,
|
| 2473 |
[[34m2026-01-19 02:08:36[39m] (step=0002271) Train Loss mse: 0.0000, Train Loss ce: 0.0735, Train Steps/Sec: 0.12,
|
| 2474 |
[[34m2026-01-19 02:08:44[39m] (step=0002272) Train Loss mse: 0.0000, Train Loss ce: 0.0754, Train Steps/Sec: 0.13,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2475 |
[[34m2026-01-19 02:08:52[39m] (step=0002273) Train Loss mse: 0.0000, Train Loss ce: 0.0747, Train Steps/Sec: 0.13,
|
| 2476 |
[[34m2026-01-19 02:09:00[39m] (step=0002274) Train Loss mse: 0.0000, Train Loss ce: 0.0752, Train Steps/Sec: 0.13,
|
| 2477 |
[[34m2026-01-19 02:09:08[39m] (step=0002275) Train Loss mse: 0.0000, Train Loss ce: 0.0736, Train Steps/Sec: 0.13,
|
|
|
|
| 2602 |
[[34m2026-01-19 02:25:33[39m] (step=0002400) Train Loss mse: 0.0000, Train Loss ce: 0.0746, Train Steps/Sec: 0.13,
|
| 2603 |
[[34m2026-01-19 02:25:41[39m] (step=0002401) Train Loss mse: 0.0000, Train Loss ce: 0.0732, Train Steps/Sec: 0.13,
|
| 2604 |
[[34m2026-01-19 02:25:49[39m] (step=0002402) Train Loss mse: 0.0000, Train Loss ce: 0.0755, Train Steps/Sec: 0.13,
|
| 2605 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 2606 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2607 |
+
[eval debug] first 3 batch fingerprints:
|
| 2608 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2609 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2610 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2611 |
+
ce_avg: 0.15242451429367065, mse_avg: 0.0
|
| 2612 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2613 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2614 |
+
[eval debug] first 3 batch fingerprints:
|
| 2615 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2616 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2617 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2618 |
+
ce_avg: 0.07420127838850021, mse_avg: 0.0
|
| 2619 |
[[34m2026-01-19 02:25:57[39m] (step=0002403) Train Loss mse: 0.0000, Train Loss ce: 0.0755, Train Steps/Sec: 0.13,
|
| 2620 |
[[34m2026-01-19 02:26:05[39m] (step=0002404) Train Loss mse: 0.0000, Train Loss ce: 0.0755, Train Steps/Sec: 0.13,
|
| 2621 |
[[34m2026-01-19 02:26:13[39m] (step=0002405) Train Loss mse: 0.0000, Train Loss ce: 0.0739, Train Steps/Sec: 0.13,
|
checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/wandb/offline-run-20260119_053756-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins-run0/files/output.log
CHANGED
|
@@ -1213,6 +1213,20 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 1213 |
[[34m2026-01-19 08:05:14[39m] (step=0001025) Train Loss mse: 0.0000, Train Loss ce: 0.0710, Train Steps/Sec: 0.12,
|
| 1214 |
[[34m2026-01-19 08:05:22[39m] (step=0001026) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.12,
|
| 1215 |
[[34m2026-01-19 08:05:31[39m] (step=0001027) Train Loss mse: 0.0000, Train Loss ce: 0.0701, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1216 |
[[34m2026-01-19 08:05:39[39m] (step=0001028) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.12,
|
| 1217 |
[[34m2026-01-19 08:05:47[39m] (step=0001029) Train Loss mse: 0.0000, Train Loss ce: 0.0707, Train Steps/Sec: 0.13,
|
| 1218 |
[[34m2026-01-19 08:05:55[39m] (step=0001030) Train Loss mse: 0.0000, Train Loss ce: 0.0728, Train Steps/Sec: 0.12,
|
|
@@ -1229,20 +1243,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 1229 |
[[34m2026-01-19 08:07:25[39m] (step=0001041) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.12,
|
| 1230 |
[[34m2026-01-19 08:07:34[39m] (step=0001042) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.12,
|
| 1231 |
[[34m2026-01-19 08:07:42[39m] (step=0001043) Train Loss mse: 0.0000, Train Loss ce: 0.0700, Train Steps/Sec: 0.12,
|
| 1232 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step1500
|
| 1233 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 1234 |
-
[eval debug] first 3 batch fingerprints:
|
| 1235 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1236 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1237 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1238 |
-
ce_avg: 0.1267234981060028, mse_avg: 0.0
|
| 1239 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step2000
|
| 1240 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 1241 |
-
[eval debug] first 3 batch fingerprints:
|
| 1242 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1243 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1244 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1245 |
-
ce_avg: 0.1393442451953888, mse_avg: 0.0
|
| 1246 |
[[34m2026-01-19 08:07:50[39m] (step=0001044) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.12,
|
| 1247 |
[[34m2026-01-19 08:07:59[39m] (step=0001045) Train Loss mse: 0.0000, Train Loss ce: 0.0697, Train Steps/Sec: 0.12,
|
| 1248 |
[[34m2026-01-19 08:08:07[39m] (step=0001046) Train Loss mse: 0.0000, Train Loss ce: 0.0718, Train Steps/Sec: 0.12,
|
|
@@ -2593,27 +2593,6 @@ ce_avg: 0.1393442451953888, mse_avg: 0.0
|
|
| 2593 |
[[34m2026-01-19 11:12:34[39m] (step=0002391) Train Loss mse: 0.0000, Train Loss ce: 0.0692, Train Steps/Sec: 0.12,
|
| 2594 |
[[34m2026-01-19 11:12:43[39m] (step=0002392) Train Loss mse: 0.0000, Train Loss ce: 0.0692, Train Steps/Sec: 0.12,
|
| 2595 |
[[34m2026-01-19 11:12:51[39m] (step=0002393) Train Loss mse: 0.0000, Train Loss ce: 0.0686, Train Steps/Sec: 0.12,
|
| 2596 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 2597 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2598 |
-
[eval debug] first 3 batch fingerprints:
|
| 2599 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2600 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2601 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2602 |
-
ce_avg: 0.14870576560497284, mse_avg: 0.0
|
| 2603 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2604 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2605 |
-
[eval debug] first 3 batch fingerprints:
|
| 2606 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2607 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2608 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2609 |
-
ce_avg: 0.07034339010715485, mse_avg: 0.0
|
| 2610 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step3500
|
| 2611 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2612 |
-
[eval debug] first 3 batch fingerprints:
|
| 2613 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2614 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2615 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2616 |
-
ce_avg: 0.07060375064611435, mse_avg: 0.0
|
| 2617 |
[[34m2026-01-19 11:12:59[39m] (step=0002394) Train Loss mse: 0.0000, Train Loss ce: 0.0692, Train Steps/Sec: 0.12,
|
| 2618 |
[[34m2026-01-19 11:13:07[39m] (step=0002395) Train Loss mse: 0.0000, Train Loss ce: 0.0696, Train Steps/Sec: 0.12,
|
| 2619 |
[[34m2026-01-19 11:13:16[39m] (step=0002396) Train Loss mse: 0.0000, Train Loss ce: 0.0689, Train Steps/Sec: 0.12,
|
|
@@ -2664,6 +2643,27 @@ ce_avg: 0.07060375064611435, mse_avg: 0.0
|
|
| 2664 |
[[34m2026-01-19 11:19:26[39m] (step=0002441) Train Loss mse: 0.0000, Train Loss ce: 0.0691, Train Steps/Sec: 0.12,
|
| 2665 |
[[34m2026-01-19 11:19:35[39m] (step=0002442) Train Loss mse: 0.0000, Train Loss ce: 0.0688, Train Steps/Sec: 0.12,
|
| 2666 |
[[34m2026-01-19 11:19:43[39m] (step=0002443) Train Loss mse: 0.0000, Train Loss ce: 0.0692, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2667 |
[[34m2026-01-19 11:19:51[39m] (step=0002444) Train Loss mse: 0.0000, Train Loss ce: 0.0689, Train Steps/Sec: 0.12,
|
| 2668 |
[[34m2026-01-19 11:19:59[39m] (step=0002445) Train Loss mse: 0.0000, Train Loss ce: 0.0693, Train Steps/Sec: 0.12,
|
| 2669 |
[[34m2026-01-19 11:20:08[39m] (step=0002446) Train Loss mse: 0.0000, Train Loss ce: 0.0680, Train Steps/Sec: 0.12,
|
|
@@ -3624,6 +3624,17 @@ ce_avg: 0.07060375064611435, mse_avg: 0.0
|
|
| 3624 |
[[34m2026-01-19 13:33:33[39m] (step=0003398) Train Loss mse: 0.0000, Train Loss ce: 0.0686, Train Steps/Sec: 0.12,
|
| 3625 |
[[34m2026-01-19 13:33:42[39m] (step=0003399) Train Loss mse: 0.0000, Train Loss ce: 0.0682, Train Steps/Sec: 0.11,
|
| 3626 |
[[34m2026-01-19 13:33:50[39m] (step=0003400) Train Loss mse: 0.0000, Train Loss ce: 0.0678, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3627 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step4000
|
| 3628 |
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 3629 |
[eval debug] first 3 batch fingerprints:
|
|
@@ -3638,49 +3649,6 @@ Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap
|
|
| 3638 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3639 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3640 |
ce_avg: 0.07141611725091934, mse_avg: 0.0
|
| 3641 |
-
[[34m2026-01-19 13:33:58[39m] (step=0003401) Train Loss mse: 0.0000, Train Loss ce: 0.0673, Train Steps/Sec: 0.12,
|
| 3642 |
-
[[34m2026-01-19 13:34:06[39m] (step=0003402) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.12,
|
| 3643 |
-
[[34m2026-01-19 13:34:14[39m] (step=0003403) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.12,
|
| 3644 |
-
[[34m2026-01-19 13:34:23[39m] (step=0003404) Train Loss mse: 0.0000, Train Loss ce: 0.0686, Train Steps/Sec: 0.12,
|
| 3645 |
-
[[34m2026-01-19 13:34:31[39m] (step=0003405) Train Loss mse: 0.0000, Train Loss ce: 0.0682, Train Steps/Sec: 0.12,
|
| 3646 |
-
[[34m2026-01-19 13:34:39[39m] (step=0003406) Train Loss mse: 0.0000, Train Loss ce: 0.0702, Train Steps/Sec: 0.12,
|
| 3647 |
-
[[34m2026-01-19 13:34:47[39m] (step=0003407) Train Loss mse: 0.0000, Train Loss ce: 0.0688, Train Steps/Sec: 0.12,
|
| 3648 |
-
[[34m2026-01-19 13:34:55[39m] (step=0003408) Train Loss mse: 0.0000, Train Loss ce: 0.0679, Train Steps/Sec: 0.12,
|
| 3649 |
-
[[34m2026-01-19 13:35:03[39m] (step=0003409) Train Loss mse: 0.0000, Train Loss ce: 0.0682, Train Steps/Sec: 0.13,
|
| 3650 |
-
[[34m2026-01-19 13:35:12[39m] (step=0003410) Train Loss mse: 0.0000, Train Loss ce: 0.0680, Train Steps/Sec: 0.12,
|
| 3651 |
-
[[34m2026-01-19 13:35:20[39m] (step=0003411) Train Loss mse: 0.0000, Train Loss ce: 0.0678, Train Steps/Sec: 0.12,
|
| 3652 |
-
[[34m2026-01-19 13:35:28[39m] (step=0003412) Train Loss mse: 0.0000, Train Loss ce: 0.0677, Train Steps/Sec: 0.12,
|
| 3653 |
-
[[34m2026-01-19 13:35:36[39m] (step=0003413) Train Loss mse: 0.0000, Train Loss ce: 0.0674, Train Steps/Sec: 0.12,
|
| 3654 |
-
[[34m2026-01-19 13:35:45[39m] (step=0003414) Train Loss mse: 0.0000, Train Loss ce: 0.0678, Train Steps/Sec: 0.12,
|
| 3655 |
-
[[34m2026-01-19 13:35:53[39m] (step=0003415) Train Loss mse: 0.0000, Train Loss ce: 0.0684, Train Steps/Sec: 0.12,
|
| 3656 |
-
[[34m2026-01-19 13:36:01[39m] (step=0003416) Train Loss mse: 0.0000, Train Loss ce: 0.0669, Train Steps/Sec: 0.12,
|
| 3657 |
-
[[34m2026-01-19 13:36:10[39m] (step=0003417) Train Loss mse: 0.0000, Train Loss ce: 0.0692, Train Steps/Sec: 0.12,
|
| 3658 |
-
[[34m2026-01-19 13:36:18[39m] (step=0003418) Train Loss mse: 0.0000, Train Loss ce: 0.0684, Train Steps/Sec: 0.12,
|
| 3659 |
-
[[34m2026-01-19 13:36:26[39m] (step=0003419) Train Loss mse: 0.0000, Train Loss ce: 0.0675, Train Steps/Sec: 0.12,
|
| 3660 |
-
[[34m2026-01-19 13:36:34[39m] (step=0003420) Train Loss mse: 0.0000, Train Loss ce: 0.0674, Train Steps/Sec: 0.12,
|
| 3661 |
-
[[34m2026-01-19 13:36:43[39m] (step=0003421) Train Loss mse: 0.0000, Train Loss ce: 0.0682, Train Steps/Sec: 0.12,
|
| 3662 |
-
[[34m2026-01-19 13:36:51[39m] (step=0003422) Train Loss mse: 0.0000, Train Loss ce: 0.0681, Train Steps/Sec: 0.12,
|
| 3663 |
-
[[34m2026-01-19 13:36:59[39m] (step=0003423) Train Loss mse: 0.0000, Train Loss ce: 0.0686, Train Steps/Sec: 0.12,
|
| 3664 |
-
[[34m2026-01-19 13:37:07[39m] (step=0003424) Train Loss mse: 0.0000, Train Loss ce: 0.0695, Train Steps/Sec: 0.12,
|
| 3665 |
-
[[34m2026-01-19 13:37:15[39m] (step=0003425) Train Loss mse: 0.0000, Train Loss ce: 0.0680, Train Steps/Sec: 0.12,
|
| 3666 |
-
[[34m2026-01-19 13:37:23[39m] (step=0003426) Train Loss mse: 0.0000, Train Loss ce: 0.0681, Train Steps/Sec: 0.12,
|
| 3667 |
-
[[34m2026-01-19 13:37:32[39m] (step=0003427) Train Loss mse: 0.0000, Train Loss ce: 0.0674, Train Steps/Sec: 0.12,
|
| 3668 |
-
[[34m2026-01-19 13:37:40[39m] (step=0003428) Train Loss mse: 0.0000, Train Loss ce: 0.0667, Train Steps/Sec: 0.12,
|
| 3669 |
-
[[34m2026-01-19 13:37:48[39m] (step=0003429) Train Loss mse: 0.0000, Train Loss ce: 0.0689, Train Steps/Sec: 0.12,
|
| 3670 |
-
[[34m2026-01-19 13:37:56[39m] (step=0003430) Train Loss mse: 0.0000, Train Loss ce: 0.0693, Train Steps/Sec: 0.13,
|
| 3671 |
-
[[34m2026-01-19 13:38:04[39m] (step=0003431) Train Loss mse: 0.0000, Train Loss ce: 0.0684, Train Steps/Sec: 0.12,
|
| 3672 |
-
[[34m2026-01-19 13:38:13[39m] (step=0003432) Train Loss mse: 0.0000, Train Loss ce: 0.0675, Train Steps/Sec: 0.12,
|
| 3673 |
-
[[34m2026-01-19 13:38:21[39m] (step=0003433) Train Loss mse: 0.0000, Train Loss ce: 0.0680, Train Steps/Sec: 0.12,
|
| 3674 |
-
[[34m2026-01-19 13:38:29[39m] (step=0003434) Train Loss mse: 0.0000, Train Loss ce: 0.0679, Train Steps/Sec: 0.12,
|
| 3675 |
-
[[34m2026-01-19 13:38:37[39m] (step=0003435) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.12,
|
| 3676 |
-
[[34m2026-01-19 13:38:46[39m] (step=0003436) Train Loss mse: 0.0000, Train Loss ce: 0.0670, Train Steps/Sec: 0.12,
|
| 3677 |
-
[[34m2026-01-19 13:38:54[39m] (step=0003437) Train Loss mse: 0.0000, Train Loss ce: 0.0668, Train Steps/Sec: 0.12,
|
| 3678 |
-
[[34m2026-01-19 13:39:03[39m] (step=0003438) Train Loss mse: 0.0000, Train Loss ce: 0.0673, Train Steps/Sec: 0.12,
|
| 3679 |
-
[[34m2026-01-19 13:39:11[39m] (step=0003439) Train Loss mse: 0.0000, Train Loss ce: 0.0693, Train Steps/Sec: 0.12,
|
| 3680 |
-
[[34m2026-01-19 13:39:19[39m] (step=0003440) Train Loss mse: 0.0000, Train Loss ce: 0.0674, Train Steps/Sec: 0.12,
|
| 3681 |
-
[[34m2026-01-19 13:39:27[39m] (step=0003441) Train Loss mse: 0.0000, Train Loss ce: 0.0682, Train Steps/Sec: 0.12,
|
| 3682 |
-
[[34m2026-01-19 13:39:35[39m] (step=0003442) Train Loss mse: 0.0000, Train Loss ce: 0.0674, Train Steps/Sec: 0.12,
|
| 3683 |
-
[[34m2026-01-19 13:39:43[39m] (step=0003443) Train Loss mse: 0.0000, Train Loss ce: 0.0679, Train Steps/Sec: 0.12,
|
| 3684 |
[[34m2026-01-19 13:39:52[39m] (step=0003444) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.12,
|
| 3685 |
[[34m2026-01-19 13:40:00[39m] (step=0003445) Train Loss mse: 0.0000, Train Loss ce: 0.0679, Train Steps/Sec: 0.12,
|
| 3686 |
[[34m2026-01-19 13:40:08[39m] (step=0003446) Train Loss mse: 0.0000, Train Loss ce: 0.0681, Train Steps/Sec: 0.12,
|
|
@@ -4996,6 +4964,13 @@ ce_avg: 0.07141611725091934, mse_avg: 0.0
|
|
| 4996 |
[[34m2026-01-19 16:40:06[39m] (step=0004756) Train Loss mse: 0.0000, Train Loss ce: 0.0654, Train Steps/Sec: 0.12,
|
| 4997 |
[[34m2026-01-19 16:40:14[39m] (step=0004757) Train Loss mse: 0.0000, Train Loss ce: 0.0667, Train Steps/Sec: 0.12,
|
| 4998 |
[[34m2026-01-19 16:40:22[39m] (step=0004758) Train Loss mse: 0.0000, Train Loss ce: 0.0655, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4999 |
[[34m2026-01-19 16:40:31[39m] (step=0004759) Train Loss mse: 0.0000, Train Loss ce: 0.0645, Train Steps/Sec: 0.12,
|
| 5000 |
[[34m2026-01-19 16:40:39[39m] (step=0004760) Train Loss mse: 0.0000, Train Loss ce: 0.0656, Train Steps/Sec: 0.12,
|
| 5001 |
[[34m2026-01-19 16:40:47[39m] (step=0004761) Train Loss mse: 0.0000, Train Loss ce: 0.0654, Train Steps/Sec: 0.12,
|
|
@@ -5018,13 +4993,6 @@ ce_avg: 0.07141611725091934, mse_avg: 0.0
|
|
| 5018 |
[[34m2026-01-19 16:43:06[39m] (step=0004778) Train Loss mse: 0.0000, Train Loss ce: 0.0658, Train Steps/Sec: 0.12,
|
| 5019 |
[[34m2026-01-19 16:43:14[39m] (step=0004779) Train Loss mse: 0.0000, Train Loss ce: 0.0661, Train Steps/Sec: 0.12,
|
| 5020 |
[[34m2026-01-19 16:43:22[39m] (step=0004780) Train Loss mse: 0.0000, Train Loss ce: 0.0658, Train Steps/Sec: 0.12,
|
| 5021 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step5000
|
| 5022 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 5023 |
-
[eval debug] first 3 batch fingerprints:
|
| 5024 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 5025 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 5026 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 5027 |
-
ce_avg: 0.07195709645748138, mse_avg: 0.0
|
| 5028 |
[[34m2026-01-19 16:43:31[39m] (step=0004781) Train Loss mse: 0.0000, Train Loss ce: 0.0664, Train Steps/Sec: 0.12,
|
| 5029 |
[[34m2026-01-19 16:43:39[39m] (step=0004782) Train Loss mse: 0.0000, Train Loss ce: 0.0653, Train Steps/Sec: 0.12,
|
| 5030 |
[[34m2026-01-19 16:43:47[39m] (step=0004783) Train Loss mse: 0.0000, Train Loss ce: 0.0657, Train Steps/Sec: 0.12,
|
|
|
|
| 1213 |
[[34m2026-01-19 08:05:14[39m] (step=0001025) Train Loss mse: 0.0000, Train Loss ce: 0.0710, Train Steps/Sec: 0.12,
|
| 1214 |
[[34m2026-01-19 08:05:22[39m] (step=0001026) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.12,
|
| 1215 |
[[34m2026-01-19 08:05:31[39m] (step=0001027) Train Loss mse: 0.0000, Train Loss ce: 0.0701, Train Steps/Sec: 0.12,
|
| 1216 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step1500
|
| 1217 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 1218 |
+
[eval debug] first 3 batch fingerprints:
|
| 1219 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1220 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1221 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1222 |
+
ce_avg: 0.1267234981060028, mse_avg: 0.0
|
| 1223 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step2000
|
| 1224 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 1225 |
+
[eval debug] first 3 batch fingerprints:
|
| 1226 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1227 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1228 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1229 |
+
ce_avg: 0.1393442451953888, mse_avg: 0.0
|
| 1230 |
[[34m2026-01-19 08:05:39[39m] (step=0001028) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.12,
|
| 1231 |
[[34m2026-01-19 08:05:47[39m] (step=0001029) Train Loss mse: 0.0000, Train Loss ce: 0.0707, Train Steps/Sec: 0.13,
|
| 1232 |
[[34m2026-01-19 08:05:55[39m] (step=0001030) Train Loss mse: 0.0000, Train Loss ce: 0.0728, Train Steps/Sec: 0.12,
|
|
|
|
| 1243 |
[[34m2026-01-19 08:07:25[39m] (step=0001041) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.12,
|
| 1244 |
[[34m2026-01-19 08:07:34[39m] (step=0001042) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.12,
|
| 1245 |
[[34m2026-01-19 08:07:42[39m] (step=0001043) Train Loss mse: 0.0000, Train Loss ce: 0.0700, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1246 |
[[34m2026-01-19 08:07:50[39m] (step=0001044) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.12,
|
| 1247 |
[[34m2026-01-19 08:07:59[39m] (step=0001045) Train Loss mse: 0.0000, Train Loss ce: 0.0697, Train Steps/Sec: 0.12,
|
| 1248 |
[[34m2026-01-19 08:08:07[39m] (step=0001046) Train Loss mse: 0.0000, Train Loss ce: 0.0718, Train Steps/Sec: 0.12,
|
|
|
|
| 2593 |
[[34m2026-01-19 11:12:34[39m] (step=0002391) Train Loss mse: 0.0000, Train Loss ce: 0.0692, Train Steps/Sec: 0.12,
|
| 2594 |
[[34m2026-01-19 11:12:43[39m] (step=0002392) Train Loss mse: 0.0000, Train Loss ce: 0.0692, Train Steps/Sec: 0.12,
|
| 2595 |
[[34m2026-01-19 11:12:51[39m] (step=0002393) Train Loss mse: 0.0000, Train Loss ce: 0.0686, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2596 |
[[34m2026-01-19 11:12:59[39m] (step=0002394) Train Loss mse: 0.0000, Train Loss ce: 0.0692, Train Steps/Sec: 0.12,
|
| 2597 |
[[34m2026-01-19 11:13:07[39m] (step=0002395) Train Loss mse: 0.0000, Train Loss ce: 0.0696, Train Steps/Sec: 0.12,
|
| 2598 |
[[34m2026-01-19 11:13:16[39m] (step=0002396) Train Loss mse: 0.0000, Train Loss ce: 0.0689, Train Steps/Sec: 0.12,
|
|
|
|
| 2643 |
[[34m2026-01-19 11:19:26[39m] (step=0002441) Train Loss mse: 0.0000, Train Loss ce: 0.0691, Train Steps/Sec: 0.12,
|
| 2644 |
[[34m2026-01-19 11:19:35[39m] (step=0002442) Train Loss mse: 0.0000, Train Loss ce: 0.0688, Train Steps/Sec: 0.12,
|
| 2645 |
[[34m2026-01-19 11:19:43[39m] (step=0002443) Train Loss mse: 0.0000, Train Loss ce: 0.0692, Train Steps/Sec: 0.12,
|
| 2646 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 2647 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2648 |
+
[eval debug] first 3 batch fingerprints:
|
| 2649 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2650 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2651 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2652 |
+
ce_avg: 0.14870576560497284, mse_avg: 0.0
|
| 2653 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2654 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2655 |
+
[eval debug] first 3 batch fingerprints:
|
| 2656 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2657 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2658 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2659 |
+
ce_avg: 0.07034339010715485, mse_avg: 0.0
|
| 2660 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step3500
|
| 2661 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2662 |
+
[eval debug] first 3 batch fingerprints:
|
| 2663 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2664 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2665 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2666 |
+
ce_avg: 0.07060375064611435, mse_avg: 0.0
|
| 2667 |
[[34m2026-01-19 11:19:51[39m] (step=0002444) Train Loss mse: 0.0000, Train Loss ce: 0.0689, Train Steps/Sec: 0.12,
|
| 2668 |
[[34m2026-01-19 11:19:59[39m] (step=0002445) Train Loss mse: 0.0000, Train Loss ce: 0.0693, Train Steps/Sec: 0.12,
|
| 2669 |
[[34m2026-01-19 11:20:08[39m] (step=0002446) Train Loss mse: 0.0000, Train Loss ce: 0.0680, Train Steps/Sec: 0.12,
|
|
|
|
| 3624 |
[[34m2026-01-19 13:33:33[39m] (step=0003398) Train Loss mse: 0.0000, Train Loss ce: 0.0686, Train Steps/Sec: 0.12,
|
| 3625 |
[[34m2026-01-19 13:33:42[39m] (step=0003399) Train Loss mse: 0.0000, Train Loss ce: 0.0682, Train Steps/Sec: 0.11,
|
| 3626 |
[[34m2026-01-19 13:33:50[39m] (step=0003400) Train Loss mse: 0.0000, Train Loss ce: 0.0678, Train Steps/Sec: 0.12,
|
| 3627 |
+
[[34m2026-01-19 13:33:58[39m] (step=0003401) Train Loss mse: 0.0000, Train Loss ce: 0.0673, Train Steps/Sec: 0.12,
|
| 3628 |
+
[[34m2026-01-19 13:34:06[39m] (step=0003402) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.12,
|
| 3629 |
+
[[34m2026-01-19 13:34:14[39m] (step=0003403) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.12,
|
| 3630 |
+
[[34m2026-01-19 13:34:23[39m] (step=0003404) Train Loss mse: 0.0000, Train Loss ce: 0.0686, Train Steps/Sec: 0.12,
|
| 3631 |
+
[[34m2026-01-19 13:34:31[39m] (step=0003405) Train Loss mse: 0.0000, Train Loss ce: 0.0682, Train Steps/Sec: 0.12,
|
| 3632 |
+
[[34m2026-01-19 13:34:39[39m] (step=0003406) Train Loss mse: 0.0000, Train Loss ce: 0.0702, Train Steps/Sec: 0.12,
|
| 3633 |
+
[[34m2026-01-19 13:34:47[39m] (step=0003407) Train Loss mse: 0.0000, Train Loss ce: 0.0688, Train Steps/Sec: 0.12,
|
| 3634 |
+
[[34m2026-01-19 13:34:55[39m] (step=0003408) Train Loss mse: 0.0000, Train Loss ce: 0.0679, Train Steps/Sec: 0.12,
|
| 3635 |
+
[[34m2026-01-19 13:35:03[39m] (step=0003409) Train Loss mse: 0.0000, Train Loss ce: 0.0682, Train Steps/Sec: 0.13,
|
| 3636 |
+
[[34m2026-01-19 13:35:12[39m] (step=0003410) Train Loss mse: 0.0000, Train Loss ce: 0.0680, Train Steps/Sec: 0.12,
|
| 3637 |
+
[[34m2026-01-19 13:35:20[39m] (step=0003411) Train Loss mse: 0.0000, Train Loss ce: 0.0678, Train Steps/Sec: 0.12,
|
| 3638 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step4000
|
| 3639 |
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 3640 |
[eval debug] first 3 batch fingerprints:
|
|
|
|
| 3649 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3650 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3651 |
ce_avg: 0.07141611725091934, mse_avg: 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3652 |
[[34m2026-01-19 13:39:52[39m] (step=0003444) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.12,
|
| 3653 |
[[34m2026-01-19 13:40:00[39m] (step=0003445) Train Loss mse: 0.0000, Train Loss ce: 0.0679, Train Steps/Sec: 0.12,
|
| 3654 |
[[34m2026-01-19 13:40:08[39m] (step=0003446) Train Loss mse: 0.0000, Train Loss ce: 0.0681, Train Steps/Sec: 0.12,
|
|
|
|
| 4964 |
[[34m2026-01-19 16:40:06[39m] (step=0004756) Train Loss mse: 0.0000, Train Loss ce: 0.0654, Train Steps/Sec: 0.12,
|
| 4965 |
[[34m2026-01-19 16:40:14[39m] (step=0004757) Train Loss mse: 0.0000, Train Loss ce: 0.0667, Train Steps/Sec: 0.12,
|
| 4966 |
[[34m2026-01-19 16:40:22[39m] (step=0004758) Train Loss mse: 0.0000, Train Loss ce: 0.0655, Train Steps/Sec: 0.12,
|
| 4967 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step5000
|
| 4968 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 4969 |
+
[eval debug] first 3 batch fingerprints:
|
| 4970 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 4971 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 4972 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 4973 |
+
ce_avg: 0.07195709645748138, mse_avg: 0.0
|
| 4974 |
[[34m2026-01-19 16:40:31[39m] (step=0004759) Train Loss mse: 0.0000, Train Loss ce: 0.0645, Train Steps/Sec: 0.12,
|
| 4975 |
[[34m2026-01-19 16:40:39[39m] (step=0004760) Train Loss mse: 0.0000, Train Loss ce: 0.0656, Train Steps/Sec: 0.12,
|
| 4976 |
[[34m2026-01-19 16:40:47[39m] (step=0004761) Train Loss mse: 0.0000, Train Loss ce: 0.0654, Train Steps/Sec: 0.12,
|
|
|
|
| 4993 |
[[34m2026-01-19 16:43:06[39m] (step=0004778) Train Loss mse: 0.0000, Train Loss ce: 0.0658, Train Steps/Sec: 0.12,
|
| 4994 |
[[34m2026-01-19 16:43:14[39m] (step=0004779) Train Loss mse: 0.0000, Train Loss ce: 0.0661, Train Steps/Sec: 0.12,
|
| 4995 |
[[34m2026-01-19 16:43:22[39m] (step=0004780) Train Loss mse: 0.0000, Train Loss ce: 0.0658, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4996 |
[[34m2026-01-19 16:43:31[39m] (step=0004781) Train Loss mse: 0.0000, Train Loss ce: 0.0664, Train Steps/Sec: 0.12,
|
| 4997 |
[[34m2026-01-19 16:43:39[39m] (step=0004782) Train Loss mse: 0.0000, Train Loss ce: 0.0653, Train Steps/Sec: 0.12,
|
| 4998 |
[[34m2026-01-19 16:43:47[39m] (step=0004783) Train Loss mse: 0.0000, Train Loss ce: 0.0657, Train Steps/Sec: 0.12,
|
checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/wandb/offline-run-20260122_153153-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins-run0/files/output.log
CHANGED
|
@@ -871,6 +871,129 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 871 |
[[34m2026-01-22 17:18:30[39m] (step=0000860) Train Loss mse: 0.0000, Train Loss ce: 0.0712, Train Steps/Sec: 0.15,
|
| 872 |
[[34m2026-01-22 17:18:37[39m] (step=0000861) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.14,
|
| 873 |
[[34m2026-01-22 17:18:44[39m] (step=0000862) Train Loss mse: 0.0000, Train Loss ce: 0.0702, Train Steps/Sec: 0.15,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 874 |
FullyShardedDataParallel(
|
| 875 |
(_fsdp_wrapped_module): Bagel(
|
| 876 |
(language_model): Qwen2ForCausalLM(
|
|
@@ -1055,129 +1178,6 @@ Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap
|
|
| 1055 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1056 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1057 |
ce_avg: 0.14597271382808685, mse_avg: 0.0
|
| 1058 |
-
[[34m2026-01-22 17:18:51[39m] (step=0000863) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.14,
|
| 1059 |
-
[[34m2026-01-22 17:18:58[39m] (step=0000864) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.15,
|
| 1060 |
-
[[34m2026-01-22 17:19:04[39m] (step=0000865) Train Loss mse: 0.0000, Train Loss ce: 0.0717, Train Steps/Sec: 0.15,
|
| 1061 |
-
[[34m2026-01-22 17:19:12[39m] (step=0000866) Train Loss mse: 0.0000, Train Loss ce: 0.0730, Train Steps/Sec: 0.13,
|
| 1062 |
-
[[34m2026-01-22 17:19:19[39m] (step=0000867) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.15,
|
| 1063 |
-
[[34m2026-01-22 17:19:25[39m] (step=0000868) Train Loss mse: 0.0000, Train Loss ce: 0.0706, Train Steps/Sec: 0.15,
|
| 1064 |
-
[[34m2026-01-22 17:19:32[39m] (step=0000869) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.15,
|
| 1065 |
-
[[34m2026-01-22 17:19:39[39m] (step=0000870) Train Loss mse: 0.0000, Train Loss ce: 0.0710, Train Steps/Sec: 0.15,
|
| 1066 |
-
[[34m2026-01-22 17:19:46[39m] (step=0000871) Train Loss mse: 0.0000, Train Loss ce: 0.0730, Train Steps/Sec: 0.15,
|
| 1067 |
-
[[34m2026-01-22 17:19:54[39m] (step=0000872) Train Loss mse: 0.0000, Train Loss ce: 0.0704, Train Steps/Sec: 0.13,
|
| 1068 |
-
[[34m2026-01-22 17:20:01[39m] (step=0000873) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.14,
|
| 1069 |
-
[[34m2026-01-22 17:20:07[39m] (step=0000874) Train Loss mse: 0.0000, Train Loss ce: 0.0723, Train Steps/Sec: 0.15,
|
| 1070 |
-
[[34m2026-01-22 17:20:14[39m] (step=0000875) Train Loss mse: 0.0000, Train Loss ce: 0.0707, Train Steps/Sec: 0.15,
|
| 1071 |
-
[[34m2026-01-22 17:20:20[39m] (step=0000876) Train Loss mse: 0.0000, Train Loss ce: 0.0735, Train Steps/Sec: 0.15,
|
| 1072 |
-
[[34m2026-01-22 17:20:27[39m] (step=0000877) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.15,
|
| 1073 |
-
[[34m2026-01-22 17:20:34[39m] (step=0000878) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.14,
|
| 1074 |
-
[[34m2026-01-22 17:20:41[39m] (step=0000879) Train Loss mse: 0.0000, Train Loss ce: 0.0729, Train Steps/Sec: 0.13,
|
| 1075 |
-
[[34m2026-01-22 17:20:49[39m] (step=0000880) Train Loss mse: 0.0000, Train Loss ce: 0.0727, Train Steps/Sec: 0.13,
|
| 1076 |
-
[[34m2026-01-22 17:20:56[39m] (step=0000881) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.14,
|
| 1077 |
-
[[34m2026-01-22 17:21:03[39m] (step=0000882) Train Loss mse: 0.0000, Train Loss ce: 0.0722, Train Steps/Sec: 0.14,
|
| 1078 |
-
[[34m2026-01-22 17:21:10[39m] (step=0000883) Train Loss mse: 0.0000, Train Loss ce: 0.0699, Train Steps/Sec: 0.14,
|
| 1079 |
-
[[34m2026-01-22 17:21:17[39m] (step=0000884) Train Loss mse: 0.0000, Train Loss ce: 0.0729, Train Steps/Sec: 0.14,
|
| 1080 |
-
[[34m2026-01-22 17:21:24[39m] (step=0000885) Train Loss mse: 0.0000, Train Loss ce: 0.0717, Train Steps/Sec: 0.14,
|
| 1081 |
-
[[34m2026-01-22 17:21:31[39m] (step=0000886) Train Loss mse: 0.0000, Train Loss ce: 0.0698, Train Steps/Sec: 0.15,
|
| 1082 |
-
[[34m2026-01-22 17:21:38[39m] (step=0000887) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.15,
|
| 1083 |
-
[[34m2026-01-22 17:21:44[39m] (step=0000888) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.15,
|
| 1084 |
-
[[34m2026-01-22 17:21:51[39m] (step=0000889) Train Loss mse: 0.0000, Train Loss ce: 0.0704, Train Steps/Sec: 0.15,
|
| 1085 |
-
[[34m2026-01-22 17:21:58[39m] (step=0000890) Train Loss mse: 0.0000, Train Loss ce: 0.0721, Train Steps/Sec: 0.14,
|
| 1086 |
-
[[34m2026-01-22 17:22:06[39m] (step=0000891) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.13,
|
| 1087 |
-
[[34m2026-01-22 17:22:12[39m] (step=0000892) Train Loss mse: 0.0000, Train Loss ce: 0.0741, Train Steps/Sec: 0.15,
|
| 1088 |
-
[[34m2026-01-22 17:22:19[39m] (step=0000893) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.15,
|
| 1089 |
-
[[34m2026-01-22 17:22:26[39m] (step=0000894) Train Loss mse: 0.0000, Train Loss ce: 0.0720, Train Steps/Sec: 0.14,
|
| 1090 |
-
[[34m2026-01-22 17:22:33[39m] (step=0000895) Train Loss mse: 0.0000, Train Loss ce: 0.0705, Train Steps/Sec: 0.15,
|
| 1091 |
-
[[34m2026-01-22 17:22:40[39m] (step=0000896) Train Loss mse: 0.0000, Train Loss ce: 0.0720, Train Steps/Sec: 0.13,
|
| 1092 |
-
[[34m2026-01-22 17:22:47[39m] (step=0000897) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.15,
|
| 1093 |
-
[[34m2026-01-22 17:22:54[39m] (step=0000898) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.14,
|
| 1094 |
-
[[34m2026-01-22 17:23:01[39m] (step=0000899) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.15,
|
| 1095 |
-
[[34m2026-01-22 17:23:08[39m] (step=0000900) Train Loss mse: 0.0000, Train Loss ce: 0.0717, Train Steps/Sec: 0.15,
|
| 1096 |
-
[[34m2026-01-22 17:23:14[39m] (step=0000901) Train Loss mse: 0.0000, Train Loss ce: 0.0731, Train Steps/Sec: 0.15,
|
| 1097 |
-
[[34m2026-01-22 17:23:22[39m] (step=0000902) Train Loss mse: 0.0000, Train Loss ce: 0.0701, Train Steps/Sec: 0.14,
|
| 1098 |
-
[[34m2026-01-22 17:23:29[39m] (step=0000903) Train Loss mse: 0.0000, Train Loss ce: 0.0726, Train Steps/Sec: 0.14,
|
| 1099 |
-
[[34m2026-01-22 17:23:36[39m] (step=0000904) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.15,
|
| 1100 |
-
[[34m2026-01-22 17:23:42[39m] (step=0000905) Train Loss mse: 0.0000, Train Loss ce: 0.0736, Train Steps/Sec: 0.15,
|
| 1101 |
-
[[34m2026-01-22 17:23:49[39m] (step=0000906) Train Loss mse: 0.0000, Train Loss ce: 0.0708, Train Steps/Sec: 0.14,
|
| 1102 |
-
[[34m2026-01-22 17:23:57[39m] (step=0000907) Train Loss mse: 0.0000, Train Loss ce: 0.0712, Train Steps/Sec: 0.14,
|
| 1103 |
-
[[34m2026-01-22 17:24:04[39m] (step=0000908) Train Loss mse: 0.0000, Train Loss ce: 0.0736, Train Steps/Sec: 0.14,
|
| 1104 |
-
[[34m2026-01-22 17:24:11[39m] (step=0000909) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.14,
|
| 1105 |
-
[[34m2026-01-22 17:24:17[39m] (step=0000910) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.15,
|
| 1106 |
-
[[34m2026-01-22 17:24:25[39m] (step=0000911) Train Loss mse: 0.0000, Train Loss ce: 0.0730, Train Steps/Sec: 0.13,
|
| 1107 |
-
[[34m2026-01-22 17:24:32[39m] (step=0000912) Train Loss mse: 0.0000, Train Loss ce: 0.0745, Train Steps/Sec: 0.15,
|
| 1108 |
-
[[34m2026-01-22 17:24:38[39m] (step=0000913) Train Loss mse: 0.0000, Train Loss ce: 0.0722, Train Steps/Sec: 0.15,
|
| 1109 |
-
[[34m2026-01-22 17:24:45[39m] (step=0000914) Train Loss mse: 0.0000, Train Loss ce: 0.0705, Train Steps/Sec: 0.14,
|
| 1110 |
-
[[34m2026-01-22 17:24:53[39m] (step=0000915) Train Loss mse: 0.0000, Train Loss ce: 0.0704, Train Steps/Sec: 0.14,
|
| 1111 |
-
[[34m2026-01-22 17:24:59[39m] (step=0000916) Train Loss mse: 0.0000, Train Loss ce: 0.0720, Train Steps/Sec: 0.15,
|
| 1112 |
-
[[34m2026-01-22 17:25:06[39m] (step=0000917) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.15,
|
| 1113 |
-
[[34m2026-01-22 17:25:13[39m] (step=0000918) Train Loss mse: 0.0000, Train Loss ce: 0.0727, Train Steps/Sec: 0.15,
|
| 1114 |
-
[[34m2026-01-22 17:25:20[39m] (step=0000919) Train Loss mse: 0.0000, Train Loss ce: 0.0725, Train Steps/Sec: 0.14,
|
| 1115 |
-
[[34m2026-01-22 17:25:27[39m] (step=0000920) Train Loss mse: 0.0000, Train Loss ce: 0.0723, Train Steps/Sec: 0.14,
|
| 1116 |
-
[[34m2026-01-22 17:25:35[39m] (step=0000921) Train Loss mse: 0.0000, Train Loss ce: 0.0734, Train Steps/Sec: 0.14,
|
| 1117 |
-
[[34m2026-01-22 17:25:41[39m] (step=0000922) Train Loss mse: 0.0000, Train Loss ce: 0.0729, Train Steps/Sec: 0.15,
|
| 1118 |
-
[[34m2026-01-22 17:25:48[39m] (step=0000923) Train Loss mse: 0.0000, Train Loss ce: 0.0705, Train Steps/Sec: 0.15,
|
| 1119 |
-
[[34m2026-01-22 17:25:55[39m] (step=0000924) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.14,
|
| 1120 |
-
[[34m2026-01-22 17:26:02[39m] (step=0000925) Train Loss mse: 0.0000, Train Loss ce: 0.0730, Train Steps/Sec: 0.15,
|
| 1121 |
-
[[34m2026-01-22 17:26:09[39m] (step=0000926) Train Loss mse: 0.0000, Train Loss ce: 0.0706, Train Steps/Sec: 0.15,
|
| 1122 |
-
[[34m2026-01-22 17:26:16[39m] (step=0000927) Train Loss mse: 0.0000, Train Loss ce: 0.0732, Train Steps/Sec: 0.14,
|
| 1123 |
-
[[34m2026-01-22 17:26:23[39m] (step=0000928) Train Loss mse: 0.0000, Train Loss ce: 0.0733, Train Steps/Sec: 0.15,
|
| 1124 |
-
[[34m2026-01-22 17:26:29[39m] (step=0000929) Train Loss mse: 0.0000, Train Loss ce: 0.0700, Train Steps/Sec: 0.15,
|
| 1125 |
-
[[34m2026-01-22 17:26:36[39m] (step=0000930) Train Loss mse: 0.0000, Train Loss ce: 0.0698, Train Steps/Sec: 0.16,
|
| 1126 |
-
[[34m2026-01-22 17:26:43[39m] (step=0000931) Train Loss mse: 0.0000, Train Loss ce: 0.0708, Train Steps/Sec: 0.15,
|
| 1127 |
-
[[34m2026-01-22 17:26:50[39m] (step=0000932) Train Loss mse: 0.0000, Train Loss ce: 0.0699, Train Steps/Sec: 0.13,
|
| 1128 |
-
[[34m2026-01-22 17:26:58[39m] (step=0000933) Train Loss mse: 0.0000, Train Loss ce: 0.0707, Train Steps/Sec: 0.13,
|
| 1129 |
-
[[34m2026-01-22 17:27:05[39m] (step=0000934) Train Loss mse: 0.0000, Train Loss ce: 0.0708, Train Steps/Sec: 0.14,
|
| 1130 |
-
[[34m2026-01-22 17:27:12[39m] (step=0000935) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.14,
|
| 1131 |
-
[[34m2026-01-22 17:27:19[39m] (step=0000936) Train Loss mse: 0.0000, Train Loss ce: 0.0724, Train Steps/Sec: 0.13,
|
| 1132 |
-
[[34m2026-01-22 17:27:26[39m] (step=0000937) Train Loss mse: 0.0000, Train Loss ce: 0.0721, Train Steps/Sec: 0.15,
|
| 1133 |
-
[[34m2026-01-22 17:27:32[39m] (step=0000938) Train Loss mse: 0.0000, Train Loss ce: 0.0724, Train Steps/Sec: 0.15,
|
| 1134 |
-
[[34m2026-01-22 17:27:40[39m] (step=0000939) Train Loss mse: 0.0000, Train Loss ce: 0.0710, Train Steps/Sec: 0.14,
|
| 1135 |
-
[[34m2026-01-22 17:27:46[39m] (step=0000940) Train Loss mse: 0.0000, Train Loss ce: 0.0695, Train Steps/Sec: 0.15,
|
| 1136 |
-
[[34m2026-01-22 17:27:53[39m] (step=0000941) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.14,
|
| 1137 |
-
[[34m2026-01-22 17:28:00[39m] (step=0000942) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.15,
|
| 1138 |
-
[[34m2026-01-22 17:28:07[39m] (step=0000943) Train Loss mse: 0.0000, Train Loss ce: 0.0744, Train Steps/Sec: 0.15,
|
| 1139 |
-
[[34m2026-01-22 17:28:14[39m] (step=0000944) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.15,
|
| 1140 |
-
[[34m2026-01-22 17:28:21[39m] (step=0000945) Train Loss mse: 0.0000, Train Loss ce: 0.0712, Train Steps/Sec: 0.14,
|
| 1141 |
-
[[34m2026-01-22 17:28:29[39m] (step=0000946) Train Loss mse: 0.0000, Train Loss ce: 0.0706, Train Steps/Sec: 0.13,
|
| 1142 |
-
[[34m2026-01-22 17:28:35[39m] (step=0000947) Train Loss mse: 0.0000, Train Loss ce: 0.0723, Train Steps/Sec: 0.15,
|
| 1143 |
-
[[34m2026-01-22 17:28:42[39m] (step=0000948) Train Loss mse: 0.0000, Train Loss ce: 0.0707, Train Steps/Sec: 0.15,
|
| 1144 |
-
[[34m2026-01-22 17:28:49[39m] (step=0000949) Train Loss mse: 0.0000, Train Loss ce: 0.0706, Train Steps/Sec: 0.14,
|
| 1145 |
-
[[34m2026-01-22 17:28:56[39m] (step=0000950) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.15,
|
| 1146 |
-
[[34m2026-01-22 17:29:03[39m] (step=0000951) Train Loss mse: 0.0000, Train Loss ce: 0.0697, Train Steps/Sec: 0.14,
|
| 1147 |
-
[[34m2026-01-22 17:29:11[39m] (step=0000952) Train Loss mse: 0.0000, Train Loss ce: 0.0703, Train Steps/Sec: 0.13,
|
| 1148 |
-
[[34m2026-01-22 17:29:18[39m] (step=0000953) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.13,
|
| 1149 |
-
[[34m2026-01-22 17:29:26[39m] (step=0000954) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.14,
|
| 1150 |
-
[[34m2026-01-22 17:29:32[39m] (step=0000955) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.15,
|
| 1151 |
-
[[34m2026-01-22 17:29:39[39m] (step=0000956) Train Loss mse: 0.0000, Train Loss ce: 0.0696, Train Steps/Sec: 0.15,
|
| 1152 |
-
[[34m2026-01-22 17:29:46[39m] (step=0000957) Train Loss mse: 0.0000, Train Loss ce: 0.0720, Train Steps/Sec: 0.14,
|
| 1153 |
-
[[34m2026-01-22 17:29:53[39m] (step=0000958) Train Loss mse: 0.0000, Train Loss ce: 0.0712, Train Steps/Sec: 0.14,
|
| 1154 |
-
[[34m2026-01-22 17:30:01[39m] (step=0000959) Train Loss mse: 0.0000, Train Loss ce: 0.0727, Train Steps/Sec: 0.13,
|
| 1155 |
-
[[34m2026-01-22 17:30:08[39m] (step=0000960) Train Loss mse: 0.0000, Train Loss ce: 0.0722, Train Steps/Sec: 0.13,
|
| 1156 |
-
[[34m2026-01-22 17:30:15[39m] (step=0000961) Train Loss mse: 0.0000, Train Loss ce: 0.0740, Train Steps/Sec: 0.15,
|
| 1157 |
-
[[34m2026-01-22 17:30:22[39m] (step=0000962) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.14,
|
| 1158 |
-
[[34m2026-01-22 17:30:30[39m] (step=0000963) Train Loss mse: 0.0000, Train Loss ce: 0.0717, Train Steps/Sec: 0.13,
|
| 1159 |
-
[[34m2026-01-22 17:30:37[39m] (step=0000964) Train Loss mse: 0.0000, Train Loss ce: 0.0712, Train Steps/Sec: 0.14,
|
| 1160 |
-
[[34m2026-01-22 17:30:44[39m] (step=0000965) Train Loss mse: 0.0000, Train Loss ce: 0.0697, Train Steps/Sec: 0.14,
|
| 1161 |
-
[[34m2026-01-22 17:30:51[39m] (step=0000966) Train Loss mse: 0.0000, Train Loss ce: 0.0707, Train Steps/Sec: 0.15,
|
| 1162 |
-
[[34m2026-01-22 17:30:57[39m] (step=0000967) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.15,
|
| 1163 |
-
[[34m2026-01-22 17:31:04[39m] (step=0000968) Train Loss mse: 0.0000, Train Loss ce: 0.0701, Train Steps/Sec: 0.15,
|
| 1164 |
-
[[34m2026-01-22 17:31:11[39m] (step=0000969) Train Loss mse: 0.0000, Train Loss ce: 0.0712, Train Steps/Sec: 0.14,
|
| 1165 |
-
[[34m2026-01-22 17:31:18[39m] (step=0000970) Train Loss mse: 0.0000, Train Loss ce: 0.0717, Train Steps/Sec: 0.14,
|
| 1166 |
-
[[34m2026-01-22 17:31:25[39m] (step=0000971) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.15,
|
| 1167 |
-
[[34m2026-01-22 17:31:32[39m] (step=0000972) Train Loss mse: 0.0000, Train Loss ce: 0.0696, Train Steps/Sec: 0.15,
|
| 1168 |
-
[[34m2026-01-22 17:31:39[39m] (step=0000973) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.14,
|
| 1169 |
-
[[34m2026-01-22 17:31:46[39m] (step=0000974) Train Loss mse: 0.0000, Train Loss ce: 0.0721, Train Steps/Sec: 0.14,
|
| 1170 |
-
[[34m2026-01-22 17:31:52[39m] (step=0000975) Train Loss mse: 0.0000, Train Loss ce: 0.0721, Train Steps/Sec: 0.15,
|
| 1171 |
-
[[34m2026-01-22 17:32:00[39m] (step=0000976) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.14,
|
| 1172 |
-
[[34m2026-01-22 17:32:07[39m] (step=0000977) Train Loss mse: 0.0000, Train Loss ce: 0.0726, Train Steps/Sec: 0.14,
|
| 1173 |
-
[[34m2026-01-22 17:32:13[39m] (step=0000978) Train Loss mse: 0.0000, Train Loss ce: 0.0716, Train Steps/Sec: 0.15,
|
| 1174 |
-
[[34m2026-01-22 17:32:20[39m] (step=0000979) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.15,
|
| 1175 |
-
[[34m2026-01-22 17:32:27[39m] (step=0000980) Train Loss mse: 0.0000, Train Loss ce: 0.0738, Train Steps/Sec: 0.14,
|
| 1176 |
-
[[34m2026-01-22 17:32:34[39m] (step=0000981) Train Loss mse: 0.0000, Train Loss ce: 0.0728, Train Steps/Sec: 0.14,
|
| 1177 |
-
[[34m2026-01-22 17:32:42[39m] (step=0000982) Train Loss mse: 0.0000, Train Loss ce: 0.0739, Train Steps/Sec: 0.14,
|
| 1178 |
-
[[34m2026-01-22 17:32:49[39m] (step=0000983) Train Loss mse: 0.0000, Train Loss ce: 0.0705, Train Steps/Sec: 0.14,
|
| 1179 |
-
[[34m2026-01-22 17:32:55[39m] (step=0000984) Train Loss mse: 0.0000, Train Loss ce: 0.0706, Train Steps/Sec: 0.15,
|
| 1180 |
-
[[34m2026-01-22 17:33:02[39m] (step=0000985) Train Loss mse: 0.0000, Train Loss ce: 0.0720, Train Steps/Sec: 0.14,
|
| 1181 |
[[34m2026-01-22 17:33:10[39m] (step=0000986) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.14,
|
| 1182 |
[[34m2026-01-22 17:33:16[39m] (step=0000987) Train Loss mse: 0.0000, Train Loss ce: 0.0720, Train Steps/Sec: 0.16,
|
| 1183 |
[[34m2026-01-22 17:33:23[39m] (step=0000988) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.15,
|
|
@@ -2426,20 +2426,6 @@ ce_avg: 0.14597271382808685, mse_avg: 0.0
|
|
| 2426 |
[[34m2026-01-22 19:57:55[39m] (step=0002231) Train Loss mse: 0.0000, Train Loss ce: 0.0706, Train Steps/Sec: 0.14,
|
| 2427 |
[[34m2026-01-22 19:58:01[39m] (step=0002232) Train Loss mse: 0.0000, Train Loss ce: 0.0705, Train Steps/Sec: 0.15,
|
| 2428 |
[[34m2026-01-22 19:58:09[39m] (step=0002233) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.13,
|
| 2429 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 2430 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2431 |
-
[eval debug] first 3 batch fingerprints:
|
| 2432 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2433 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2434 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2435 |
-
ce_avg: 0.1571701020002365, mse_avg: 0.0
|
| 2436 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2437 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2438 |
-
[eval debug] first 3 batch fingerprints:
|
| 2439 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2440 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2441 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2442 |
-
ce_avg: 0.0698586106300354, mse_avg: 0.0
|
| 2443 |
[[34m2026-01-22 19:58:15[39m] (step=0002234) Train Loss mse: 0.0000, Train Loss ce: 0.0691, Train Steps/Sec: 0.15,
|
| 2444 |
[[34m2026-01-22 19:58:23[39m] (step=0002235) Train Loss mse: 0.0000, Train Loss ce: 0.0701, Train Steps/Sec: 0.14,
|
| 2445 |
[[34m2026-01-22 19:58:29[39m] (step=0002236) Train Loss mse: 0.0000, Train Loss ce: 0.0688, Train Steps/Sec: 0.15,
|
|
@@ -2567,6 +2553,27 @@ ce_avg: 0.0698586106300354, mse_avg: 0.0
|
|
| 2567 |
[[34m2026-01-22 20:12:43[39m] (step=0002358) Train Loss mse: 0.0000, Train Loss ce: 0.0688, Train Steps/Sec: 0.14,
|
| 2568 |
[[34m2026-01-22 20:12:50[39m] (step=0002359) Train Loss mse: 0.0000, Train Loss ce: 0.0684, Train Steps/Sec: 0.14,
|
| 2569 |
[[34m2026-01-22 20:12:57[39m] (step=0002360) Train Loss mse: 0.0000, Train Loss ce: 0.0691, Train Steps/Sec: 0.14,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2570 |
[[34m2026-01-22 20:13:03[39m] (step=0002361) Train Loss mse: 0.0000, Train Loss ce: 0.0687, Train Steps/Sec: 0.15,
|
| 2571 |
[[34m2026-01-22 20:13:10[39m] (step=0002362) Train Loss mse: 0.0000, Train Loss ce: 0.0672, Train Steps/Sec: 0.15,
|
| 2572 |
[[34m2026-01-22 20:13:17[39m] (step=0002363) Train Loss mse: 0.0000, Train Loss ce: 0.0698, Train Steps/Sec: 0.14,
|
|
@@ -3521,6 +3528,20 @@ ce_avg: 0.0698586106300354, mse_avg: 0.0
|
|
| 3521 |
[[34m2026-01-22 22:06:04[39m] (step=0003309) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.15,
|
| 3522 |
[[34m2026-01-22 22:06:11[39m] (step=0003310) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.14,
|
| 3523 |
[[34m2026-01-22 22:06:18[39m] (step=0003311) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.14,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3524 |
[[34m2026-01-22 22:06:26[39m] (step=0003312) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.13,
|
| 3525 |
[[34m2026-01-22 22:06:33[39m] (step=0003313) Train Loss mse: 0.0000, Train Loss ce: 0.0682, Train Steps/Sec: 0.15,
|
| 3526 |
[[34m2026-01-22 22:06:39[39m] (step=0003314) Train Loss mse: 0.0000, Train Loss ce: 0.0677, Train Steps/Sec: 0.14,
|
|
@@ -3597,27 +3618,6 @@ ce_avg: 0.0698586106300354, mse_avg: 0.0
|
|
| 3597 |
[[34m2026-01-22 22:14:53[39m] (step=0003385) Train Loss mse: 0.0000, Train Loss ce: 0.0689, Train Steps/Sec: 0.15,
|
| 3598 |
[[34m2026-01-22 22:15:00[39m] (step=0003386) Train Loss mse: 0.0000, Train Loss ce: 0.0678, Train Steps/Sec: 0.15,
|
| 3599 |
[[34m2026-01-22 22:15:07[39m] (step=0003387) Train Loss mse: 0.0000, Train Loss ce: 0.0687, Train Steps/Sec: 0.15,
|
| 3600 |
-
[
|
| 3601 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step3500
|
| 3602 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 3603 |
-
[eval debug] first 3 batch fingerprints:
|
| 3604 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3605 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3606 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3607 |
-
ce_avg: 0.07012835144996643, mse_avg: 0.0
|
| 3608 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step4000
|
| 3609 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 3610 |
-
[eval debug] first 3 batch fingerprints:
|
| 3611 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3612 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3613 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3614 |
-
ce_avg: 0.06947071850299835, mse_avg: 0.0
|
| 3615 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step4500
|
| 3616 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 3617 |
-
[eval debug] first 3 batch fingerprints:
|
| 3618 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3619 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3620 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3621 |
[[34m2026-01-22 22:15:14[39m] (step=0003388) Train Loss mse: 0.0000, Train Loss ce: 0.0697, Train Steps/Sec: 0.13,
|
| 3622 |
[[34m2026-01-22 22:15:22[39m] (step=0003389) Train Loss mse: 0.0000, Train Loss ce: 0.0691, Train Steps/Sec: 0.13,
|
| 3623 |
[[34m2026-01-22 22:15:29[39m] (step=0003390) Train Loss mse: 0.0000, Train Loss ce: 0.0710, Train Steps/Sec: 0.14,
|
|
@@ -4930,13 +4930,6 @@ Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap
|
|
| 4930 |
[[34m2026-01-23 00:48:12[39m] (step=0004697) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.13,
|
| 4931 |
[[34m2026-01-23 00:48:19[39m] (step=0004698) Train Loss mse: 0.0000, Train Loss ce: 0.0700, Train Steps/Sec: 0.15,
|
| 4932 |
[[34m2026-01-23 00:48:25[39m] (step=0004699) Train Loss mse: 0.0000, Train Loss ce: 0.0676, Train Steps/Sec: 0.15,
|
| 4933 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step5000
|
| 4934 |
-
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 4935 |
-
[eval debug] first 3 batch fingerprints:
|
| 4936 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 4937 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 4938 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 4939 |
-
ce_avg: 0.07005834579467773, mse_avg: 0.0
|
| 4940 |
[[34m2026-01-23 00:48:32[39m] (step=0004700) Train Loss mse: 0.0000, Train Loss ce: 0.0675, Train Steps/Sec: 0.14,
|
| 4941 |
[[34m2026-01-23 00:48:40[39m] (step=0004701) Train Loss mse: 0.0000, Train Loss ce: 0.0679, Train Steps/Sec: 0.14,
|
| 4942 |
[[34m2026-01-23 00:48:47[39m] (step=0004702) Train Loss mse: 0.0000, Train Loss ce: 0.0690, Train Steps/Sec: 0.14,
|
|
@@ -4965,6 +4958,13 @@ ce_avg: 0.07005834579467773, mse_avg: 0.0
|
|
| 4965 |
[[34m2026-01-23 00:51:29[39m] (step=0004725) Train Loss mse: 0.0000, Train Loss ce: 0.0672, Train Steps/Sec: 0.14,
|
| 4966 |
[[34m2026-01-23 00:51:37[39m] (step=0004726) Train Loss mse: 0.0000, Train Loss ce: 0.0686, Train Steps/Sec: 0.14,
|
| 4967 |
[[34m2026-01-23 00:51:44[39m] (step=0004727) Train Loss mse: 0.0000, Train Loss ce: 0.0688, Train Steps/Sec: 0.14,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4968 |
[[34m2026-01-23 00:51:51[39m] (step=0004728) Train Loss mse: 0.0000, Train Loss ce: 0.0680, Train Steps/Sec: 0.14,
|
| 4969 |
[[34m2026-01-23 00:51:57[39m] (step=0004729) Train Loss mse: 0.0000, Train Loss ce: 0.0686, Train Steps/Sec: 0.15,
|
| 4970 |
[[34m2026-01-23 00:52:04[39m] (step=0004730) Train Loss mse: 0.0000, Train Loss ce: 0.0677, Train Steps/Sec: 0.14,
|
|
|
|
| 871 |
[[34m2026-01-22 17:18:30[39m] (step=0000860) Train Loss mse: 0.0000, Train Loss ce: 0.0712, Train Steps/Sec: 0.15,
|
| 872 |
[[34m2026-01-22 17:18:37[39m] (step=0000861) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.14,
|
| 873 |
[[34m2026-01-22 17:18:44[39m] (step=0000862) Train Loss mse: 0.0000, Train Loss ce: 0.0702, Train Steps/Sec: 0.15,
|
| 874 |
+
[[34m2026-01-22 17:18:51[39m] (step=0000863) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.14,
|
| 875 |
+
[[34m2026-01-22 17:18:58[39m] (step=0000864) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.15,
|
| 876 |
+
[[34m2026-01-22 17:19:04[39m] (step=0000865) Train Loss mse: 0.0000, Train Loss ce: 0.0717, Train Steps/Sec: 0.15,
|
| 877 |
+
[[34m2026-01-22 17:19:12[39m] (step=0000866) Train Loss mse: 0.0000, Train Loss ce: 0.0730, Train Steps/Sec: 0.13,
|
| 878 |
+
[[34m2026-01-22 17:19:19[39m] (step=0000867) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.15,
|
| 879 |
+
[[34m2026-01-22 17:19:25[39m] (step=0000868) Train Loss mse: 0.0000, Train Loss ce: 0.0706, Train Steps/Sec: 0.15,
|
| 880 |
+
[[34m2026-01-22 17:19:32[39m] (step=0000869) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.15,
|
| 881 |
+
[[34m2026-01-22 17:19:39[39m] (step=0000870) Train Loss mse: 0.0000, Train Loss ce: 0.0710, Train Steps/Sec: 0.15,
|
| 882 |
+
[[34m2026-01-22 17:19:46[39m] (step=0000871) Train Loss mse: 0.0000, Train Loss ce: 0.0730, Train Steps/Sec: 0.15,
|
| 883 |
+
[[34m2026-01-22 17:19:54[39m] (step=0000872) Train Loss mse: 0.0000, Train Loss ce: 0.0704, Train Steps/Sec: 0.13,
|
| 884 |
+
[[34m2026-01-22 17:20:01[39m] (step=0000873) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.14,
|
| 885 |
+
[[34m2026-01-22 17:20:07[39m] (step=0000874) Train Loss mse: 0.0000, Train Loss ce: 0.0723, Train Steps/Sec: 0.15,
|
| 886 |
+
[[34m2026-01-22 17:20:14[39m] (step=0000875) Train Loss mse: 0.0000, Train Loss ce: 0.0707, Train Steps/Sec: 0.15,
|
| 887 |
+
[[34m2026-01-22 17:20:20[39m] (step=0000876) Train Loss mse: 0.0000, Train Loss ce: 0.0735, Train Steps/Sec: 0.15,
|
| 888 |
+
[[34m2026-01-22 17:20:27[39m] (step=0000877) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.15,
|
| 889 |
+
[[34m2026-01-22 17:20:34[39m] (step=0000878) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.14,
|
| 890 |
+
[[34m2026-01-22 17:20:41[39m] (step=0000879) Train Loss mse: 0.0000, Train Loss ce: 0.0729, Train Steps/Sec: 0.13,
|
| 891 |
+
[[34m2026-01-22 17:20:49[39m] (step=0000880) Train Loss mse: 0.0000, Train Loss ce: 0.0727, Train Steps/Sec: 0.13,
|
| 892 |
+
[[34m2026-01-22 17:20:56[39m] (step=0000881) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.14,
|
| 893 |
+
[[34m2026-01-22 17:21:03[39m] (step=0000882) Train Loss mse: 0.0000, Train Loss ce: 0.0722, Train Steps/Sec: 0.14,
|
| 894 |
+
[[34m2026-01-22 17:21:10[39m] (step=0000883) Train Loss mse: 0.0000, Train Loss ce: 0.0699, Train Steps/Sec: 0.14,
|
| 895 |
+
[[34m2026-01-22 17:21:17[39m] (step=0000884) Train Loss mse: 0.0000, Train Loss ce: 0.0729, Train Steps/Sec: 0.14,
|
| 896 |
+
[[34m2026-01-22 17:21:24[39m] (step=0000885) Train Loss mse: 0.0000, Train Loss ce: 0.0717, Train Steps/Sec: 0.14,
|
| 897 |
+
[[34m2026-01-22 17:21:31[39m] (step=0000886) Train Loss mse: 0.0000, Train Loss ce: 0.0698, Train Steps/Sec: 0.15,
|
| 898 |
+
[[34m2026-01-22 17:21:38[39m] (step=0000887) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.15,
|
| 899 |
+
[[34m2026-01-22 17:21:44[39m] (step=0000888) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.15,
|
| 900 |
+
[[34m2026-01-22 17:21:51[39m] (step=0000889) Train Loss mse: 0.0000, Train Loss ce: 0.0704, Train Steps/Sec: 0.15,
|
| 901 |
+
[[34m2026-01-22 17:21:58[39m] (step=0000890) Train Loss mse: 0.0000, Train Loss ce: 0.0721, Train Steps/Sec: 0.14,
|
| 902 |
+
[[34m2026-01-22 17:22:06[39m] (step=0000891) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.13,
|
| 903 |
+
[[34m2026-01-22 17:22:12[39m] (step=0000892) Train Loss mse: 0.0000, Train Loss ce: 0.0741, Train Steps/Sec: 0.15,
|
| 904 |
+
[[34m2026-01-22 17:22:19[39m] (step=0000893) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.15,
|
| 905 |
+
[[34m2026-01-22 17:22:26[39m] (step=0000894) Train Loss mse: 0.0000, Train Loss ce: 0.0720, Train Steps/Sec: 0.14,
|
| 906 |
+
[[34m2026-01-22 17:22:33[39m] (step=0000895) Train Loss mse: 0.0000, Train Loss ce: 0.0705, Train Steps/Sec: 0.15,
|
| 907 |
+
[[34m2026-01-22 17:22:40[39m] (step=0000896) Train Loss mse: 0.0000, Train Loss ce: 0.0720, Train Steps/Sec: 0.13,
|
| 908 |
+
[[34m2026-01-22 17:22:47[39m] (step=0000897) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.15,
|
| 909 |
+
[[34m2026-01-22 17:22:54[39m] (step=0000898) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.14,
|
| 910 |
+
[[34m2026-01-22 17:23:01[39m] (step=0000899) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.15,
|
| 911 |
+
[[34m2026-01-22 17:23:08[39m] (step=0000900) Train Loss mse: 0.0000, Train Loss ce: 0.0717, Train Steps/Sec: 0.15,
|
| 912 |
+
[[34m2026-01-22 17:23:14[39m] (step=0000901) Train Loss mse: 0.0000, Train Loss ce: 0.0731, Train Steps/Sec: 0.15,
|
| 913 |
+
[[34m2026-01-22 17:23:22[39m] (step=0000902) Train Loss mse: 0.0000, Train Loss ce: 0.0701, Train Steps/Sec: 0.14,
|
| 914 |
+
[[34m2026-01-22 17:23:29[39m] (step=0000903) Train Loss mse: 0.0000, Train Loss ce: 0.0726, Train Steps/Sec: 0.14,
|
| 915 |
+
[[34m2026-01-22 17:23:36[39m] (step=0000904) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.15,
|
| 916 |
+
[[34m2026-01-22 17:23:42[39m] (step=0000905) Train Loss mse: 0.0000, Train Loss ce: 0.0736, Train Steps/Sec: 0.15,
|
| 917 |
+
[[34m2026-01-22 17:23:49[39m] (step=0000906) Train Loss mse: 0.0000, Train Loss ce: 0.0708, Train Steps/Sec: 0.14,
|
| 918 |
+
[[34m2026-01-22 17:23:57[39m] (step=0000907) Train Loss mse: 0.0000, Train Loss ce: 0.0712, Train Steps/Sec: 0.14,
|
| 919 |
+
[[34m2026-01-22 17:24:04[39m] (step=0000908) Train Loss mse: 0.0000, Train Loss ce: 0.0736, Train Steps/Sec: 0.14,
|
| 920 |
+
[[34m2026-01-22 17:24:11[39m] (step=0000909) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.14,
|
| 921 |
+
[[34m2026-01-22 17:24:17[39m] (step=0000910) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.15,
|
| 922 |
+
[[34m2026-01-22 17:24:25[39m] (step=0000911) Train Loss mse: 0.0000, Train Loss ce: 0.0730, Train Steps/Sec: 0.13,
|
| 923 |
+
[[34m2026-01-22 17:24:32[39m] (step=0000912) Train Loss mse: 0.0000, Train Loss ce: 0.0745, Train Steps/Sec: 0.15,
|
| 924 |
+
[[34m2026-01-22 17:24:38[39m] (step=0000913) Train Loss mse: 0.0000, Train Loss ce: 0.0722, Train Steps/Sec: 0.15,
|
| 925 |
+
[[34m2026-01-22 17:24:45[39m] (step=0000914) Train Loss mse: 0.0000, Train Loss ce: 0.0705, Train Steps/Sec: 0.14,
|
| 926 |
+
[[34m2026-01-22 17:24:53[39m] (step=0000915) Train Loss mse: 0.0000, Train Loss ce: 0.0704, Train Steps/Sec: 0.14,
|
| 927 |
+
[[34m2026-01-22 17:24:59[39m] (step=0000916) Train Loss mse: 0.0000, Train Loss ce: 0.0720, Train Steps/Sec: 0.15,
|
| 928 |
+
[[34m2026-01-22 17:25:06[39m] (step=0000917) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.15,
|
| 929 |
+
[[34m2026-01-22 17:25:13[39m] (step=0000918) Train Loss mse: 0.0000, Train Loss ce: 0.0727, Train Steps/Sec: 0.15,
|
| 930 |
+
[[34m2026-01-22 17:25:20[39m] (step=0000919) Train Loss mse: 0.0000, Train Loss ce: 0.0725, Train Steps/Sec: 0.14,
|
| 931 |
+
[[34m2026-01-22 17:25:27[39m] (step=0000920) Train Loss mse: 0.0000, Train Loss ce: 0.0723, Train Steps/Sec: 0.14,
|
| 932 |
+
[[34m2026-01-22 17:25:35[39m] (step=0000921) Train Loss mse: 0.0000, Train Loss ce: 0.0734, Train Steps/Sec: 0.14,
|
| 933 |
+
[[34m2026-01-22 17:25:41[39m] (step=0000922) Train Loss mse: 0.0000, Train Loss ce: 0.0729, Train Steps/Sec: 0.15,
|
| 934 |
+
[[34m2026-01-22 17:25:48[39m] (step=0000923) Train Loss mse: 0.0000, Train Loss ce: 0.0705, Train Steps/Sec: 0.15,
|
| 935 |
+
[[34m2026-01-22 17:25:55[39m] (step=0000924) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.14,
|
| 936 |
+
[[34m2026-01-22 17:26:02[39m] (step=0000925) Train Loss mse: 0.0000, Train Loss ce: 0.0730, Train Steps/Sec: 0.15,
|
| 937 |
+
[[34m2026-01-22 17:26:09[39m] (step=0000926) Train Loss mse: 0.0000, Train Loss ce: 0.0706, Train Steps/Sec: 0.15,
|
| 938 |
+
[[34m2026-01-22 17:26:16[39m] (step=0000927) Train Loss mse: 0.0000, Train Loss ce: 0.0732, Train Steps/Sec: 0.14,
|
| 939 |
+
[[34m2026-01-22 17:26:23[39m] (step=0000928) Train Loss mse: 0.0000, Train Loss ce: 0.0733, Train Steps/Sec: 0.15,
|
| 940 |
+
[[34m2026-01-22 17:26:29[39m] (step=0000929) Train Loss mse: 0.0000, Train Loss ce: 0.0700, Train Steps/Sec: 0.15,
|
| 941 |
+
[[34m2026-01-22 17:26:36[39m] (step=0000930) Train Loss mse: 0.0000, Train Loss ce: 0.0698, Train Steps/Sec: 0.16,
|
| 942 |
+
[[34m2026-01-22 17:26:43[39m] (step=0000931) Train Loss mse: 0.0000, Train Loss ce: 0.0708, Train Steps/Sec: 0.15,
|
| 943 |
+
[[34m2026-01-22 17:26:50[39m] (step=0000932) Train Loss mse: 0.0000, Train Loss ce: 0.0699, Train Steps/Sec: 0.13,
|
| 944 |
+
[[34m2026-01-22 17:26:58[39m] (step=0000933) Train Loss mse: 0.0000, Train Loss ce: 0.0707, Train Steps/Sec: 0.13,
|
| 945 |
+
[[34m2026-01-22 17:27:05[39m] (step=0000934) Train Loss mse: 0.0000, Train Loss ce: 0.0708, Train Steps/Sec: 0.14,
|
| 946 |
+
[[34m2026-01-22 17:27:12[39m] (step=0000935) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.14,
|
| 947 |
+
[[34m2026-01-22 17:27:19[39m] (step=0000936) Train Loss mse: 0.0000, Train Loss ce: 0.0724, Train Steps/Sec: 0.13,
|
| 948 |
+
[[34m2026-01-22 17:27:26[39m] (step=0000937) Train Loss mse: 0.0000, Train Loss ce: 0.0721, Train Steps/Sec: 0.15,
|
| 949 |
+
[[34m2026-01-22 17:27:32[39m] (step=0000938) Train Loss mse: 0.0000, Train Loss ce: 0.0724, Train Steps/Sec: 0.15,
|
| 950 |
+
[[34m2026-01-22 17:27:40[39m] (step=0000939) Train Loss mse: 0.0000, Train Loss ce: 0.0710, Train Steps/Sec: 0.14,
|
| 951 |
+
[[34m2026-01-22 17:27:46[39m] (step=0000940) Train Loss mse: 0.0000, Train Loss ce: 0.0695, Train Steps/Sec: 0.15,
|
| 952 |
+
[[34m2026-01-22 17:27:53[39m] (step=0000941) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.14,
|
| 953 |
+
[[34m2026-01-22 17:28:00[39m] (step=0000942) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.15,
|
| 954 |
+
[[34m2026-01-22 17:28:07[39m] (step=0000943) Train Loss mse: 0.0000, Train Loss ce: 0.0744, Train Steps/Sec: 0.15,
|
| 955 |
+
[[34m2026-01-22 17:28:14[39m] (step=0000944) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.15,
|
| 956 |
+
[[34m2026-01-22 17:28:21[39m] (step=0000945) Train Loss mse: 0.0000, Train Loss ce: 0.0712, Train Steps/Sec: 0.14,
|
| 957 |
+
[[34m2026-01-22 17:28:29[39m] (step=0000946) Train Loss mse: 0.0000, Train Loss ce: 0.0706, Train Steps/Sec: 0.13,
|
| 958 |
+
[[34m2026-01-22 17:28:35[39m] (step=0000947) Train Loss mse: 0.0000, Train Loss ce: 0.0723, Train Steps/Sec: 0.15,
|
| 959 |
+
[[34m2026-01-22 17:28:42[39m] (step=0000948) Train Loss mse: 0.0000, Train Loss ce: 0.0707, Train Steps/Sec: 0.15,
|
| 960 |
+
[[34m2026-01-22 17:28:49[39m] (step=0000949) Train Loss mse: 0.0000, Train Loss ce: 0.0706, Train Steps/Sec: 0.14,
|
| 961 |
+
[[34m2026-01-22 17:28:56[39m] (step=0000950) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.15,
|
| 962 |
+
[[34m2026-01-22 17:29:03[39m] (step=0000951) Train Loss mse: 0.0000, Train Loss ce: 0.0697, Train Steps/Sec: 0.14,
|
| 963 |
+
[[34m2026-01-22 17:29:11[39m] (step=0000952) Train Loss mse: 0.0000, Train Loss ce: 0.0703, Train Steps/Sec: 0.13,
|
| 964 |
+
[[34m2026-01-22 17:29:18[39m] (step=0000953) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.13,
|
| 965 |
+
[[34m2026-01-22 17:29:26[39m] (step=0000954) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.14,
|
| 966 |
+
[[34m2026-01-22 17:29:32[39m] (step=0000955) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.15,
|
| 967 |
+
[[34m2026-01-22 17:29:39[39m] (step=0000956) Train Loss mse: 0.0000, Train Loss ce: 0.0696, Train Steps/Sec: 0.15,
|
| 968 |
+
[[34m2026-01-22 17:29:46[39m] (step=0000957) Train Loss mse: 0.0000, Train Loss ce: 0.0720, Train Steps/Sec: 0.14,
|
| 969 |
+
[[34m2026-01-22 17:29:53[39m] (step=0000958) Train Loss mse: 0.0000, Train Loss ce: 0.0712, Train Steps/Sec: 0.14,
|
| 970 |
+
[[34m2026-01-22 17:30:01[39m] (step=0000959) Train Loss mse: 0.0000, Train Loss ce: 0.0727, Train Steps/Sec: 0.13,
|
| 971 |
+
[[34m2026-01-22 17:30:08[39m] (step=0000960) Train Loss mse: 0.0000, Train Loss ce: 0.0722, Train Steps/Sec: 0.13,
|
| 972 |
+
[[34m2026-01-22 17:30:15[39m] (step=0000961) Train Loss mse: 0.0000, Train Loss ce: 0.0740, Train Steps/Sec: 0.15,
|
| 973 |
+
[[34m2026-01-22 17:30:22[39m] (step=0000962) Train Loss mse: 0.0000, Train Loss ce: 0.0714, Train Steps/Sec: 0.14,
|
| 974 |
+
[[34m2026-01-22 17:30:30[39m] (step=0000963) Train Loss mse: 0.0000, Train Loss ce: 0.0717, Train Steps/Sec: 0.13,
|
| 975 |
+
[[34m2026-01-22 17:30:37[39m] (step=0000964) Train Loss mse: 0.0000, Train Loss ce: 0.0712, Train Steps/Sec: 0.14,
|
| 976 |
+
[[34m2026-01-22 17:30:44[39m] (step=0000965) Train Loss mse: 0.0000, Train Loss ce: 0.0697, Train Steps/Sec: 0.14,
|
| 977 |
+
[[34m2026-01-22 17:30:51[39m] (step=0000966) Train Loss mse: 0.0000, Train Loss ce: 0.0707, Train Steps/Sec: 0.15,
|
| 978 |
+
[[34m2026-01-22 17:30:57[39m] (step=0000967) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.15,
|
| 979 |
+
[[34m2026-01-22 17:31:04[39m] (step=0000968) Train Loss mse: 0.0000, Train Loss ce: 0.0701, Train Steps/Sec: 0.15,
|
| 980 |
+
[[34m2026-01-22 17:31:11[39m] (step=0000969) Train Loss mse: 0.0000, Train Loss ce: 0.0712, Train Steps/Sec: 0.14,
|
| 981 |
+
[[34m2026-01-22 17:31:18[39m] (step=0000970) Train Loss mse: 0.0000, Train Loss ce: 0.0717, Train Steps/Sec: 0.14,
|
| 982 |
+
[[34m2026-01-22 17:31:25[39m] (step=0000971) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.15,
|
| 983 |
+
[[34m2026-01-22 17:31:32[39m] (step=0000972) Train Loss mse: 0.0000, Train Loss ce: 0.0696, Train Steps/Sec: 0.15,
|
| 984 |
+
[[34m2026-01-22 17:31:39[39m] (step=0000973) Train Loss mse: 0.0000, Train Loss ce: 0.0715, Train Steps/Sec: 0.14,
|
| 985 |
+
[[34m2026-01-22 17:31:46[39m] (step=0000974) Train Loss mse: 0.0000, Train Loss ce: 0.0721, Train Steps/Sec: 0.14,
|
| 986 |
+
[[34m2026-01-22 17:31:52[39m] (step=0000975) Train Loss mse: 0.0000, Train Loss ce: 0.0721, Train Steps/Sec: 0.15,
|
| 987 |
+
[[34m2026-01-22 17:32:00[39m] (step=0000976) Train Loss mse: 0.0000, Train Loss ce: 0.0719, Train Steps/Sec: 0.14,
|
| 988 |
+
[[34m2026-01-22 17:32:07[39m] (step=0000977) Train Loss mse: 0.0000, Train Loss ce: 0.0726, Train Steps/Sec: 0.14,
|
| 989 |
+
[[34m2026-01-22 17:32:13[39m] (step=0000978) Train Loss mse: 0.0000, Train Loss ce: 0.0716, Train Steps/Sec: 0.15,
|
| 990 |
+
[[34m2026-01-22 17:32:20[39m] (step=0000979) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.15,
|
| 991 |
+
[[34m2026-01-22 17:32:27[39m] (step=0000980) Train Loss mse: 0.0000, Train Loss ce: 0.0738, Train Steps/Sec: 0.14,
|
| 992 |
+
[[34m2026-01-22 17:32:34[39m] (step=0000981) Train Loss mse: 0.0000, Train Loss ce: 0.0728, Train Steps/Sec: 0.14,
|
| 993 |
+
[[34m2026-01-22 17:32:42[39m] (step=0000982) Train Loss mse: 0.0000, Train Loss ce: 0.0739, Train Steps/Sec: 0.14,
|
| 994 |
+
[[34m2026-01-22 17:32:49[39m] (step=0000983) Train Loss mse: 0.0000, Train Loss ce: 0.0705, Train Steps/Sec: 0.14,
|
| 995 |
+
[[34m2026-01-22 17:32:55[39m] (step=0000984) Train Loss mse: 0.0000, Train Loss ce: 0.0706, Train Steps/Sec: 0.15,
|
| 996 |
+
[[34m2026-01-22 17:33:02[39m] (step=0000985) Train Loss mse: 0.0000, Train Loss ce: 0.0720, Train Steps/Sec: 0.14,
|
| 997 |
FullyShardedDataParallel(
|
| 998 |
(_fsdp_wrapped_module): Bagel(
|
| 999 |
(language_model): Qwen2ForCausalLM(
|
|
|
|
| 1178 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1179 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 1180 |
ce_avg: 0.14597271382808685, mse_avg: 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1181 |
[[34m2026-01-22 17:33:10[39m] (step=0000986) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.14,
|
| 1182 |
[[34m2026-01-22 17:33:16[39m] (step=0000987) Train Loss mse: 0.0000, Train Loss ce: 0.0720, Train Steps/Sec: 0.16,
|
| 1183 |
[[34m2026-01-22 17:33:23[39m] (step=0000988) Train Loss mse: 0.0000, Train Loss ce: 0.0711, Train Steps/Sec: 0.15,
|
|
|
|
| 2426 |
[[34m2026-01-22 19:57:55[39m] (step=0002231) Train Loss mse: 0.0000, Train Loss ce: 0.0706, Train Steps/Sec: 0.14,
|
| 2427 |
[[34m2026-01-22 19:58:01[39m] (step=0002232) Train Loss mse: 0.0000, Train Loss ce: 0.0705, Train Steps/Sec: 0.15,
|
| 2428 |
[[34m2026-01-22 19:58:09[39m] (step=0002233) Train Loss mse: 0.0000, Train Loss ce: 0.0713, Train Steps/Sec: 0.13,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2429 |
[[34m2026-01-22 19:58:15[39m] (step=0002234) Train Loss mse: 0.0000, Train Loss ce: 0.0691, Train Steps/Sec: 0.15,
|
| 2430 |
[[34m2026-01-22 19:58:23[39m] (step=0002235) Train Loss mse: 0.0000, Train Loss ce: 0.0701, Train Steps/Sec: 0.14,
|
| 2431 |
[[34m2026-01-22 19:58:29[39m] (step=0002236) Train Loss mse: 0.0000, Train Loss ce: 0.0688, Train Steps/Sec: 0.15,
|
|
|
|
| 2553 |
[[34m2026-01-22 20:12:43[39m] (step=0002358) Train Loss mse: 0.0000, Train Loss ce: 0.0688, Train Steps/Sec: 0.14,
|
| 2554 |
[[34m2026-01-22 20:12:50[39m] (step=0002359) Train Loss mse: 0.0000, Train Loss ce: 0.0684, Train Steps/Sec: 0.14,
|
| 2555 |
[[34m2026-01-22 20:12:57[39m] (step=0002360) Train Loss mse: 0.0000, Train Loss ce: 0.0691, Train Steps/Sec: 0.14,
|
| 2556 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 2557 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2558 |
+
[eval debug] first 3 batch fingerprints:
|
| 2559 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2560 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2561 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2562 |
+
ce_avg: 0.1571701020002365, mse_avg: 0.0
|
| 2563 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2564 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2565 |
+
[eval debug] first 3 batch fingerprints:
|
| 2566 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2567 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2568 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2569 |
+
ce_avg: 0.0698586106300354, mse_avg: 0.0
|
| 2570 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step3500
|
| 2571 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 2572 |
+
[eval debug] first 3 batch fingerprints:
|
| 2573 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2574 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2575 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 2576 |
+
ce_avg: 0.07012835144996643, mse_avg: 0.0
|
| 2577 |
[[34m2026-01-22 20:13:03[39m] (step=0002361) Train Loss mse: 0.0000, Train Loss ce: 0.0687, Train Steps/Sec: 0.15,
|
| 2578 |
[[34m2026-01-22 20:13:10[39m] (step=0002362) Train Loss mse: 0.0000, Train Loss ce: 0.0672, Train Steps/Sec: 0.15,
|
| 2579 |
[[34m2026-01-22 20:13:17[39m] (step=0002363) Train Loss mse: 0.0000, Train Loss ce: 0.0698, Train Steps/Sec: 0.14,
|
|
|
|
| 3528 |
[[34m2026-01-22 22:06:04[39m] (step=0003309) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.15,
|
| 3529 |
[[34m2026-01-22 22:06:11[39m] (step=0003310) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.14,
|
| 3530 |
[[34m2026-01-22 22:06:18[39m] (step=0003311) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.14,
|
| 3531 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step4000
|
| 3532 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 3533 |
+
[eval debug] first 3 batch fingerprints:
|
| 3534 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3535 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3536 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3537 |
+
ce_avg: 0.06947071850299835, mse_avg: 0.0
|
| 3538 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step4500
|
| 3539 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 3540 |
+
[eval debug] first 3 batch fingerprints:
|
| 3541 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3542 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3543 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 3544 |
+
ce_avg: 0.069743812084198, mse_avg: 0.0
|
| 3545 |
[[34m2026-01-22 22:06:26[39m] (step=0003312) Train Loss mse: 0.0000, Train Loss ce: 0.0709, Train Steps/Sec: 0.13,
|
| 3546 |
[[34m2026-01-22 22:06:33[39m] (step=0003313) Train Loss mse: 0.0000, Train Loss ce: 0.0682, Train Steps/Sec: 0.15,
|
| 3547 |
[[34m2026-01-22 22:06:39[39m] (step=0003314) Train Loss mse: 0.0000, Train Loss ce: 0.0677, Train Steps/Sec: 0.14,
|
|
|
|
| 3618 |
[[34m2026-01-22 22:14:53[39m] (step=0003385) Train Loss mse: 0.0000, Train Loss ce: 0.0689, Train Steps/Sec: 0.15,
|
| 3619 |
[[34m2026-01-22 22:15:00[39m] (step=0003386) Train Loss mse: 0.0000, Train Loss ce: 0.0678, Train Steps/Sec: 0.15,
|
| 3620 |
[[34m2026-01-22 22:15:07[39m] (step=0003387) Train Loss mse: 0.0000, Train Loss ce: 0.0687, Train Steps/Sec: 0.15,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3621 |
[[34m2026-01-22 22:15:14[39m] (step=0003388) Train Loss mse: 0.0000, Train Loss ce: 0.0697, Train Steps/Sec: 0.13,
|
| 3622 |
[[34m2026-01-22 22:15:22[39m] (step=0003389) Train Loss mse: 0.0000, Train Loss ce: 0.0691, Train Steps/Sec: 0.13,
|
| 3623 |
[[34m2026-01-22 22:15:29[39m] (step=0003390) Train Loss mse: 0.0000, Train Loss ce: 0.0710, Train Steps/Sec: 0.14,
|
|
|
|
| 4930 |
[[34m2026-01-23 00:48:12[39m] (step=0004697) Train Loss mse: 0.0000, Train Loss ce: 0.0683, Train Steps/Sec: 0.13,
|
| 4931 |
[[34m2026-01-23 00:48:19[39m] (step=0004698) Train Loss mse: 0.0000, Train Loss ce: 0.0700, Train Steps/Sec: 0.15,
|
| 4932 |
[[34m2026-01-23 00:48:25[39m] (step=0004699) Train Loss mse: 0.0000, Train Loss ce: 0.0676, Train Steps/Sec: 0.15,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4933 |
[[34m2026-01-23 00:48:32[39m] (step=0004700) Train Loss mse: 0.0000, Train Loss ce: 0.0675, Train Steps/Sec: 0.14,
|
| 4934 |
[[34m2026-01-23 00:48:40[39m] (step=0004701) Train Loss mse: 0.0000, Train Loss ce: 0.0679, Train Steps/Sec: 0.14,
|
| 4935 |
[[34m2026-01-23 00:48:47[39m] (step=0004702) Train Loss mse: 0.0000, Train Loss ce: 0.0690, Train Steps/Sec: 0.14,
|
|
|
|
| 4958 |
[[34m2026-01-23 00:51:29[39m] (step=0004725) Train Loss mse: 0.0000, Train Loss ce: 0.0672, Train Steps/Sec: 0.14,
|
| 4959 |
[[34m2026-01-23 00:51:37[39m] (step=0004726) Train Loss mse: 0.0000, Train Loss ce: 0.0686, Train Steps/Sec: 0.14,
|
| 4960 |
[[34m2026-01-23 00:51:44[39m] (step=0004727) Train Loss mse: 0.0000, Train Loss ce: 0.0688, Train Steps/Sec: 0.14,
|
| 4961 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ins_step5000
|
| 4962 |
+
Preparing Dataset vlm_gym_jigsaw_swap_celoss_no_mse_evalonce/vlm_gym_jigsaw_swap_val
|
| 4963 |
+
[eval debug] first 3 batch fingerprints:
|
| 4964 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 4965 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 4966 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_celoss_no_mse_evalonce'}]
|
| 4967 |
+
ce_avg: 0.07005834579467773, mse_avg: 0.0
|
| 4968 |
[[34m2026-01-23 00:51:51[39m] (step=0004728) Train Loss mse: 0.0000, Train Loss ce: 0.0680, Train Steps/Sec: 0.14,
|
| 4969 |
[[34m2026-01-23 00:51:57[39m] (step=0004729) Train Loss mse: 0.0000, Train Loss ce: 0.0686, Train Steps/Sec: 0.15,
|
| 4970 |
[[34m2026-01-23 00:52:04[39m] (step=0004730) Train Loss mse: 0.0000, Train Loss ce: 0.0677, Train Steps/Sec: 0.14,
|