Upload checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed
Browse files
checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/wandb/offline-run-20260111_233506-checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed-run0/files/output.log
CHANGED
|
@@ -1205,6 +1205,20 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 1205 |
[[34m2026-01-12 02:01:10[39m] (step=0001017) Train Loss mse: 0.0000, Train Loss ce: 0.0599, Train Steps/Sec: 0.12,
|
| 1206 |
[[34m2026-01-12 02:01:18[39m] (step=0001018) Train Loss mse: 0.0000, Train Loss ce: 0.0600, Train Steps/Sec: 0.12,
|
| 1207 |
[[34m2026-01-12 02:01:26[39m] (step=0001019) Train Loss mse: 0.0000, Train Loss ce: 0.0601, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1208 |
[[34m2026-01-12 02:01:34[39m] (step=0001020) Train Loss mse: 0.0000, Train Loss ce: 0.0598, Train Steps/Sec: 0.12,
|
| 1209 |
[[34m2026-01-12 02:01:42[39m] (step=0001021) Train Loss mse: 0.0000, Train Loss ce: 0.0594, Train Steps/Sec: 0.12,
|
| 1210 |
[[34m2026-01-12 02:01:51[39m] (step=0001022) Train Loss mse: 0.0000, Train Loss ce: 0.0601, Train Steps/Sec: 0.12,
|
|
@@ -2571,20 +2585,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 2571 |
[[34m2026-01-12 05:08:21[39m] (step=0002383) Train Loss mse: 0.0000, Train Loss ce: 0.0587, Train Steps/Sec: 0.12,
|
| 2572 |
[[34m2026-01-12 05:08:30[39m] (step=0002384) Train Loss mse: 0.0000, Train Loss ce: 0.0585, Train Steps/Sec: 0.12,
|
| 2573 |
[[34m2026-01-12 05:08:38[39m] (step=0002385) Train Loss mse: 0.0000, Train Loss ce: 0.0575, Train Steps/Sec: 0.12,
|
| 2574 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step1500
|
| 2575 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 2576 |
-
[eval debug] first 3 batch fingerprints:
|
| 2577 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2578 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2579 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2580 |
-
ce_avg: 0.16342228651046753, mse_avg: 0.0
|
| 2581 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step2000
|
| 2582 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 2583 |
-
[eval debug] first 3 batch fingerprints:
|
| 2584 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2585 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2586 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 2587 |
-
ce_avg: 0.10344104468822479, mse_avg: 0.0
|
| 2588 |
[[34m2026-01-12 05:08:46[39m] (step=0002386) Train Loss mse: 0.0000, Train Loss ce: 0.0579, Train Steps/Sec: 0.12,
|
| 2589 |
[[34m2026-01-12 05:08:54[39m] (step=0002387) Train Loss mse: 0.0000, Train Loss ce: 0.0571, Train Steps/Sec: 0.12,
|
| 2590 |
[[34m2026-01-12 05:09:03[39m] (step=0002388) Train Loss mse: 0.0000, Train Loss ce: 0.0576, Train Steps/Sec: 0.12,
|
|
@@ -3527,41 +3527,6 @@ ce_avg: 0.10344104468822479, mse_avg: 0.0
|
|
| 3527 |
[[34m2026-01-12 07:19:45[39m] (step=0003322) Train Loss mse: 0.0000, Train Loss ce: 0.0580, Train Steps/Sec: 0.11,
|
| 3528 |
[[34m2026-01-12 07:19:53[39m] (step=0003323) Train Loss mse: 0.0000, Train Loss ce: 0.0580, Train Steps/Sec: 0.12,
|
| 3529 |
[[34m2026-01-12 07:20:01[39m] (step=0003324) Train Loss mse: 0.0000, Train Loss ce: 0.0565, Train Steps/Sec: 0.12,
|
| 3530 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step2500
|
| 3531 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 3532 |
-
[eval debug] first 3 batch fingerprints:
|
| 3533 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3534 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3535 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3536 |
-
ce_avg: 0.08726762980222702, mse_avg: 0.0
|
| 3537 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step3000
|
| 3538 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 3539 |
-
[eval debug] first 3 batch fingerprints:
|
| 3540 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3541 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3542 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3543 |
-
ce_avg: 0.07871276885271072, mse_avg: 0.0
|
| 3544 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step3500
|
| 3545 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 3546 |
-
[eval debug] first 3 batch fingerprints:
|
| 3547 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3548 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3549 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3550 |
-
ce_avg: 0.07428010553121567, mse_avg: 0.0
|
| 3551 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step4000
|
| 3552 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 3553 |
-
[eval debug] first 3 batch fingerprints:
|
| 3554 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3555 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3556 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3557 |
-
ce_avg: 0.07037562131881714, mse_avg: 0.0
|
| 3558 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step4500
|
| 3559 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 3560 |
-
[eval debug] first 3 batch fingerprints:
|
| 3561 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3562 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3563 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3564 |
-
ce_avg: 0.06715264171361923, mse_avg: 0.0
|
| 3565 |
[[34m2026-01-12 07:20:09[39m] (step=0003325) Train Loss mse: 0.0000, Train Loss ce: 0.0572, Train Steps/Sec: 0.12,
|
| 3566 |
[[34m2026-01-12 07:20:17[39m] (step=0003326) Train Loss mse: 0.0000, Train Loss ce: 0.0570, Train Steps/Sec: 0.13,
|
| 3567 |
[[34m2026-01-12 07:20:25[39m] (step=0003327) Train Loss mse: 0.0000, Train Loss ce: 0.0567, Train Steps/Sec: 0.12,
|
|
@@ -3681,6 +3646,27 @@ ce_avg: 0.06715264171361923, mse_avg: 0.0
|
|
| 3681 |
[[34m2026-01-12 07:36:02[39m] (step=0003441) Train Loss mse: 0.0000, Train Loss ce: 0.0566, Train Steps/Sec: 0.12,
|
| 3682 |
[[34m2026-01-12 07:36:10[39m] (step=0003442) Train Loss mse: 0.0000, Train Loss ce: 0.0556, Train Steps/Sec: 0.12,
|
| 3683 |
[[34m2026-01-12 07:36:18[39m] (step=0003443) Train Loss mse: 0.0000, Train Loss ce: 0.0563, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3684 |
[[34m2026-01-12 07:36:26[39m] (step=0003444) Train Loss mse: 0.0000, Train Loss ce: 0.0564, Train Steps/Sec: 0.12,
|
| 3685 |
[[34m2026-01-12 07:36:35[39m] (step=0003445) Train Loss mse: 0.0000, Train Loss ce: 0.0570, Train Steps/Sec: 0.12,
|
| 3686 |
[[34m2026-01-12 07:36:43[39m] (step=0003446) Train Loss mse: 0.0000, Train Loss ce: 0.0565, Train Steps/Sec: 0.12,
|
|
@@ -4961,13 +4947,6 @@ ce_avg: 0.06715264171361923, mse_avg: 0.0
|
|
| 4961 |
[[34m2026-01-12 10:31:42[39m] (step=0004721) Train Loss mse: 0.0000, Train Loss ce: 0.0545, Train Steps/Sec: 0.12,
|
| 4962 |
[[34m2026-01-12 10:31:51[39m] (step=0004722) Train Loss mse: 0.0000, Train Loss ce: 0.0559, Train Steps/Sec: 0.12,
|
| 4963 |
[[34m2026-01-12 10:31:59[39m] (step=0004723) Train Loss mse: 0.0000, Train Loss ce: 0.0550, Train Steps/Sec: 0.12,
|
| 4964 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step5000
|
| 4965 |
-
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 4966 |
-
[eval debug] first 3 batch fingerprints:
|
| 4967 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 4968 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 4969 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 4970 |
-
ce_avg: 0.06502918154001236, mse_avg: 0.0
|
| 4971 |
[[34m2026-01-12 10:32:07[39m] (step=0004724) Train Loss mse: 0.0000, Train Loss ce: 0.0563, Train Steps/Sec: 0.12,
|
| 4972 |
[[34m2026-01-12 10:32:15[39m] (step=0004725) Train Loss mse: 0.0000, Train Loss ce: 0.0559, Train Steps/Sec: 0.12,
|
| 4973 |
[[34m2026-01-12 10:32:23[39m] (step=0004726) Train Loss mse: 0.0000, Train Loss ce: 0.0550, Train Steps/Sec: 0.12,
|
|
@@ -5140,6 +5119,27 @@ ce_avg: 0.06502918154001236, mse_avg: 0.0
|
|
| 5140 |
[[34m2026-01-12 10:55:17[39m] (step=0004893) Train Loss mse: 0.0000, Train Loss ce: 0.0551, Train Steps/Sec: 0.12,
|
| 5141 |
[[34m2026-01-12 10:55:25[39m] (step=0004894) Train Loss mse: 0.0000, Train Loss ce: 0.0543, Train Steps/Sec: 0.12,
|
| 5142 |
[[34m2026-01-12 10:55:33[39m] (step=0004895) Train Loss mse: 0.0000, Train Loss ce: 0.0552, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5143 |
[[34m2026-01-12 10:55:41[39m] (step=0004896) Train Loss mse: 0.0000, Train Loss ce: 0.0543, Train Steps/Sec: 0.12,
|
| 5144 |
[[34m2026-01-12 10:55:50[39m] (step=0004897) Train Loss mse: 0.0000, Train Loss ce: 0.0558, Train Steps/Sec: 0.12,
|
| 5145 |
[[34m2026-01-12 10:55:58[39m] (step=0004898) Train Loss mse: 0.0000, Train Loss ce: 0.0540, Train Steps/Sec: 0.12,
|
|
|
|
| 1205 |
[[34m2026-01-12 02:01:10[39m] (step=0001017) Train Loss mse: 0.0000, Train Loss ce: 0.0599, Train Steps/Sec: 0.12,
|
| 1206 |
[[34m2026-01-12 02:01:18[39m] (step=0001018) Train Loss mse: 0.0000, Train Loss ce: 0.0600, Train Steps/Sec: 0.12,
|
| 1207 |
[[34m2026-01-12 02:01:26[39m] (step=0001019) Train Loss mse: 0.0000, Train Loss ce: 0.0601, Train Steps/Sec: 0.12,
|
| 1208 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step1500
|
| 1209 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 1210 |
+
[eval debug] first 3 batch fingerprints:
|
| 1211 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1212 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1213 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1214 |
+
ce_avg: 0.16342228651046753, mse_avg: 0.0
|
| 1215 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step2000
|
| 1216 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 1217 |
+
[eval debug] first 3 batch fingerprints:
|
| 1218 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1219 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1220 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 1221 |
+
ce_avg: 0.10344104468822479, mse_avg: 0.0
|
| 1222 |
[[34m2026-01-12 02:01:34[39m] (step=0001020) Train Loss mse: 0.0000, Train Loss ce: 0.0598, Train Steps/Sec: 0.12,
|
| 1223 |
[[34m2026-01-12 02:01:42[39m] (step=0001021) Train Loss mse: 0.0000, Train Loss ce: 0.0594, Train Steps/Sec: 0.12,
|
| 1224 |
[[34m2026-01-12 02:01:51[39m] (step=0001022) Train Loss mse: 0.0000, Train Loss ce: 0.0601, Train Steps/Sec: 0.12,
|
|
|
|
| 2585 |
[[34m2026-01-12 05:08:21[39m] (step=0002383) Train Loss mse: 0.0000, Train Loss ce: 0.0587, Train Steps/Sec: 0.12,
|
| 2586 |
[[34m2026-01-12 05:08:30[39m] (step=0002384) Train Loss mse: 0.0000, Train Loss ce: 0.0585, Train Steps/Sec: 0.12,
|
| 2587 |
[[34m2026-01-12 05:08:38[39m] (step=0002385) Train Loss mse: 0.0000, Train Loss ce: 0.0575, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2588 |
[[34m2026-01-12 05:08:46[39m] (step=0002386) Train Loss mse: 0.0000, Train Loss ce: 0.0579, Train Steps/Sec: 0.12,
|
| 2589 |
[[34m2026-01-12 05:08:54[39m] (step=0002387) Train Loss mse: 0.0000, Train Loss ce: 0.0571, Train Steps/Sec: 0.12,
|
| 2590 |
[[34m2026-01-12 05:09:03[39m] (step=0002388) Train Loss mse: 0.0000, Train Loss ce: 0.0576, Train Steps/Sec: 0.12,
|
|
|
|
| 3527 |
[[34m2026-01-12 07:19:45[39m] (step=0003322) Train Loss mse: 0.0000, Train Loss ce: 0.0580, Train Steps/Sec: 0.11,
|
| 3528 |
[[34m2026-01-12 07:19:53[39m] (step=0003323) Train Loss mse: 0.0000, Train Loss ce: 0.0580, Train Steps/Sec: 0.12,
|
| 3529 |
[[34m2026-01-12 07:20:01[39m] (step=0003324) Train Loss mse: 0.0000, Train Loss ce: 0.0565, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3530 |
[[34m2026-01-12 07:20:09[39m] (step=0003325) Train Loss mse: 0.0000, Train Loss ce: 0.0572, Train Steps/Sec: 0.12,
|
| 3531 |
[[34m2026-01-12 07:20:17[39m] (step=0003326) Train Loss mse: 0.0000, Train Loss ce: 0.0570, Train Steps/Sec: 0.13,
|
| 3532 |
[[34m2026-01-12 07:20:25[39m] (step=0003327) Train Loss mse: 0.0000, Train Loss ce: 0.0567, Train Steps/Sec: 0.12,
|
|
|
|
| 3646 |
[[34m2026-01-12 07:36:02[39m] (step=0003441) Train Loss mse: 0.0000, Train Loss ce: 0.0566, Train Steps/Sec: 0.12,
|
| 3647 |
[[34m2026-01-12 07:36:10[39m] (step=0003442) Train Loss mse: 0.0000, Train Loss ce: 0.0556, Train Steps/Sec: 0.12,
|
| 3648 |
[[34m2026-01-12 07:36:18[39m] (step=0003443) Train Loss mse: 0.0000, Train Loss ce: 0.0563, Train Steps/Sec: 0.12,
|
| 3649 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step2500
|
| 3650 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 3651 |
+
[eval debug] first 3 batch fingerprints:
|
| 3652 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3653 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3654 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3655 |
+
ce_avg: 0.08726762980222702, mse_avg: 0.0
|
| 3656 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step3000
|
| 3657 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 3658 |
+
[eval debug] first 3 batch fingerprints:
|
| 3659 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3660 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3661 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3662 |
+
ce_avg: 0.07871276885271072, mse_avg: 0.0
|
| 3663 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step3500
|
| 3664 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 3665 |
+
[eval debug] first 3 batch fingerprints:
|
| 3666 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3667 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3668 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 3669 |
+
ce_avg: 0.07428010553121567, mse_avg: 0.0
|
| 3670 |
[[34m2026-01-12 07:36:26[39m] (step=0003444) Train Loss mse: 0.0000, Train Loss ce: 0.0564, Train Steps/Sec: 0.12,
|
| 3671 |
[[34m2026-01-12 07:36:35[39m] (step=0003445) Train Loss mse: 0.0000, Train Loss ce: 0.0570, Train Steps/Sec: 0.12,
|
| 3672 |
[[34m2026-01-12 07:36:43[39m] (step=0003446) Train Loss mse: 0.0000, Train Loss ce: 0.0565, Train Steps/Sec: 0.12,
|
|
|
|
| 4947 |
[[34m2026-01-12 10:31:42[39m] (step=0004721) Train Loss mse: 0.0000, Train Loss ce: 0.0545, Train Steps/Sec: 0.12,
|
| 4948 |
[[34m2026-01-12 10:31:51[39m] (step=0004722) Train Loss mse: 0.0000, Train Loss ce: 0.0559, Train Steps/Sec: 0.12,
|
| 4949 |
[[34m2026-01-12 10:31:59[39m] (step=0004723) Train Loss mse: 0.0000, Train Loss ce: 0.0550, Train Steps/Sec: 0.12,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4950 |
[[34m2026-01-12 10:32:07[39m] (step=0004724) Train Loss mse: 0.0000, Train Loss ce: 0.0563, Train Steps/Sec: 0.12,
|
| 4951 |
[[34m2026-01-12 10:32:15[39m] (step=0004725) Train Loss mse: 0.0000, Train Loss ce: 0.0559, Train Steps/Sec: 0.12,
|
| 4952 |
[[34m2026-01-12 10:32:23[39m] (step=0004726) Train Loss mse: 0.0000, Train Loss ce: 0.0550, Train Steps/Sec: 0.12,
|
|
|
|
| 5119 |
[[34m2026-01-12 10:55:17[39m] (step=0004893) Train Loss mse: 0.0000, Train Loss ce: 0.0551, Train Steps/Sec: 0.12,
|
| 5120 |
[[34m2026-01-12 10:55:25[39m] (step=0004894) Train Loss mse: 0.0000, Train Loss ce: 0.0543, Train Steps/Sec: 0.12,
|
| 5121 |
[[34m2026-01-12 10:55:33[39m] (step=0004895) Train Loss mse: 0.0000, Train Loss ce: 0.0552, Train Steps/Sec: 0.12,
|
| 5122 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step4000
|
| 5123 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 5124 |
+
[eval debug] first 3 batch fingerprints:
|
| 5125 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 5126 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 5127 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 5128 |
+
ce_avg: 0.07037562131881714, mse_avg: 0.0
|
| 5129 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step4500
|
| 5130 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 5131 |
+
[eval debug] first 3 batch fingerprints:
|
| 5132 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 5133 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 5134 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 5135 |
+
ce_avg: 0.06715264171361923, mse_avg: 0.0
|
| 5136 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed/eval_used_rows, step_tag is checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_ce_no_mse_ema9999_hashed_step5000
|
| 5137 |
+
Preparing Dataset vlm_gym_jigsaw_celoss_no_mse_evalonce/vlm_gym_jigsaw_val
|
| 5138 |
+
[eval debug] first 3 batch fingerprints:
|
| 5139 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 5140 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 5141 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_celoss_no_mse_evalonce'}]
|
| 5142 |
+
ce_avg: 0.06502918154001236, mse_avg: 0.0
|
| 5143 |
[[34m2026-01-12 10:55:41[39m] (step=0004896) Train Loss mse: 0.0000, Train Loss ce: 0.0543, Train Steps/Sec: 0.12,
|
| 5144 |
[[34m2026-01-12 10:55:50[39m] (step=0004897) Train Loss mse: 0.0000, Train Loss ce: 0.0558, Train Steps/Sec: 0.12,
|
| 5145 |
[[34m2026-01-12 10:55:58[39m] (step=0004898) Train Loss mse: 0.0000, Train Loss ce: 0.0540, Train Steps/Sec: 0.12,
|