Junyi42 commited on
Commit
c692aea
·
verified ·
1 Parent(s): fb60345

Upload checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins

Browse files
checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260119_052528-vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins-run0/files/output.log CHANGED
@@ -925,23 +925,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
925
  [2026-01-19 08:39:42] (step=0000914) Train Loss mse: 0.0659, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
926
  [2026-01-19 08:39:54] (step=0000915) Train Loss mse: 0.0571, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
927
  [2026-01-19 08:40:07] (step=0000916) Train Loss mse: 0.0690, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
928
- [2026-01-19 08:40:19] (step=0000917) Train Loss mse: 0.0567, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
929
- [2026-01-19 08:40:30] (step=0000918) Train Loss mse: 0.0771, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
930
- [2026-01-19 08:40:43] (step=0000919) Train Loss mse: 0.0694, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
931
- [2026-01-19 08:40:56] (step=0000920) Train Loss mse: 0.0645, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
932
- [2026-01-19 08:41:07] (step=0000921) Train Loss mse: 0.0638, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
933
- [2026-01-19 08:41:18] (step=0000922) Train Loss mse: 0.0379, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
934
- [2026-01-19 08:41:31] (step=0000923) Train Loss mse: 0.0754, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
935
- [2026-01-19 08:41:42] (step=0000924) Train Loss mse: 0.0598, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
936
- [2026-01-19 08:41:53] (step=0000925) Train Loss mse: 0.0734, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
937
- [2026-01-19 08:42:03] (step=0000926) Train Loss mse: 0.0786, Train Loss ce: 0.0000, Train Steps/Sec: 0.10,
938
- [2026-01-19 08:42:14] (step=0000927) Train Loss mse: 0.0971, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
939
- [2026-01-19 08:42:25] (step=0000928) Train Loss mse: 0.0513, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
940
- [2026-01-19 08:42:39] (step=0000929) Train Loss mse: 0.0699, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
941
- [2026-01-19 08:42:51] (step=0000930) Train Loss mse: 0.0961, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
942
- [2026-01-19 08:43:00] (step=0000931) Train Loss mse: 0.0873, Train Loss ce: 0.0000, Train Steps/Sec: 0.12,
943
- [2026-01-19 08:43:15] (step=0000932) Train Loss mse: 0.0499, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
944
- [2026-01-19 08:43:27] (step=0000933) Train Loss mse: 0.0517, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
945
  FullyShardedDataParallel(
946
  (_fsdp_wrapped_module): Bagel(
947
  (language_model): Qwen2ForCausalLM(
@@ -1142,13 +1125,23 @@ Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap
1142
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
1143
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
1144
  ce_avg: 0.0, mse_avg: 0.06354626268148422
1145
- base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step2000
1146
- Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
1147
- [eval debug] first 3 batch fingerprints:
1148
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
1149
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
1150
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
1151
- ce_avg: 0.0, mse_avg: 0.06860851496458054
 
 
 
 
 
 
 
 
 
 
1152
  [2026-01-19 08:43:40] (step=0000934) Train Loss mse: 0.0704, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
1153
  [2026-01-19 08:43:51] (step=0000935) Train Loss mse: 0.0528, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
1154
  [2026-01-19 08:44:05] (step=0000936) Train Loss mse: 0.0624, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
@@ -2361,6 +2354,27 @@ ce_avg: 0.0, mse_avg: 0.06860851496458054
2361
  [2026-01-19 12:53:30] (step=0002143) Train Loss mse: 0.0761, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
2362
  [2026-01-19 12:53:44] (step=0002144) Train Loss mse: 0.0698, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
2363
  [2026-01-19 12:53:58] (step=0002145) Train Loss mse: 0.0700, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2364
  [2026-01-19 12:54:11] (step=0002146) Train Loss mse: 0.0571, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
2365
  [2026-01-19 12:54:21] (step=0002147) Train Loss mse: 0.1154, Train Loss ce: 0.0000, Train Steps/Sec: 0.10,
2366
  [2026-01-19 12:54:33] (step=0002148) Train Loss mse: 0.0616, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
@@ -2500,20 +2514,6 @@ ce_avg: 0.0, mse_avg: 0.06860851496458054
2500
  [2026-01-19 13:21:45] (step=0002282) Train Loss mse: 0.0623, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
2501
  [2026-01-19 13:21:59] (step=0002283) Train Loss mse: 0.0643, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
2502
  [2026-01-19 13:22:14] (step=0002284) Train Loss mse: 0.0609, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
2503
- base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step2500
2504
- Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
2505
- [eval debug] first 3 batch fingerprints:
2506
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2507
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2508
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2509
- ce_avg: 0.0, mse_avg: 0.06288589537143707
2510
- base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step3000
2511
- Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
2512
- [eval debug] first 3 batch fingerprints:
2513
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2514
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2515
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2516
- ce_avg: 0.0, mse_avg: 0.07036882638931274
2517
  [2026-01-19 13:22:26] (step=0002285) Train Loss mse: 0.0670, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
2518
  [2026-01-19 13:22:42] (step=0002286) Train Loss mse: 0.0632, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2519
  [2026-01-19 13:22:52] (step=0002287) Train Loss mse: 0.0844, Train Loss ce: 0.0000, Train Steps/Sec: 0.10,
@@ -3328,6 +3328,20 @@ ce_avg: 0.0, mse_avg: 0.07036882638931274
3328
  [2026-01-19 16:10:58] (step=0003093) Train Loss mse: 0.0637, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
3329
  [2026-01-19 16:11:10] (step=0003094) Train Loss mse: 0.0634, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
3330
  [2026-01-19 16:11:21] (step=0003095) Train Loss mse: 0.0589, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3331
  [2026-01-19 16:11:31] (step=0003096) Train Loss mse: 0.0905, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
3332
  [2026-01-19 16:11:43] (step=0003097) Train Loss mse: 0.0591, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
3333
  [2026-01-19 16:11:55] (step=0003098) Train Loss mse: 0.0876, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
@@ -3495,27 +3509,6 @@ ce_avg: 0.0, mse_avg: 0.07036882638931274
3495
  [2026-01-19 16:44:49] (step=0003260) Train Loss mse: 0.0682, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
3496
  [2026-01-19 16:44:59] (step=0003261) Train Loss mse: 0.0587, Train Loss ce: 0.0000, Train Steps/Sec: 0.10,
3497
  [2026-01-19 16:45:11] (step=0003262) Train Loss mse: 0.0876, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
3498
- base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step3500
3499
- Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
3500
- [eval debug] first 3 batch fingerprints:
3501
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3502
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3503
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3504
- ce_avg: 0.0, mse_avg: 0.07131727039813995
3505
- base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step4000
3506
- Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
3507
- [eval debug] first 3 batch fingerprints:
3508
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3509
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3510
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3511
- ce_avg: 0.0, mse_avg: 0.06401161849498749
3512
- base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step4500
3513
- Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
3514
- [eval debug] first 3 batch fingerprints:
3515
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3516
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3517
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3518
- ce_avg: 0.0, mse_avg: 0.06277775019407272
3519
  [2026-01-19 16:45:24] (step=0003263) Train Loss mse: 0.0580, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
3520
  [2026-01-19 16:45:37] (step=0003264) Train Loss mse: 0.0725, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
3521
  [2026-01-19 16:45:48] (step=0003265) Train Loss mse: 0.0477, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
@@ -4625,6 +4618,20 @@ ce_avg: 0.0, mse_avg: 0.06277775019407272
4625
  [2026-01-19 20:33:07] (step=0004369) Train Loss mse: 0.1095, Train Loss ce: 0.0000, Train Steps/Sec: 0.10,
4626
  [2026-01-19 20:33:20] (step=0004370) Train Loss mse: 0.0465, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
4627
  [2026-01-19 20:33:34] (step=0004371) Train Loss mse: 0.0448, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4628
  [2026-01-19 20:33:46] (step=0004372) Train Loss mse: 0.0563, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
4629
  [2026-01-19 20:33:56] (step=0004373) Train Loss mse: 0.0880, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
4630
  [2026-01-19 20:34:12] (step=0004374) Train Loss mse: 0.0460, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
@@ -4924,13 +4931,6 @@ ce_avg: 0.0, mse_avg: 0.06277775019407272
4924
  [2026-01-19 21:35:03] (step=0004668) Train Loss mse: 0.0452, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
4925
  [2026-01-19 21:35:15] (step=0004669) Train Loss mse: 0.0848, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
4926
  [2026-01-19 21:35:30] (step=0004670) Train Loss mse: 0.0643, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
4927
- base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step5000
4928
- Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
4929
- [eval debug] first 3 batch fingerprints:
4930
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
4931
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
4932
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
4933
- ce_avg: 0.0, mse_avg: 0.06819717586040497
4934
  [2026-01-19 21:35:40] (step=0004671) Train Loss mse: 0.0504, Train Loss ce: 0.0000, Train Steps/Sec: 0.10,
4935
  [2026-01-19 21:35:52] (step=0004672) Train Loss mse: 0.0699, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
4936
  [2026-01-19 21:36:07] (step=0004673) Train Loss mse: 0.0593, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
 
925
  [2026-01-19 08:39:42] (step=0000914) Train Loss mse: 0.0659, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
926
  [2026-01-19 08:39:54] (step=0000915) Train Loss mse: 0.0571, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
927
  [2026-01-19 08:40:07] (step=0000916) Train Loss mse: 0.0690, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
928
  FullyShardedDataParallel(
929
  (_fsdp_wrapped_module): Bagel(
930
  (language_model): Qwen2ForCausalLM(
 
1125
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
1126
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
1127
  ce_avg: 0.0, mse_avg: 0.06354626268148422
1128
+ [2026-01-19 08:40:19] (step=0000917) Train Loss mse: 0.0567, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
1129
+ [2026-01-19 08:40:30] (step=0000918) Train Loss mse: 0.0771, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
1130
+ [2026-01-19 08:40:43] (step=0000919) Train Loss mse: 0.0694, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
1131
+ [2026-01-19 08:40:56] (step=0000920) Train Loss mse: 0.0645, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
1132
+ [2026-01-19 08:41:07] (step=0000921) Train Loss mse: 0.0638, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
1133
+ [2026-01-19 08:41:18] (step=0000922) Train Loss mse: 0.0379, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
1134
+ [2026-01-19 08:41:31] (step=0000923) Train Loss mse: 0.0754, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
1135
+ [2026-01-19 08:41:42] (step=0000924) Train Loss mse: 0.0598, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
1136
+ [2026-01-19 08:41:53] (step=0000925) Train Loss mse: 0.0734, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
1137
+ [2026-01-19 08:42:03] (step=0000926) Train Loss mse: 0.0786, Train Loss ce: 0.0000, Train Steps/Sec: 0.10,
1138
+ [2026-01-19 08:42:14] (step=0000927) Train Loss mse: 0.0971, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
1139
+ [2026-01-19 08:42:25] (step=0000928) Train Loss mse: 0.0513, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
1140
+ [2026-01-19 08:42:39] (step=0000929) Train Loss mse: 0.0699, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
1141
+ [2026-01-19 08:42:51] (step=0000930) Train Loss mse: 0.0961, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
1142
+ [2026-01-19 08:43:00] (step=0000931) Train Loss mse: 0.0873, Train Loss ce: 0.0000, Train Steps/Sec: 0.12,
1143
+ [2026-01-19 08:43:15] (step=0000932) Train Loss mse: 0.0499, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1144
+ [2026-01-19 08:43:27] (step=0000933) Train Loss mse: 0.0517, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
1145
  [2026-01-19 08:43:40] (step=0000934) Train Loss mse: 0.0704, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
1146
  [2026-01-19 08:43:51] (step=0000935) Train Loss mse: 0.0528, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
1147
  [2026-01-19 08:44:05] (step=0000936) Train Loss mse: 0.0624, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
 
2354
  [2026-01-19 12:53:30] (step=0002143) Train Loss mse: 0.0761, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
2355
  [2026-01-19 12:53:44] (step=0002144) Train Loss mse: 0.0698, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
2356
  [2026-01-19 12:53:58] (step=0002145) Train Loss mse: 0.0700, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
2357
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step2000
2358
+ Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
2359
+ [eval debug] first 3 batch fingerprints:
2360
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2361
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2362
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2363
+ ce_avg: 0.0, mse_avg: 0.06860851496458054
2364
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step2500
2365
+ Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
2366
+ [eval debug] first 3 batch fingerprints:
2367
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2368
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2369
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2370
+ ce_avg: 0.0, mse_avg: 0.06288589537143707
2371
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step3000
2372
+ Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
2373
+ [eval debug] first 3 batch fingerprints:
2374
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2375
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2376
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
2377
+ ce_avg: 0.0, mse_avg: 0.07036882638931274
2378
  [2026-01-19 12:54:11] (step=0002146) Train Loss mse: 0.0571, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
2379
  [2026-01-19 12:54:21] (step=0002147) Train Loss mse: 0.1154, Train Loss ce: 0.0000, Train Steps/Sec: 0.10,
2380
  [2026-01-19 12:54:33] (step=0002148) Train Loss mse: 0.0616, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
 
2514
  [2026-01-19 13:21:45] (step=0002282) Train Loss mse: 0.0623, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
2515
  [2026-01-19 13:21:59] (step=0002283) Train Loss mse: 0.0643, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
2516
  [2026-01-19 13:22:14] (step=0002284) Train Loss mse: 0.0609, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2517
  [2026-01-19 13:22:26] (step=0002285) Train Loss mse: 0.0670, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
2518
  [2026-01-19 13:22:42] (step=0002286) Train Loss mse: 0.0632, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2519
  [2026-01-19 13:22:52] (step=0002287) Train Loss mse: 0.0844, Train Loss ce: 0.0000, Train Steps/Sec: 0.10,
 
3328
  [2026-01-19 16:10:58] (step=0003093) Train Loss mse: 0.0637, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
3329
  [2026-01-19 16:11:10] (step=0003094) Train Loss mse: 0.0634, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
3330
  [2026-01-19 16:11:21] (step=0003095) Train Loss mse: 0.0589, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
3331
+ [
3332
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step3500
3333
+ Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
3334
+ [eval debug] first 3 batch fingerprints:
3335
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3336
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3337
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3338
+ ce_avg: 0.0, mse_avg: 0.07131727039813995
3339
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step4000
3340
+ Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
3341
+ [eval debug] first 3 batch fingerprints:
3342
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3343
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3344
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
3345
  [2026-01-19 16:11:31] (step=0003096) Train Loss mse: 0.0905, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
3346
  [2026-01-19 16:11:43] (step=0003097) Train Loss mse: 0.0591, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
3347
  [2026-01-19 16:11:55] (step=0003098) Train Loss mse: 0.0876, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
 
3509
  [2026-01-19 16:44:49] (step=0003260) Train Loss mse: 0.0682, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
3510
  [2026-01-19 16:44:59] (step=0003261) Train Loss mse: 0.0587, Train Loss ce: 0.0000, Train Steps/Sec: 0.10,
3511
  [2026-01-19 16:45:11] (step=0003262) Train Loss mse: 0.0876, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3512
  [2026-01-19 16:45:24] (step=0003263) Train Loss mse: 0.0580, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
3513
  [2026-01-19 16:45:37] (step=0003264) Train Loss mse: 0.0725, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
3514
  [2026-01-19 16:45:48] (step=0003265) Train Loss mse: 0.0477, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
 
4618
  [2026-01-19 20:33:07] (step=0004369) Train Loss mse: 0.1095, Train Loss ce: 0.0000, Train Steps/Sec: 0.10,
4619
  [2026-01-19 20:33:20] (step=0004370) Train Loss mse: 0.0465, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
4620
  [2026-01-19 20:33:34] (step=0004371) Train Loss mse: 0.0448, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
4621
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step4500
4622
+ Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
4623
+ [eval debug] first 3 batch fingerprints:
4624
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
4625
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
4626
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
4627
+ ce_avg: 0.0, mse_avg: 0.06277775019407272
4628
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_jigsaw_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_jigsaw_one_img_lr2e_5_mse_only_ins_step5000
4629
+ Preparing Dataset vlm_gym_jigsaw_swap_mse_loss_only_evalonce/vlm_gym_jigsaw_swap_val
4630
+ [eval debug] first 3 batch fingerprints:
4631
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
4632
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
4633
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_jigsaw_swap_mse_loss_only_evalonce'}]
4634
+ ce_avg: 0.0, mse_avg: 0.06819717586040497
4635
  [2026-01-19 20:33:46] (step=0004372) Train Loss mse: 0.0563, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
4636
  [2026-01-19 20:33:56] (step=0004373) Train Loss mse: 0.0880, Train Loss ce: 0.0000, Train Steps/Sec: 0.09,
4637
  [2026-01-19 20:34:12] (step=0004374) Train Loss mse: 0.0460, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
4931
  [2026-01-19 21:35:03] (step=0004668) Train Loss mse: 0.0452, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
4932
  [2026-01-19 21:35:15] (step=0004669) Train Loss mse: 0.0848, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
4933
  [2026-01-19 21:35:30] (step=0004670) Train Loss mse: 0.0643, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,
 
 
 
 
 
 
 
4934
  [2026-01-19 21:35:40] (step=0004671) Train Loss mse: 0.0504, Train Loss ce: 0.0000, Train Steps/Sec: 0.10,
4935
  [2026-01-19 21:35:52] (step=0004672) Train Loss mse: 0.0699, Train Loss ce: 0.0000, Train Steps/Sec: 0.08,
4936
  [2026-01-19 21:36:07] (step=0004673) Train Loss mse: 0.0593, Train Loss ce: 0.0000, Train Steps/Sec: 0.07,