Junyi42 commited on
Commit
405cf0c
·
verified ·
1 Parent(s): ed5379f

Upload checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins

Browse files
checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/wandb/offline-run-20260129_221543-vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins-run0/files/output.log CHANGED
@@ -850,6 +850,15 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
850
  [2026-01-30 02:20:25] (step=0000839) Train Loss mse: 0.0258, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
851
  [2026-01-30 02:20:41] (step=0000840) Train Loss mse: 0.0242, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
852
  [2026-01-30 02:20:58] (step=0000841) Train Loss mse: 0.0250, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
 
 
 
 
 
 
 
 
853
  FullyShardedDataParallel(
854
  (_fsdp_wrapped_module): Bagel(
855
  (language_model): Qwen2ForCausalLM(
@@ -1043,15 +1052,13 @@ Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce
1043
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
1044
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
1045
  ce_avg: 0.0, mse_avg: 0.024334488436579704
1046
- [2026-01-30 02:21:15] (step=0000842) Train Loss mse: 0.0278, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1047
- [2026-01-30 02:21:32] (step=0000843) Train Loss mse: 0.0222, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1048
- [2026-01-30 02:21:49] (step=0000844) Train Loss mse: 0.0243, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1049
- [2026-01-30 02:22:05] (step=0000845) Train Loss mse: 0.0232, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1050
- [2026-01-30 02:22:22] (step=0000846) Train Loss mse: 0.0242, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1051
- [2026-01-30 02:22:39] (step=0000847) Train Loss mse: 0.0240, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1052
- [2026-01-30 02:22:56] (step=0000848) Train Loss mse: 0.0224, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1053
- [2026-01-30 02:23:12] (step=0000849) Train Loss mse: 0.0229, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1054
- [2026-01-30 02:23:29] (step=0000850) Train Loss mse: 0.0265, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1055
  [2026-01-30 02:23:46] (step=0000851) Train Loss mse: 0.0230, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1056
  [2026-01-30 02:24:03] (step=0000852) Train Loss mse: 0.0253, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1057
  [2026-01-30 02:24:20] (step=0000853) Train Loss mse: 0.0239, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
@@ -2200,6 +2207,20 @@ ce_avg: 0.0, mse_avg: 0.024334488436579704
2200
  [2026-01-30 07:47:18] (step=0001996) Train Loss mse: 0.0242, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2201
  [2026-01-30 07:47:34] (step=0001997) Train Loss mse: 0.0238, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2202
  [2026-01-30 07:47:51] (step=0001998) Train Loss mse: 0.0238, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2203
  [2026-01-30 07:48:08] (step=0001999) Train Loss mse: 0.0235, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2204
  [2026-01-30 07:49:58] (step=0002000) Train Loss mse: 0.0214, Train Loss ce: 0.0000, Train Steps/Sec: 0.01,
2205
  [2026-01-30 07:50:15] (step=0002001) Train Loss mse: 0.0226, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
@@ -2264,20 +2285,6 @@ ce_avg: 0.0, mse_avg: 0.024334488436579704
2264
  [2026-01-30 08:06:43] (step=0002060) Train Loss mse: 0.0223, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2265
  [2026-01-30 08:06:59] (step=0002061) Train Loss mse: 0.0213, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2266
  [2026-01-30 08:07:16] (step=0002062) Train Loss mse: 0.0225, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2267
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step2000
2268
- Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
2269
- [eval debug] first 3 batch fingerprints:
2270
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
2271
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
2272
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
2273
- ce_avg: 0.0, mse_avg: 0.024451851844787598
2274
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step2500
2275
- Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
2276
- [eval debug] first 3 batch fingerprints:
2277
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
2278
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
2279
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
2280
- ce_avg: 0.0, mse_avg: 0.024314723908901215
2281
  [2026-01-30 08:07:33] (step=0002063) Train Loss mse: 0.0232, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2282
  [2026-01-30 08:07:50] (step=0002064) Train Loss mse: 0.0223, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2283
  [2026-01-30 08:08:07] (step=0002065) Train Loss mse: 0.0223, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
@@ -3322,28 +3329,34 @@ ce_avg: 0.0, mse_avg: 0.024314723908901215
3322
  [2026-01-30 13:01:56] (step=0003104) Train Loss mse: 0.0206, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3323
  [2026-01-30 13:02:13] (step=0003105) Train Loss mse: 0.0218, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3324
  [2026-01-30 13:02:30] (step=0003106) Train Loss mse: 0.0204, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3325
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step3000
3326
- Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
3327
- [eval debug] first 3 batch fingerprints:
3328
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3329
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3330
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3331
- ce_avg: 0.0, mse_avg: 0.024401195347309113
3332
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step3500
3333
- Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
3334
- [eval debug] first 3 batch fingerprints:
3335
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3336
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3337
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3338
- ce_avg: 0.0, mse_avg: 0.024277793243527412
3339
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step4000
3340
- Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
3341
- [eval debug] first 3 batch fingerprints:
3342
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3343
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3344
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3345
- ce_avg: 0.0, mse_avg: 0.024172412231564522
3346
- 2026-01-30 13:10:20] (step=0003134) Train Loss mse: 0.0218, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
 
 
 
 
 
3347
  [2026-01-30 13:10:37] (step=0003135) Train Loss mse: 0.0237, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3348
  [2026-01-30 13:10:54] (step=0003136) Train Loss mse: 0.0221, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3349
  [2026-01-30 13:11:11] (step=0003137) Train Loss mse: 0.0219, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
@@ -3364,6 +3377,20 @@ ce_avg: 0.0, mse_avg: 0.024172412231564522
3364
  [2026-01-30 13:15:23] (step=0003152) Train Loss mse: 0.0230, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3365
  [2026-01-30 13:15:40] (step=0003153) Train Loss mse: 0.0212, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3366
  [2026-01-30 13:15:56] (step=0003154) Train Loss mse: 0.0249, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3367
  [2026-01-30 13:16:13] (step=0003155) Train Loss mse: 0.0206, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3368
  [2026-01-30 13:16:30] (step=0003156) Train Loss mse: 0.0216, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3369
  [2026-01-30 13:16:47] (step=0003157) Train Loss mse: 0.0233, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
@@ -4389,20 +4416,6 @@ ce_avg: 0.0, mse_avg: 0.024172412231564522
4389
  [2026-01-30 18:05:06] (step=0004177) Train Loss mse: 0.0218, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4390
  [2026-01-30 18:05:22] (step=0004178) Train Loss mse: 0.0224, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4391
  [2026-01-30 18:05:39] (step=0004179) Train Loss mse: 0.0202, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4392
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step4500
4393
- Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
4394
- [eval debug] first 3 batch fingerprints:
4395
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
4396
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
4397
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
4398
- ce_avg: 0.0, mse_avg: 0.02414196915924549
4399
- base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step5000
4400
- Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
4401
- [eval debug] first 3 batch fingerprints:
4402
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
4403
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
4404
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
4405
- ce_avg: 0.0, mse_avg: 0.024185948073863983
4406
  [2026-01-30 18:05:56] (step=0004180) Train Loss mse: 0.0244, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4407
  [2026-01-30 18:06:13] (step=0004181) Train Loss mse: 0.0221, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4408
  [2026-01-30 18:06:30] (step=0004182) Train Loss mse: 0.0214, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
@@ -4420,7 +4433,21 @@ ce_avg: 0.0, mse_avg: 0.024185948073863983
4420
  [2026-01-30 18:09:51] (step=0004194) Train Loss mse: 0.0225, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4421
  [2026-01-30 18:10:08] (step=0004195) Train Loss mse: 0.0217, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4422
  [2026-01-30 18:10:25] (step=0004196) Train Loss mse: 0.0208, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4423
- [2026-01-30 18:10:41] (step=0004197) Train Loss mse: 0.0209, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4424
  [2026-01-30 18:10:58] (step=0004198) Train Loss mse: 0.0209, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4425
  [2026-01-30 18:11:15] (step=0004199) Train Loss mse: 0.0245, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4426
  [2026-01-30 18:11:32] (step=0004200) Train Loss mse: 0.0206, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
850
  [2026-01-30 02:20:25] (step=0000839) Train Loss mse: 0.0258, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
851
  [2026-01-30 02:20:41] (step=0000840) Train Loss mse: 0.0242, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
852
  [2026-01-30 02:20:58] (step=0000841) Train Loss mse: 0.0250, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
853
+ [2026-01-30 02:21:15] (step=0000842) Train Loss mse: 0.0278, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
854
+ [2026-01-30 02:21:32] (step=0000843) Train Loss mse: 0.0222, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
855
+ [2026-01-30 02:21:49] (step=0000844) Train Loss mse: 0.0243, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
856
+ [2026-01-30 02:22:05] (step=0000845) Train Loss mse: 0.0232, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
857
+ [2026-01-30 02:22:22] (step=0000846) Train Loss mse: 0.0242, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
858
+ [2026-01-30 02:22:39] (step=0000847) Train Loss mse: 0.0240, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
859
+ [2026-01-30 02:22:56] (step=0000848) Train Loss mse: 0.0224, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
860
+ [2026-01-30 02:23:12] (step=0000849) Train Loss mse: 0.0229, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
861
+ [2026-01-30 02:23:29] (step=0000850) Train Loss mse: 0.0265, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
862
  FullyShardedDataParallel(
863
  (_fsdp_wrapped_module): Bagel(
864
  (language_model): Qwen2ForCausalLM(
 
1052
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
1053
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
1054
  ce_avg: 0.0, mse_avg: 0.024334488436579704
1055
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step2000
1056
+ Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
1057
+ [eval debug] first 3 batch fingerprints:
1058
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
1059
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
1060
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
1061
+ ce_avg: 0.0, mse_avg: 0.024451851844787598
 
 
1062
  [2026-01-30 02:23:46] (step=0000851) Train Loss mse: 0.0230, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1063
  [2026-01-30 02:24:03] (step=0000852) Train Loss mse: 0.0253, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
1064
  [2026-01-30 02:24:20] (step=0000853) Train Loss mse: 0.0239, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
2207
  [2026-01-30 07:47:18] (step=0001996) Train Loss mse: 0.0242, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2208
  [2026-01-30 07:47:34] (step=0001997) Train Loss mse: 0.0238, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2209
  [2026-01-30 07:47:51] (step=0001998) Train Loss mse: 0.0238, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2210
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step2500
2211
+ Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
2212
+ [eval debug] first 3 batch fingerprints:
2213
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
2214
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
2215
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
2216
+ ce_avg: 0.0, mse_avg: 0.024314723908901215
2217
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step3000
2218
+ Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
2219
+ [eval debug] first 3 batch fingerprints:
2220
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
2221
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
2222
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
2223
+ ce_avg: 0.0, mse_avg: 0.024401195347309113
2224
  [2026-01-30 07:48:08] (step=0001999) Train Loss mse: 0.0235, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2225
  [2026-01-30 07:49:58] (step=0002000) Train Loss mse: 0.0214, Train Loss ce: 0.0000, Train Steps/Sec: 0.01,
2226
  [2026-01-30 07:50:15] (step=0002001) Train Loss mse: 0.0226, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
2285
  [2026-01-30 08:06:43] (step=0002060) Train Loss mse: 0.0223, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2286
  [2026-01-30 08:06:59] (step=0002061) Train Loss mse: 0.0213, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2287
  [2026-01-30 08:07:16] (step=0002062) Train Loss mse: 0.0225, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2288
  [2026-01-30 08:07:33] (step=0002063) Train Loss mse: 0.0232, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2289
  [2026-01-30 08:07:50] (step=0002064) Train Loss mse: 0.0223, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
2290
  [2026-01-30 08:08:07] (step=0002065) Train Loss mse: 0.0223, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
3329
  [2026-01-30 13:01:56] (step=0003104) Train Loss mse: 0.0206, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3330
  [2026-01-30 13:02:13] (step=0003105) Train Loss mse: 0.0218, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3331
  [2026-01-30 13:02:30] (step=0003106) Train Loss mse: 0.0204, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3332
+ [2026-01-30 13:02:47] (step=0003107) Train Loss mse: 0.0242, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3333
+ [2026-01-30 13:03:04] (step=0003108) Train Loss mse: 0.0230, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3334
+ [2026-01-30 13:03:20] (step=0003109) Train Loss mse: 0.0224, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3335
+ [2026-01-30 13:03:37] (step=0003110) Train Loss mse: 0.0218, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3336
+ [2026-01-30 13:03:54] (step=0003111) Train Loss mse: 0.0227, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3337
+ [2026-01-30 13:04:11] (step=0003112) Train Loss mse: 0.0232, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3338
+ [2026-01-30 13:04:28] (step=0003113) Train Loss mse: 0.0216, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3339
+ [2026-01-30 13:04:44] (step=0003114) Train Loss mse: 0.0245, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3340
+ [2026-01-30 13:05:01] (step=0003115) Train Loss mse: 0.0215, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3341
+ [2026-01-30 13:05:18] (step=0003116) Train Loss mse: 0.0194, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3342
+ [2026-01-30 13:05:35] (step=0003117) Train Loss mse: 0.0217, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3343
+ [2026-01-30 13:05:51] (step=0003118) Train Loss mse: 0.0233, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3344
+ [2026-01-30 13:06:08] (step=0003119) Train Loss mse: 0.0210, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3345
+ [2026-01-30 13:06:25] (step=0003120) Train Loss mse: 0.0239, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3346
+ [2026-01-30 13:06:42] (step=0003121) Train Loss mse: 0.0241, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3347
+ [2026-01-30 13:06:59] (step=0003122) Train Loss mse: 0.0211, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3348
+ [2026-01-30 13:07:16] (step=0003123) Train Loss mse: 0.0229, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3349
+ [2026-01-30 13:07:33] (step=0003124) Train Loss mse: 0.0223, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3350
+ [2026-01-30 13:07:50] (step=0003125) Train Loss mse: 0.0215, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3351
+ [2026-01-30 13:08:06] (step=0003126) Train Loss mse: 0.0209, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3352
+ [2026-01-30 13:08:23] (step=0003127) Train Loss mse: 0.0225, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3353
+ [2026-01-30 13:08:40] (step=0003128) Train Loss mse: 0.0219, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3354
+ [2026-01-30 13:08:57] (step=0003129) Train Loss mse: 0.0218, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3355
+ [2026-01-30 13:09:13] (step=0003130) Train Loss mse: 0.0230, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3356
+ [2026-01-30 13:09:30] (step=0003131) Train Loss mse: 0.0237, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3357
+ [2026-01-30 13:09:47] (step=0003132) Train Loss mse: 0.0203, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3358
+ [2026-01-30 13:10:04] (step=0003133) Train Loss mse: 0.0233, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3359
+ [2026-01-30 13:10:20] (step=0003134) Train Loss mse: 0.0218, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3360
  [2026-01-30 13:10:37] (step=0003135) Train Loss mse: 0.0237, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3361
  [2026-01-30 13:10:54] (step=0003136) Train Loss mse: 0.0221, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3362
  [2026-01-30 13:11:11] (step=0003137) Train Loss mse: 0.0219, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
3377
  [2026-01-30 13:15:23] (step=0003152) Train Loss mse: 0.0230, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3378
  [2026-01-30 13:15:40] (step=0003153) Train Loss mse: 0.0212, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3379
  [2026-01-30 13:15:56] (step=0003154) Train Loss mse: 0.0249, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3380
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step3500
3381
+ Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
3382
+ [eval debug] first 3 batch fingerprints:
3383
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3384
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3385
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3386
+ ce_avg: 0.0, mse_avg: 0.024277793243527412
3387
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step4000
3388
+ Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
3389
+ [eval debug] first 3 batch fingerprints:
3390
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3391
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3392
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
3393
+ ce_avg: 0.0, mse_avg: 0.024172412231564522
3394
  [2026-01-30 13:16:13] (step=0003155) Train Loss mse: 0.0206, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3395
  [2026-01-30 13:16:30] (step=0003156) Train Loss mse: 0.0216, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
3396
  [2026-01-30 13:16:47] (step=0003157) Train Loss mse: 0.0233, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
4416
  [2026-01-30 18:05:06] (step=0004177) Train Loss mse: 0.0218, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4417
  [2026-01-30 18:05:22] (step=0004178) Train Loss mse: 0.0224, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4418
  [2026-01-30 18:05:39] (step=0004179) Train Loss mse: 0.0202, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4419
  [2026-01-30 18:05:56] (step=0004180) Train Loss mse: 0.0244, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4420
  [2026-01-30 18:06:13] (step=0004181) Train Loss mse: 0.0221, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4421
  [2026-01-30 18:06:30] (step=0004182) Train Loss mse: 0.0214, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
 
4433
  [2026-01-30 18:09:51] (step=0004194) Train Loss mse: 0.0225, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4434
  [2026-01-30 18:10:08] (step=0004195) Train Loss mse: 0.0217, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4435
  [2026-01-30 18:10:25] (step=0004196) Train Loss mse: 0.0208, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4436
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step4500
4437
+ Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
4438
+ [eval debug] first 3 batch fingerprints:
4439
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
4440
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
4441
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
4442
+ ce_avg: 0.0, mse_avg: 0.02414196915924549
4443
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_3d_pad3_by_axis_one_image_lr2e_5_mse_only_ins/eval_used_rows, step_tag is vlm_gym_mental_rotation_3d_pad3_by_axis_one_img_lr2e_5_mse_only_ins_step5000
4444
+ Preparing Dataset vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce/vlm_gym_mental_rotation_3d_pad3_by_axis_val
4445
+ [eval debug] first 3 batch fingerprints:
4446
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
4447
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
4448
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_3d_pad3_by_axis_mse_loss_only_evalonce'}]
4449
+ ce_avg: 0.0, mse_avg: 0.024185948073863983
4450
+ 2026-01-30 18:10:41] (step=0004197) Train Loss mse: 0.0209, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4451
  [2026-01-30 18:10:58] (step=0004198) Train Loss mse: 0.0209, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4452
  [2026-01-30 18:11:15] (step=0004199) Train Loss mse: 0.0245, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,
4453
  [2026-01-30 18:11:32] (step=0004200) Train Loss mse: 0.0206, Train Loss ce: 0.0000, Train Steps/Sec: 0.06,