Upload checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins
Browse files
checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/wandb/offline-run-20260128_050010-checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins-run0/files/output.log
CHANGED
|
@@ -184,6 +184,13 @@ Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rota
|
|
| 184 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 185 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 186 |
ce_avg: 0.2688937187194824, mse_avg: 0.10026200860738754
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
wandb: Detected [huggingface_hub.inference] in use.
|
| 188 |
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
|
| 189 |
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
@@ -1161,27 +1168,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 1161 |
[[34m2026-01-28 06:49:36[39m] (step=0000964) Train Loss mse: 0.0967, Train Loss ce: 0.2595, Train Steps/Sec: 0.16,
|
| 1162 |
[[34m2026-01-28 06:49:43[39m] (step=0000965) Train Loss mse: 0.0965, Train Loss ce: 0.2566, Train Steps/Sec: 0.16,
|
| 1163 |
[[34m2026-01-28 06:49:48[39m] (step=0000966) Train Loss mse: 0.1096, Train Loss ce: 0.2680, Train Steps/Sec: 0.17,
|
| 1164 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step1000
|
| 1165 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 1166 |
-
[eval debug] first 3 batch fingerprints:
|
| 1167 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1168 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1169 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1170 |
-
ce_avg: 0.34873995184898376, mse_avg: 0.09575071930885315
|
| 1171 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step1500
|
| 1172 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 1173 |
-
[eval debug] first 3 batch fingerprints:
|
| 1174 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1175 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1176 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1177 |
-
ce_avg: 0.459031879901886, mse_avg: 0.09415699541568756
|
| 1178 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step2000
|
| 1179 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 1180 |
-
[eval debug] first 3 batch fingerprints:
|
| 1181 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1182 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1183 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1184 |
-
ce_avg: 1.3050191402435303, mse_avg: 0.09429918974637985
|
| 1185 |
[[34m2026-01-28 06:49:56[39m] (step=0000967) Train Loss mse: 0.1004, Train Loss ce: 0.2610, Train Steps/Sec: 0.14,
|
| 1186 |
[[34m2026-01-28 06:50:02[39m] (step=0000968) Train Loss mse: 0.0932, Train Loss ce: 0.2676, Train Steps/Sec: 0.16,
|
| 1187 |
[[34m2026-01-28 06:50:08[39m] (step=0000969) Train Loss mse: 0.1012, Train Loss ce: 0.2224, Train Steps/Sec: 0.17,
|
|
@@ -1224,6 +1210,20 @@ ce_avg: 1.3050191402435303, mse_avg: 0.09429918974637985
|
|
| 1224 |
[[34m2026-01-28 06:54:35[39m] (step=0001006) Train Loss mse: 0.1081, Train Loss ce: 0.1910, Train Steps/Sec: 0.15,
|
| 1225 |
[[34m2026-01-28 06:54:42[39m] (step=0001007) Train Loss mse: 0.0917, Train Loss ce: 0.2918, Train Steps/Sec: 0.16,
|
| 1226 |
[[34m2026-01-28 06:54:48[39m] (step=0001008) Train Loss mse: 0.1051, Train Loss ce: 0.2713, Train Steps/Sec: 0.15,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1227 |
[[34m2026-01-28 06:54:55[39m] (step=0001009) Train Loss mse: 0.1093, Train Loss ce: 0.2607, Train Steps/Sec: 0.16,
|
| 1228 |
[[34m2026-01-28 06:55:02[39m] (step=0001010) Train Loss mse: 0.1076, Train Loss ce: 0.2569, Train Steps/Sec: 0.14,
|
| 1229 |
[[34m2026-01-28 06:55:09[39m] (step=0001011) Train Loss mse: 0.1154, Train Loss ce: 0.2570, Train Steps/Sec: 0.16,
|
|
@@ -2498,20 +2498,6 @@ ce_avg: 1.3050191402435303, mse_avg: 0.09429918974637985
|
|
| 2498 |
[[34m2026-01-28 09:10:58[39m] (step=0002280) Train Loss mse: 0.0883, Train Loss ce: 0.2570, Train Steps/Sec: 0.18,
|
| 2499 |
[[34m2026-01-28 09:11:04[39m] (step=0002281) Train Loss mse: 0.1298, Train Loss ce: 0.2740, Train Steps/Sec: 0.17,
|
| 2500 |
[[34m2026-01-28 09:11:10[39m] (step=0002282) Train Loss mse: 0.0929, Train Loss ce: 0.2899, Train Steps/Sec: 0.18,
|
| 2501 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step2500
|
| 2502 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 2503 |
-
[eval debug] first 3 batch fingerprints:
|
| 2504 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2505 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2506 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2507 |
-
ce_avg: 2.8753163814544678, mse_avg: 0.09435902535915375
|
| 2508 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step3000
|
| 2509 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 2510 |
-
[eval debug] first 3 batch fingerprints:
|
| 2511 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2512 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2513 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2514 |
-
ce_avg: 0.23429779708385468, mse_avg: 0.09545279294252396
|
| 2515 |
[[34m2026-01-28 09:11:17[39m] (step=0002283) Train Loss mse: 0.0946, Train Loss ce: 0.1869, Train Steps/Sec: 0.15,
|
| 2516 |
[[34m2026-01-28 09:11:24[39m] (step=0002284) Train Loss mse: 0.1032, Train Loss ce: 0.2511, Train Steps/Sec: 0.15,
|
| 2517 |
[[34m2026-01-28 09:11:29[39m] (step=0002285) Train Loss mse: 0.1330, Train Loss ce: 0.2405, Train Steps/Sec: 0.18,
|
|
@@ -2681,6 +2667,27 @@ ce_avg: 0.23429779708385468, mse_avg: 0.09545279294252396
|
|
| 2681 |
[[34m2026-01-28 09:28:46[39m] (step=0002449) Train Loss mse: 0.0956, Train Loss ce: 0.2226, Train Steps/Sec: 0.16,
|
| 2682 |
[[34m2026-01-28 09:28:52[39m] (step=0002450) Train Loss mse: 0.1061, Train Loss ce: 0.2766, Train Steps/Sec: 0.19,
|
| 2683 |
[[34m2026-01-28 09:28:58[39m] (step=0002451) Train Loss mse: 0.1174, Train Loss ce: 0.2472, Train Steps/Sec: 0.15,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2684 |
[[34m2026-01-28 09:29:06[39m] (step=0002452) Train Loss mse: 0.1081, Train Loss ce: 0.2334, Train Steps/Sec: 0.13,
|
| 2685 |
[[34m2026-01-28 09:29:12[39m] (step=0002453) Train Loss mse: 0.1002, Train Loss ce: 0.2645, Train Steps/Sec: 0.16,
|
| 2686 |
[[34m2026-01-28 09:29:18[39m] (step=0002454) Train Loss mse: 0.0897, Train Loss ce: 0.2104, Train Steps/Sec: 0.16,
|
|
@@ -3524,20 +3531,6 @@ ce_avg: 0.23429779708385468, mse_avg: 0.09545279294252396
|
|
| 3524 |
[[34m2026-01-28 11:01:58[39m] (step=0003289) Train Loss mse: 0.1045, Train Loss ce: 0.2524, Train Steps/Sec: 0.15,
|
| 3525 |
[[34m2026-01-28 11:02:04[39m] (step=0003290) Train Loss mse: 0.0882, Train Loss ce: 0.1963, Train Steps/Sec: 0.16,
|
| 3526 |
[[34m2026-01-28 11:02:11[39m] (step=0003291) Train Loss mse: 0.1076, Train Loss ce: 0.2416, Train Steps/Sec: 0.15,
|
| 3527 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step3500
|
| 3528 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 3529 |
-
[eval debug] first 3 batch fingerprints:
|
| 3530 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 3531 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 3532 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 3533 |
-
ce_avg: 0.23339946568012238, mse_avg: 0.09179326146841049
|
| 3534 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step4000
|
| 3535 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 3536 |
-
[eval debug] first 3 batch fingerprints:
|
| 3537 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 3538 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 3539 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 3540 |
-
ce_avg: 0.23216108977794647, mse_avg: 0.09232720732688904
|
| 3541 |
[[34m2026-01-28 11:02:18[39m] (step=0003292) Train Loss mse: 0.1043, Train Loss ce: 0.2579, Train Steps/Sec: 0.15,
|
| 3542 |
[[34m2026-01-28 11:02:24[39m] (step=0003293) Train Loss mse: 0.1035, Train Loss ce: 0.2152, Train Steps/Sec: 0.17,
|
| 3543 |
[[34m2026-01-28 11:02:30[39m] (step=0003294) Train Loss mse: 0.0917, Train Loss ce: 0.2382, Train Steps/Sec: 0.16,
|
|
@@ -3661,6 +3654,20 @@ ce_avg: 0.23216108977794647, mse_avg: 0.09232720732688904
|
|
| 3661 |
[[34m2026-01-28 11:15:08[39m] (step=0003412) Train Loss mse: 0.0918, Train Loss ce: 0.2324, Train Steps/Sec: 0.15,
|
| 3662 |
[[34m2026-01-28 11:15:14[39m] (step=0003413) Train Loss mse: 0.1073, Train Loss ce: 0.2467, Train Steps/Sec: 0.16,
|
| 3663 |
[[34m2026-01-28 11:15:21[39m] (step=0003414) Train Loss mse: 0.0914, Train Loss ce: 0.2380, Train Steps/Sec: 0.14,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3664 |
[[34m2026-01-28 11:15:28[39m] (step=0003415) Train Loss mse: 0.1021, Train Loss ce: 0.2517, Train Steps/Sec: 0.14,
|
| 3665 |
[[34m2026-01-28 11:15:34[39m] (step=0003416) Train Loss mse: 0.1034, Train Loss ce: 0.2328, Train Steps/Sec: 0.17,
|
| 3666 |
[[34m2026-01-28 11:15:40[39m] (step=0003417) Train Loss mse: 0.0965, Train Loss ce: 0.2408, Train Steps/Sec: 0.16,
|
|
@@ -4947,20 +4954,6 @@ ce_avg: 0.23216108977794647, mse_avg: 0.09232720732688904
|
|
| 4947 |
[[34m2026-01-28 13:32:55[39m] (step=0004698) Train Loss mse: 0.1010, Train Loss ce: 0.2219, Train Steps/Sec: 0.14,
|
| 4948 |
[[34m2026-01-28 13:33:02[39m] (step=0004699) Train Loss mse: 0.1133, Train Loss ce: 0.2032, Train Steps/Sec: 0.14,
|
| 4949 |
[[34m2026-01-28 13:33:09[39m] (step=0004700) Train Loss mse: 0.1043, Train Loss ce: 0.2092, Train Steps/Sec: 0.16,
|
| 4950 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step4500
|
| 4951 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 4952 |
-
[eval debug] first 3 batch fingerprints:
|
| 4953 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 4954 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 4955 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 4956 |
-
ce_avg: 0.23146067559719086, mse_avg: 0.09148821234703064
|
| 4957 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step5000
|
| 4958 |
-
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 4959 |
-
[eval debug] first 3 batch fingerprints:
|
| 4960 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 4961 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 4962 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 4963 |
-
ce_avg: 0.2311139553785324, mse_avg: 0.09259119629859924
|
| 4964 |
[[34m2026-01-28 13:33:15[39m] (step=0004701) Train Loss mse: 0.1018, Train Loss ce: 0.2420, Train Steps/Sec: 0.15,
|
| 4965 |
[[34m2026-01-28 13:33:22[39m] (step=0004702) Train Loss mse: 0.0966, Train Loss ce: 0.2338, Train Steps/Sec: 0.14,
|
| 4966 |
[[34m2026-01-28 13:33:29[39m] (step=0004703) Train Loss mse: 0.1212, Train Loss ce: 0.2302, Train Steps/Sec: 0.16,
|
|
@@ -5175,6 +5168,13 @@ ce_avg: 0.2311139553785324, mse_avg: 0.09259119629859924
|
|
| 5175 |
[[34m2026-01-28 13:55:45[39m] (step=0004912) Train Loss mse: 0.1120, Train Loss ce: 0.2248, Train Steps/Sec: 0.16,
|
| 5176 |
[[34m2026-01-28 13:55:52[39m] (step=0004913) Train Loss mse: 0.1049, Train Loss ce: 0.2181, Train Steps/Sec: 0.17,
|
| 5177 |
[[34m2026-01-28 13:55:58[39m] (step=0004914) Train Loss mse: 0.0920, Train Loss ce: 0.2414, Train Steps/Sec: 0.16,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5178 |
[[34m2026-01-28 13:56:05[39m] (step=0004915) Train Loss mse: 0.0982, Train Loss ce: 0.2438, Train Steps/Sec: 0.15,
|
| 5179 |
[[34m2026-01-28 13:56:11[39m] (step=0004916) Train Loss mse: 0.1100, Train Loss ce: 0.2129, Train Steps/Sec: 0.15,
|
| 5180 |
[[34m2026-01-28 13:56:17[39m] (step=0004917) Train Loss mse: 0.1248, Train Loss ce: 0.2591, Train Steps/Sec: 0.18,
|
|
|
|
| 184 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 185 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 186 |
ce_avg: 0.2688937187194824, mse_avg: 0.10026200860738754
|
| 187 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step1000
|
| 188 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 189 |
+
[eval debug] first 3 batch fingerprints:
|
| 190 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 191 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 192 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 193 |
+
ce_avg: 0.34873995184898376, mse_avg: 0.09575071930885315
|
| 194 |
wandb: Detected [huggingface_hub.inference] in use.
|
| 195 |
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
|
| 196 |
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
|
|
| 1168 |
[[34m2026-01-28 06:49:36[39m] (step=0000964) Train Loss mse: 0.0967, Train Loss ce: 0.2595, Train Steps/Sec: 0.16,
|
| 1169 |
[[34m2026-01-28 06:49:43[39m] (step=0000965) Train Loss mse: 0.0965, Train Loss ce: 0.2566, Train Steps/Sec: 0.16,
|
| 1170 |
[[34m2026-01-28 06:49:48[39m] (step=0000966) Train Loss mse: 0.1096, Train Loss ce: 0.2680, Train Steps/Sec: 0.17,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1171 |
[[34m2026-01-28 06:49:56[39m] (step=0000967) Train Loss mse: 0.1004, Train Loss ce: 0.2610, Train Steps/Sec: 0.14,
|
| 1172 |
[[34m2026-01-28 06:50:02[39m] (step=0000968) Train Loss mse: 0.0932, Train Loss ce: 0.2676, Train Steps/Sec: 0.16,
|
| 1173 |
[[34m2026-01-28 06:50:08[39m] (step=0000969) Train Loss mse: 0.1012, Train Loss ce: 0.2224, Train Steps/Sec: 0.17,
|
|
|
|
| 1210 |
[[34m2026-01-28 06:54:35[39m] (step=0001006) Train Loss mse: 0.1081, Train Loss ce: 0.1910, Train Steps/Sec: 0.15,
|
| 1211 |
[[34m2026-01-28 06:54:42[39m] (step=0001007) Train Loss mse: 0.0917, Train Loss ce: 0.2918, Train Steps/Sec: 0.16,
|
| 1212 |
[[34m2026-01-28 06:54:48[39m] (step=0001008) Train Loss mse: 0.1051, Train Loss ce: 0.2713, Train Steps/Sec: 0.15,
|
| 1213 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step1500
|
| 1214 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 1215 |
+
[eval debug] first 3 batch fingerprints:
|
| 1216 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1217 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1218 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1219 |
+
ce_avg: 0.459031879901886, mse_avg: 0.09415699541568756
|
| 1220 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step2000
|
| 1221 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 1222 |
+
[eval debug] first 3 batch fingerprints:
|
| 1223 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1224 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1225 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 1226 |
+
ce_avg: 1.3050191402435303, mse_avg: 0.09429918974637985
|
| 1227 |
[[34m2026-01-28 06:54:55[39m] (step=0001009) Train Loss mse: 0.1093, Train Loss ce: 0.2607, Train Steps/Sec: 0.16,
|
| 1228 |
[[34m2026-01-28 06:55:02[39m] (step=0001010) Train Loss mse: 0.1076, Train Loss ce: 0.2569, Train Steps/Sec: 0.14,
|
| 1229 |
[[34m2026-01-28 06:55:09[39m] (step=0001011) Train Loss mse: 0.1154, Train Loss ce: 0.2570, Train Steps/Sec: 0.16,
|
|
|
|
| 2498 |
[[34m2026-01-28 09:10:58[39m] (step=0002280) Train Loss mse: 0.0883, Train Loss ce: 0.2570, Train Steps/Sec: 0.18,
|
| 2499 |
[[34m2026-01-28 09:11:04[39m] (step=0002281) Train Loss mse: 0.1298, Train Loss ce: 0.2740, Train Steps/Sec: 0.17,
|
| 2500 |
[[34m2026-01-28 09:11:10[39m] (step=0002282) Train Loss mse: 0.0929, Train Loss ce: 0.2899, Train Steps/Sec: 0.18,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2501 |
[[34m2026-01-28 09:11:17[39m] (step=0002283) Train Loss mse: 0.0946, Train Loss ce: 0.1869, Train Steps/Sec: 0.15,
|
| 2502 |
[[34m2026-01-28 09:11:24[39m] (step=0002284) Train Loss mse: 0.1032, Train Loss ce: 0.2511, Train Steps/Sec: 0.15,
|
| 2503 |
[[34m2026-01-28 09:11:29[39m] (step=0002285) Train Loss mse: 0.1330, Train Loss ce: 0.2405, Train Steps/Sec: 0.18,
|
|
|
|
| 2667 |
[[34m2026-01-28 09:28:46[39m] (step=0002449) Train Loss mse: 0.0956, Train Loss ce: 0.2226, Train Steps/Sec: 0.16,
|
| 2668 |
[[34m2026-01-28 09:28:52[39m] (step=0002450) Train Loss mse: 0.1061, Train Loss ce: 0.2766, Train Steps/Sec: 0.19,
|
| 2669 |
[[34m2026-01-28 09:28:58[39m] (step=0002451) Train Loss mse: 0.1174, Train Loss ce: 0.2472, Train Steps/Sec: 0.15,
|
| 2670 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step2500
|
| 2671 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 2672 |
+
[eval debug] first 3 batch fingerprints:
|
| 2673 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2674 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2675 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2676 |
+
ce_avg: 2.8753163814544678, mse_avg: 0.09435902535915375
|
| 2677 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step3000
|
| 2678 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 2679 |
+
[eval debug] first 3 batch fingerprints:
|
| 2680 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2681 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2682 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2683 |
+
ce_avg: 0.23429779708385468, mse_avg: 0.09545279294252396
|
| 2684 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step3500
|
| 2685 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 2686 |
+
[eval debug] first 3 batch fingerprints:
|
| 2687 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2688 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2689 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 2690 |
+
ce_avg: 0.23339946568012238, mse_avg: 0.09179326146841049
|
| 2691 |
[[34m2026-01-28 09:29:06[39m] (step=0002452) Train Loss mse: 0.1081, Train Loss ce: 0.2334, Train Steps/Sec: 0.13,
|
| 2692 |
[[34m2026-01-28 09:29:12[39m] (step=0002453) Train Loss mse: 0.1002, Train Loss ce: 0.2645, Train Steps/Sec: 0.16,
|
| 2693 |
[[34m2026-01-28 09:29:18[39m] (step=0002454) Train Loss mse: 0.0897, Train Loss ce: 0.2104, Train Steps/Sec: 0.16,
|
|
|
|
| 3531 |
[[34m2026-01-28 11:01:58[39m] (step=0003289) Train Loss mse: 0.1045, Train Loss ce: 0.2524, Train Steps/Sec: 0.15,
|
| 3532 |
[[34m2026-01-28 11:02:04[39m] (step=0003290) Train Loss mse: 0.0882, Train Loss ce: 0.1963, Train Steps/Sec: 0.16,
|
| 3533 |
[[34m2026-01-28 11:02:11[39m] (step=0003291) Train Loss mse: 0.1076, Train Loss ce: 0.2416, Train Steps/Sec: 0.15,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3534 |
[[34m2026-01-28 11:02:18[39m] (step=0003292) Train Loss mse: 0.1043, Train Loss ce: 0.2579, Train Steps/Sec: 0.15,
|
| 3535 |
[[34m2026-01-28 11:02:24[39m] (step=0003293) Train Loss mse: 0.1035, Train Loss ce: 0.2152, Train Steps/Sec: 0.17,
|
| 3536 |
[[34m2026-01-28 11:02:30[39m] (step=0003294) Train Loss mse: 0.0917, Train Loss ce: 0.2382, Train Steps/Sec: 0.16,
|
|
|
|
| 3654 |
[[34m2026-01-28 11:15:08[39m] (step=0003412) Train Loss mse: 0.0918, Train Loss ce: 0.2324, Train Steps/Sec: 0.15,
|
| 3655 |
[[34m2026-01-28 11:15:14[39m] (step=0003413) Train Loss mse: 0.1073, Train Loss ce: 0.2467, Train Steps/Sec: 0.16,
|
| 3656 |
[[34m2026-01-28 11:15:21[39m] (step=0003414) Train Loss mse: 0.0914, Train Loss ce: 0.2380, Train Steps/Sec: 0.14,
|
| 3657 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step4000
|
| 3658 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 3659 |
+
[eval debug] first 3 batch fingerprints:
|
| 3660 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 3661 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 3662 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 3663 |
+
ce_avg: 0.23216108977794647, mse_avg: 0.09232720732688904
|
| 3664 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step4500
|
| 3665 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 3666 |
+
[eval debug] first 3 batch fingerprints:
|
| 3667 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 3668 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 3669 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 3670 |
+
ce_avg: 0.23146067559719086, mse_avg: 0.09148821234703064
|
| 3671 |
[[34m2026-01-28 11:15:28[39m] (step=0003415) Train Loss mse: 0.1021, Train Loss ce: 0.2517, Train Steps/Sec: 0.14,
|
| 3672 |
[[34m2026-01-28 11:15:34[39m] (step=0003416) Train Loss mse: 0.1034, Train Loss ce: 0.2328, Train Steps/Sec: 0.17,
|
| 3673 |
[[34m2026-01-28 11:15:40[39m] (step=0003417) Train Loss mse: 0.0965, Train Loss ce: 0.2408, Train Steps/Sec: 0.16,
|
|
|
|
| 4954 |
[[34m2026-01-28 13:32:55[39m] (step=0004698) Train Loss mse: 0.1010, Train Loss ce: 0.2219, Train Steps/Sec: 0.14,
|
| 4955 |
[[34m2026-01-28 13:33:02[39m] (step=0004699) Train Loss mse: 0.1133, Train Loss ce: 0.2032, Train Steps/Sec: 0.14,
|
| 4956 |
[[34m2026-01-28 13:33:09[39m] (step=0004700) Train Loss mse: 0.1043, Train Loss ce: 0.2092, Train Steps/Sec: 0.16,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4957 |
[[34m2026-01-28 13:33:15[39m] (step=0004701) Train Loss mse: 0.1018, Train Loss ce: 0.2420, Train Steps/Sec: 0.15,
|
| 4958 |
[[34m2026-01-28 13:33:22[39m] (step=0004702) Train Loss mse: 0.0966, Train Loss ce: 0.2338, Train Steps/Sec: 0.14,
|
| 4959 |
[[34m2026-01-28 13:33:29[39m] (step=0004703) Train Loss mse: 0.1212, Train Loss ce: 0.2302, Train Steps/Sec: 0.16,
|
|
|
|
| 5168 |
[[34m2026-01-28 13:55:45[39m] (step=0004912) Train Loss mse: 0.1120, Train Loss ce: 0.2248, Train Steps/Sec: 0.16,
|
| 5169 |
[[34m2026-01-28 13:55:52[39m] (step=0004913) Train Loss mse: 0.1049, Train Loss ce: 0.2181, Train Steps/Sec: 0.17,
|
| 5170 |
[[34m2026-01-28 13:55:58[39m] (step=0004914) Train Loss mse: 0.0920, Train Loss ce: 0.2414, Train Steps/Sec: 0.16,
|
| 5171 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_mental_rotation_2d_one_image_lr2e_5_ce_ins_step5000
|
| 5172 |
+
Preparing Dataset vlm_gym_mental_rotation_2d_celoss_evalonce/vlm_gym_mental_rotation_2d_val
|
| 5173 |
+
[eval debug] first 3 batch fingerprints:
|
| 5174 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 5175 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 5176 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_mental_rotation_2d_celoss_evalonce'}]
|
| 5177 |
+
ce_avg: 0.2311139553785324, mse_avg: 0.09259119629859924
|
| 5178 |
[[34m2026-01-28 13:56:05[39m] (step=0004915) Train Loss mse: 0.0982, Train Loss ce: 0.2438, Train Steps/Sec: 0.15,
|
| 5179 |
[[34m2026-01-28 13:56:11[39m] (step=0004916) Train Loss mse: 0.1100, Train Loss ce: 0.2129, Train Steps/Sec: 0.15,
|
| 5180 |
[[34m2026-01-28 13:56:17[39m] (step=0004917) Train Loss mse: 0.1248, Train Loss ce: 0.2591, Train Steps/Sec: 0.18,
|