Upload checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins
Browse files
checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/wandb/offline-run-20260126_192812-checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins-run0/files/output.log
CHANGED
|
@@ -1077,20 +1077,6 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 1077 |
[[34m2026-01-26 19:50:13[39m] (step=0000896) Train Loss mse: 0.0000, Train Loss ce: 0.1327, Train Steps/Sec: 1.02,
|
| 1078 |
[[34m2026-01-26 19:50:14[39m] (step=0000897) Train Loss mse: 0.0000, Train Loss ce: 0.0637, Train Steps/Sec: 1.02,
|
| 1079 |
[[34m2026-01-26 19:50:15[39m] (step=0000898) Train Loss mse: 0.0000, Train Loss ce: 0.0733, Train Steps/Sec: 1.02,
|
| 1080 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step1500
|
| 1081 |
-
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 1082 |
-
[eval debug] first 3 batch fingerprints:
|
| 1083 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1084 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1085 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1086 |
-
ce_avg: 0.05330345034599304, mse_avg: 0.0
|
| 1087 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step2000
|
| 1088 |
-
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 1089 |
-
[eval debug] first 3 batch fingerprints:
|
| 1090 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1091 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1092 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1093 |
-
ce_avg: 0.05127852410078049, mse_avg: 0.0
|
| 1094 |
[[34m2026-01-26 19:50:16[39m] (step=0000899) Train Loss mse: 0.0000, Train Loss ce: 0.0557, Train Steps/Sec: 1.02,
|
| 1095 |
[[34m2026-01-26 19:50:17[39m] (step=0000900) Train Loss mse: 0.0000, Train Loss ce: 0.0701, Train Steps/Sec: 0.80,
|
| 1096 |
[[34m2026-01-26 19:50:18[39m] (step=0000901) Train Loss mse: 0.0000, Train Loss ce: 0.0699, Train Steps/Sec: 1.01,
|
|
@@ -1303,6 +1289,27 @@ ce_avg: 0.05127852410078049, mse_avg: 0.0
|
|
| 1303 |
[[34m2026-01-26 19:54:02[39m] (step=0001108) Train Loss mse: 0.0000, Train Loss ce: 0.0604, Train Steps/Sec: 1.01,
|
| 1304 |
[[34m2026-01-26 19:54:03[39m] (step=0001109) Train Loss mse: 0.0000, Train Loss ce: 0.0570, Train Steps/Sec: 0.81,
|
| 1305 |
[[34m2026-01-26 19:54:04[39m] (step=0001110) Train Loss mse: 0.0000, Train Loss ce: 0.0559, Train Steps/Sec: 1.01,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1306 |
[[34m2026-01-26 19:54:05[39m] (step=0001111) Train Loss mse: 0.0000, Train Loss ce: 0.0425, Train Steps/Sec: 1.01,
|
| 1307 |
[[34m2026-01-26 19:54:06[39m] (step=0001112) Train Loss mse: 0.0000, Train Loss ce: 0.0792, Train Steps/Sec: 1.01,
|
| 1308 |
[[34m2026-01-26 19:54:08[39m] (step=0001113) Train Loss mse: 0.0000, Train Loss ce: 0.0488, Train Steps/Sec: 0.76,
|
|
@@ -2474,20 +2481,6 @@ ce_avg: 0.05127852410078049, mse_avg: 0.0
|
|
| 2474 |
[[34m2026-01-26 20:14:52[39m] (step=0002279) Train Loss mse: 0.0000, Train Loss ce: 0.0346, Train Steps/Sec: 1.02,
|
| 2475 |
[[34m2026-01-26 20:14:53[39m] (step=0002280) Train Loss mse: 0.0000, Train Loss ce: 0.0447, Train Steps/Sec: 1.02,
|
| 2476 |
[[34m2026-01-26 20:14:54[39m] (step=0002281) Train Loss mse: 0.0000, Train Loss ce: 0.0545, Train Steps/Sec: 0.75,
|
| 2477 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 2478 |
-
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 2479 |
-
[eval debug] first 3 batch fingerprints:
|
| 2480 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 2481 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 2482 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 2483 |
-
ce_avg: 0.05304804816842079, mse_avg: 0.0
|
| 2484 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2485 |
-
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 2486 |
-
[eval debug] first 3 batch fingerprints:
|
| 2487 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 2488 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 2489 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 2490 |
-
ce_avg: 0.058707185089588165, mse_avg: 0.0
|
| 2491 |
[[34m2026-01-26 20:14:55[39m] (step=0002282) Train Loss mse: 0.0000, Train Loss ce: 0.0328, Train Steps/Sec: 0.81,
|
| 2492 |
[[34m2026-01-26 20:14:56[39m] (step=0002283) Train Loss mse: 0.0000, Train Loss ce: 0.0494, Train Steps/Sec: 1.02,
|
| 2493 |
[[34m2026-01-26 20:14:57[39m] (step=0002284) Train Loss mse: 0.0000, Train Loss ce: 0.0751, Train Steps/Sec: 1.02,
|
|
@@ -2820,6 +2813,20 @@ ce_avg: 0.058707185089588165, mse_avg: 0.0
|
|
| 2820 |
[[34m2026-01-26 20:20:47[39m] (step=0002611) Train Loss mse: 0.0000, Train Loss ce: 0.0525, Train Steps/Sec: 1.01,
|
| 2821 |
[[34m2026-01-26 20:20:48[39m] (step=0002612) Train Loss mse: 0.0000, Train Loss ce: 0.0433, Train Steps/Sec: 0.99,
|
| 2822 |
[[34m2026-01-26 20:20:49[39m] (step=0002613) Train Loss mse: 0.0000, Train Loss ce: 0.0471, Train Steps/Sec: 0.76,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2823 |
[[34m2026-01-26 20:20:50[39m] (step=0002614) Train Loss mse: 0.0000, Train Loss ce: 0.0586, Train Steps/Sec: 1.01,
|
| 2824 |
[[34m2026-01-26 20:20:51[39m] (step=0002615) Train Loss mse: 0.0000, Train Loss ce: 0.0357, Train Steps/Sec: 1.01,
|
| 2825 |
[[34m2026-01-26 20:20:52[39m] (step=0002616) Train Loss mse: 0.0000, Train Loss ce: 0.0313, Train Steps/Sec: 1.00,
|
|
@@ -3496,27 +3503,6 @@ ce_avg: 0.058707185089588165, mse_avg: 0.0
|
|
| 3496 |
[[34m2026-01-26 20:32:48[39m] (step=0003287) Train Loss mse: 0.0000, Train Loss ce: 0.0356, Train Steps/Sec: 1.01,
|
| 3497 |
[[34m2026-01-26 20:32:49[39m] (step=0003288) Train Loss mse: 0.0000, Train Loss ce: 0.0347, Train Steps/Sec: 1.01,
|
| 3498 |
[[34m2026-01-26 20:32:51[39m] (step=0003289) Train Loss mse: 0.0000, Train Loss ce: 0.0473, Train Steps/Sec: 0.75,
|
| 3499 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step3500
|
| 3500 |
-
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 3501 |
-
[eval debug] first 3 batch fingerprints:
|
| 3502 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3503 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3504 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3505 |
-
ce_avg: 0.10416685044765472, mse_avg: 0.0
|
| 3506 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step4000
|
| 3507 |
-
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 3508 |
-
[eval debug] first 3 batch fingerprints:
|
| 3509 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3510 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3511 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3512 |
-
ce_avg: 0.09295430034399033, mse_avg: 0.0
|
| 3513 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step4500
|
| 3514 |
-
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 3515 |
-
[eval debug] first 3 batch fingerprints:
|
| 3516 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3517 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3518 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3519 |
-
ce_avg: 0.09719827771186829, mse_avg: 0.0
|
| 3520 |
[[34m2026-01-26 20:32:52[39m] (step=0003290) Train Loss mse: 0.0000, Train Loss ce: 0.0485, Train Steps/Sec: 0.81,
|
| 3521 |
[[34m2026-01-26 20:32:53[39m] (step=0003291) Train Loss mse: 0.0000, Train Loss ce: 0.0363, Train Steps/Sec: 1.02,
|
| 3522 |
[[34m2026-01-26 20:32:54[39m] (step=0003292) Train Loss mse: 0.0000, Train Loss ce: 0.0417, Train Steps/Sec: 1.01,
|
|
@@ -3921,27 +3907,27 @@ ce_avg: 0.09719827771186829, mse_avg: 0.0
|
|
| 3921 |
[[34m2026-01-26 20:40:04[39m] (step=0003691) Train Loss mse: 0.0000, Train Loss ce: 0.0398, Train Steps/Sec: 1.00,
|
| 3922 |
[[34m2026-01-26 20:40:05[39m] (step=0003692) Train Loss mse: 0.0000, Train Loss ce: 0.0272, Train Steps/Sec: 1.02,
|
| 3923 |
[[34m2026-01-26 20:40:06[39m] (step=0003693) Train Loss mse: 0.0000, Train Loss ce: 0.0313, Train Steps/Sec: 1.02,
|
| 3924 |
-
|
| 3925 |
-
|
| 3926 |
-
[
|
| 3927 |
-
[
|
| 3928 |
-
[
|
| 3929 |
-
[
|
| 3930 |
-
|
| 3931 |
-
|
| 3932 |
-
|
| 3933 |
-
[
|
| 3934 |
-
[
|
| 3935 |
-
[
|
| 3936 |
-
[
|
| 3937 |
-
|
| 3938 |
-
|
| 3939 |
-
|
| 3940 |
-
[
|
| 3941 |
-
[
|
| 3942 |
-
[
|
| 3943 |
-
[
|
| 3944 |
-
|
| 3945 |
[[34m2026-01-26 20:40:30[39m] (step=0003715) Train Loss mse: 0.0000, Train Loss ce: 0.0235, Train Steps/Sec: 1.01,
|
| 3946 |
[[34m2026-01-26 20:40:31[39m] (step=0003716) Train Loss mse: 0.0000, Train Loss ce: 0.0562, Train Steps/Sec: 1.01,
|
| 3947 |
[[34m2026-01-26 20:40:32[39m] (step=0003717) Train Loss mse: 0.0000, Train Loss ce: 0.0530, Train Steps/Sec: 0.99,
|
|
@@ -4909,13 +4895,6 @@ ce_avg: 0.09719827771186829, mse_avg: 0.0
|
|
| 4909 |
[[34m2026-01-26 20:57:42[39m] (step=0004679) Train Loss mse: 0.0000, Train Loss ce: 0.0178, Train Steps/Sec: 0.99,
|
| 4910 |
[[34m2026-01-26 20:57:43[39m] (step=0004680) Train Loss mse: 0.0000, Train Loss ce: 0.0502, Train Steps/Sec: 0.79,
|
| 4911 |
[[34m2026-01-26 20:57:44[39m] (step=0004681) Train Loss mse: 0.0000, Train Loss ce: 0.0485, Train Steps/Sec: 1.01,
|
| 4912 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step5000
|
| 4913 |
-
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 4914 |
-
[eval debug] first 3 batch fingerprints:
|
| 4915 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 4916 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 4917 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 4918 |
-
ce_avg: 0.10010946542024612, mse_avg: 0.0
|
| 4919 |
[[34m2026-01-26 20:57:46[39m] (step=0004682) Train Loss mse: 0.0000, Train Loss ce: 0.0333, Train Steps/Sec: 0.74,
|
| 4920 |
[[34m2026-01-26 20:57:47[39m] (step=0004683) Train Loss mse: 0.0000, Train Loss ce: 0.0376, Train Steps/Sec: 1.01,
|
| 4921 |
[[34m2026-01-26 20:57:48[39m] (step=0004684) Train Loss mse: 0.0000, Train Loss ce: 0.0151, Train Steps/Sec: 1.02,
|
|
|
|
| 1077 |
[[34m2026-01-26 19:50:13[39m] (step=0000896) Train Loss mse: 0.0000, Train Loss ce: 0.1327, Train Steps/Sec: 1.02,
|
| 1078 |
[[34m2026-01-26 19:50:14[39m] (step=0000897) Train Loss mse: 0.0000, Train Loss ce: 0.0637, Train Steps/Sec: 1.02,
|
| 1079 |
[[34m2026-01-26 19:50:15[39m] (step=0000898) Train Loss mse: 0.0000, Train Loss ce: 0.0733, Train Steps/Sec: 1.02,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1080 |
[[34m2026-01-26 19:50:16[39m] (step=0000899) Train Loss mse: 0.0000, Train Loss ce: 0.0557, Train Steps/Sec: 1.02,
|
| 1081 |
[[34m2026-01-26 19:50:17[39m] (step=0000900) Train Loss mse: 0.0000, Train Loss ce: 0.0701, Train Steps/Sec: 0.80,
|
| 1082 |
[[34m2026-01-26 19:50:18[39m] (step=0000901) Train Loss mse: 0.0000, Train Loss ce: 0.0699, Train Steps/Sec: 1.01,
|
|
|
|
| 1289 |
[[34m2026-01-26 19:54:02[39m] (step=0001108) Train Loss mse: 0.0000, Train Loss ce: 0.0604, Train Steps/Sec: 1.01,
|
| 1290 |
[[34m2026-01-26 19:54:03[39m] (step=0001109) Train Loss mse: 0.0000, Train Loss ce: 0.0570, Train Steps/Sec: 0.81,
|
| 1291 |
[[34m2026-01-26 19:54:04[39m] (step=0001110) Train Loss mse: 0.0000, Train Loss ce: 0.0559, Train Steps/Sec: 1.01,
|
| 1292 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step1500
|
| 1293 |
+
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 1294 |
+
[eval debug] first 3 batch fingerprints:
|
| 1295 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1296 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1297 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1298 |
+
ce_avg: 0.05330345034599304, mse_avg: 0.0
|
| 1299 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step2000
|
| 1300 |
+
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 1301 |
+
[eval debug] first 3 batch fingerprints:
|
| 1302 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1303 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1304 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1305 |
+
ce_avg: 0.05127852410078049, mse_avg: 0.0
|
| 1306 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 1307 |
+
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 1308 |
+
[eval debug] first 3 batch fingerprints:
|
| 1309 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1310 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1311 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 1312 |
+
ce_avg: 0.05304804816842079, mse_avg: 0.0
|
| 1313 |
[[34m2026-01-26 19:54:05[39m] (step=0001111) Train Loss mse: 0.0000, Train Loss ce: 0.0425, Train Steps/Sec: 1.01,
|
| 1314 |
[[34m2026-01-26 19:54:06[39m] (step=0001112) Train Loss mse: 0.0000, Train Loss ce: 0.0792, Train Steps/Sec: 1.01,
|
| 1315 |
[[34m2026-01-26 19:54:08[39m] (step=0001113) Train Loss mse: 0.0000, Train Loss ce: 0.0488, Train Steps/Sec: 0.76,
|
|
|
|
| 2481 |
[[34m2026-01-26 20:14:52[39m] (step=0002279) Train Loss mse: 0.0000, Train Loss ce: 0.0346, Train Steps/Sec: 1.02,
|
| 2482 |
[[34m2026-01-26 20:14:53[39m] (step=0002280) Train Loss mse: 0.0000, Train Loss ce: 0.0447, Train Steps/Sec: 1.02,
|
| 2483 |
[[34m2026-01-26 20:14:54[39m] (step=0002281) Train Loss mse: 0.0000, Train Loss ce: 0.0545, Train Steps/Sec: 0.75,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2484 |
[[34m2026-01-26 20:14:55[39m] (step=0002282) Train Loss mse: 0.0000, Train Loss ce: 0.0328, Train Steps/Sec: 0.81,
|
| 2485 |
[[34m2026-01-26 20:14:56[39m] (step=0002283) Train Loss mse: 0.0000, Train Loss ce: 0.0494, Train Steps/Sec: 1.02,
|
| 2486 |
[[34m2026-01-26 20:14:57[39m] (step=0002284) Train Loss mse: 0.0000, Train Loss ce: 0.0751, Train Steps/Sec: 1.02,
|
|
|
|
| 2813 |
[[34m2026-01-26 20:20:47[39m] (step=0002611) Train Loss mse: 0.0000, Train Loss ce: 0.0525, Train Steps/Sec: 1.01,
|
| 2814 |
[[34m2026-01-26 20:20:48[39m] (step=0002612) Train Loss mse: 0.0000, Train Loss ce: 0.0433, Train Steps/Sec: 0.99,
|
| 2815 |
[[34m2026-01-26 20:20:49[39m] (step=0002613) Train Loss mse: 0.0000, Train Loss ce: 0.0471, Train Steps/Sec: 0.76,
|
| 2816 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2817 |
+
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 2818 |
+
[eval debug] first 3 batch fingerprints:
|
| 2819 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 2820 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 2821 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 2822 |
+
ce_avg: 0.058707185089588165, mse_avg: 0.0
|
| 2823 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step3500
|
| 2824 |
+
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 2825 |
+
[eval debug] first 3 batch fingerprints:
|
| 2826 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 2827 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 2828 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 2829 |
+
ce_avg: 0.10416685044765472, mse_avg: 0.0
|
| 2830 |
[[34m2026-01-26 20:20:50[39m] (step=0002614) Train Loss mse: 0.0000, Train Loss ce: 0.0586, Train Steps/Sec: 1.01,
|
| 2831 |
[[34m2026-01-26 20:20:51[39m] (step=0002615) Train Loss mse: 0.0000, Train Loss ce: 0.0357, Train Steps/Sec: 1.01,
|
| 2832 |
[[34m2026-01-26 20:20:52[39m] (step=0002616) Train Loss mse: 0.0000, Train Loss ce: 0.0313, Train Steps/Sec: 1.00,
|
|
|
|
| 3503 |
[[34m2026-01-26 20:32:48[39m] (step=0003287) Train Loss mse: 0.0000, Train Loss ce: 0.0356, Train Steps/Sec: 1.01,
|
| 3504 |
[[34m2026-01-26 20:32:49[39m] (step=0003288) Train Loss mse: 0.0000, Train Loss ce: 0.0347, Train Steps/Sec: 1.01,
|
| 3505 |
[[34m2026-01-26 20:32:51[39m] (step=0003289) Train Loss mse: 0.0000, Train Loss ce: 0.0473, Train Steps/Sec: 0.75,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3506 |
[[34m2026-01-26 20:32:52[39m] (step=0003290) Train Loss mse: 0.0000, Train Loss ce: 0.0485, Train Steps/Sec: 0.81,
|
| 3507 |
[[34m2026-01-26 20:32:53[39m] (step=0003291) Train Loss mse: 0.0000, Train Loss ce: 0.0363, Train Steps/Sec: 1.02,
|
| 3508 |
[[34m2026-01-26 20:32:54[39m] (step=0003292) Train Loss mse: 0.0000, Train Loss ce: 0.0417, Train Steps/Sec: 1.01,
|
|
|
|
| 3907 |
[[34m2026-01-26 20:40:04[39m] (step=0003691) Train Loss mse: 0.0000, Train Loss ce: 0.0398, Train Steps/Sec: 1.00,
|
| 3908 |
[[34m2026-01-26 20:40:05[39m] (step=0003692) Train Loss mse: 0.0000, Train Loss ce: 0.0272, Train Steps/Sec: 1.02,
|
| 3909 |
[[34m2026-01-26 20:40:06[39m] (step=0003693) Train Loss mse: 0.0000, Train Loss ce: 0.0313, Train Steps/Sec: 1.02,
|
| 3910 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step4000
|
| 3911 |
+
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 3912 |
+
[eval debug] first 3 batch fingerprints:
|
| 3913 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3914 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3915 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3916 |
+
ce_avg: 0.09295430034399033, mse_avg: 0.0
|
| 3917 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step4500
|
| 3918 |
+
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 3919 |
+
[eval debug] first 3 batch fingerprints:
|
| 3920 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3921 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3922 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3923 |
+
ce_avg: 0.09719827771186829, mse_avg: 0.0
|
| 3924 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_match_equation_sos_one_image_lr2e_5_ce_no_mse_ins_step5000
|
| 3925 |
+
Preparing Dataset vlm_gym_match_equation_sos_celoss_no_mse_evalonce/vlm_gym_match_equation_sos_val
|
| 3926 |
+
[eval debug] first 3 batch fingerprints:
|
| 3927 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3928 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3929 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_match_equation_sos_celoss_no_mse_evalonce'}]
|
| 3930 |
+
ce_avg: 0.10010946542024612, mse_avg: 0.0
|
| 3931 |
[[34m2026-01-26 20:40:30[39m] (step=0003715) Train Loss mse: 0.0000, Train Loss ce: 0.0235, Train Steps/Sec: 1.01,
|
| 3932 |
[[34m2026-01-26 20:40:31[39m] (step=0003716) Train Loss mse: 0.0000, Train Loss ce: 0.0562, Train Steps/Sec: 1.01,
|
| 3933 |
[[34m2026-01-26 20:40:32[39m] (step=0003717) Train Loss mse: 0.0000, Train Loss ce: 0.0530, Train Steps/Sec: 0.99,
|
|
|
|
| 4895 |
[[34m2026-01-26 20:57:42[39m] (step=0004679) Train Loss mse: 0.0000, Train Loss ce: 0.0178, Train Steps/Sec: 0.99,
|
| 4896 |
[[34m2026-01-26 20:57:43[39m] (step=0004680) Train Loss mse: 0.0000, Train Loss ce: 0.0502, Train Steps/Sec: 0.79,
|
| 4897 |
[[34m2026-01-26 20:57:44[39m] (step=0004681) Train Loss mse: 0.0000, Train Loss ce: 0.0485, Train Steps/Sec: 1.01,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4898 |
[[34m2026-01-26 20:57:46[39m] (step=0004682) Train Loss mse: 0.0000, Train Loss ce: 0.0333, Train Steps/Sec: 0.74,
|
| 4899 |
[[34m2026-01-26 20:57:47[39m] (step=0004683) Train Loss mse: 0.0000, Train Loss ce: 0.0376, Train Steps/Sec: 1.01,
|
| 4900 |
[[34m2026-01-26 20:57:48[39m] (step=0004684) Train Loss mse: 0.0000, Train Loss ce: 0.0151, Train Steps/Sec: 1.02,
|