Upload checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins/checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins
Browse files
checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins/checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins/wandb/offline-run-20260125_170425-checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins-run0/files/output.log
CHANGED
|
@@ -781,6 +781,62 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 781 |
[[34m2026-01-25 21:19:28[39m] (step=0000770) Train Loss mse: 0.0214, Train Loss ce: 0.2890, Train Steps/Sec: 0.06,
|
| 782 |
[[34m2026-01-25 21:19:50[39m] (step=0000771) Train Loss mse: 0.0135, Train Loss ce: 0.2694, Train Steps/Sec: 0.05,
|
| 783 |
[[34m2026-01-25 21:20:06[39m] (step=0000772) Train Loss mse: 0.0331, Train Loss ce: 0.2532, Train Steps/Sec: 0.06,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 784 |
FullyShardedDataParallel(
|
| 785 |
(_fsdp_wrapped_module): Bagel(
|
| 786 |
(language_model): Qwen2ForCausalLM(
|
|
@@ -981,62 +1037,6 @@ Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
|
| 981 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 982 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 983 |
ce_avg: 0.5879027843475342, mse_avg: 0.008525446057319641
|
| 984 |
-
[[34m2026-01-25 21:20:23[39m] (step=0000773) Train Loss mse: 0.0232, Train Loss ce: 0.2757, Train Steps/Sec: 0.06,
|
| 985 |
-
[[34m2026-01-25 21:20:44[39m] (step=0000774) Train Loss mse: 0.0237, Train Loss ce: 0.2745, Train Steps/Sec: 0.05,
|
| 986 |
-
[[34m2026-01-25 21:21:03[39m] (step=0000775) Train Loss mse: 0.0244, Train Loss ce: 0.2767, Train Steps/Sec: 0.05,
|
| 987 |
-
[[34m2026-01-25 21:21:23[39m] (step=0000776) Train Loss mse: 0.0153, Train Loss ce: 0.2571, Train Steps/Sec: 0.05,
|
| 988 |
-
[[34m2026-01-25 21:21:45[39m] (step=0000777) Train Loss mse: 0.0167, Train Loss ce: 0.2734, Train Steps/Sec: 0.05,
|
| 989 |
-
[[34m2026-01-25 21:22:05[39m] (step=0000778) Train Loss mse: 0.0242, Train Loss ce: 0.2577, Train Steps/Sec: 0.05,
|
| 990 |
-
[[34m2026-01-25 21:22:21[39m] (step=0000779) Train Loss mse: 0.0266, Train Loss ce: 0.2996, Train Steps/Sec: 0.06,
|
| 991 |
-
[[34m2026-01-25 21:22:41[39m] (step=0000780) Train Loss mse: 0.0120, Train Loss ce: 0.2880, Train Steps/Sec: 0.05,
|
| 992 |
-
[[34m2026-01-25 21:23:00[39m] (step=0000781) Train Loss mse: 0.0181, Train Loss ce: 0.2728, Train Steps/Sec: 0.05,
|
| 993 |
-
[[34m2026-01-25 21:23:21[39m] (step=0000782) Train Loss mse: 0.0171, Train Loss ce: 0.2540, Train Steps/Sec: 0.05,
|
| 994 |
-
[[34m2026-01-25 21:23:38[39m] (step=0000783) Train Loss mse: 0.0267, Train Loss ce: 0.2833, Train Steps/Sec: 0.06,
|
| 995 |
-
[[34m2026-01-25 21:23:57[39m] (step=0000784) Train Loss mse: 0.0150, Train Loss ce: 0.2550, Train Steps/Sec: 0.05,
|
| 996 |
-
[[34m2026-01-25 21:24:19[39m] (step=0000785) Train Loss mse: 0.0116, Train Loss ce: 0.2735, Train Steps/Sec: 0.05,
|
| 997 |
-
[[34m2026-01-25 21:24:41[39m] (step=0000786) Train Loss mse: 0.0338, Train Loss ce: 0.2884, Train Steps/Sec: 0.05,
|
| 998 |
-
[[34m2026-01-25 21:25:00[39m] (step=0000787) Train Loss mse: 0.0223, Train Loss ce: 0.2747, Train Steps/Sec: 0.05,
|
| 999 |
-
[[34m2026-01-25 21:25:21[39m] (step=0000788) Train Loss mse: 0.0205, Train Loss ce: 0.2710, Train Steps/Sec: 0.05,
|
| 1000 |
-
[[34m2026-01-25 21:25:44[39m] (step=0000789) Train Loss mse: 0.0182, Train Loss ce: 0.2671, Train Steps/Sec: 0.04,
|
| 1001 |
-
[[34m2026-01-25 21:26:03[39m] (step=0000790) Train Loss mse: 0.0252, Train Loss ce: 0.2724, Train Steps/Sec: 0.05,
|
| 1002 |
-
[[34m2026-01-25 21:26:24[39m] (step=0000791) Train Loss mse: 0.0264, Train Loss ce: 0.2730, Train Steps/Sec: 0.05,
|
| 1003 |
-
[[34m2026-01-25 21:26:43[39m] (step=0000792) Train Loss mse: 0.0222, Train Loss ce: 0.2753, Train Steps/Sec: 0.05,
|
| 1004 |
-
[[34m2026-01-25 21:27:02[39m] (step=0000793) Train Loss mse: 0.0116, Train Loss ce: 0.2812, Train Steps/Sec: 0.05,
|
| 1005 |
-
[[34m2026-01-25 21:27:24[39m] (step=0000794) Train Loss mse: 0.0301, Train Loss ce: 0.2509, Train Steps/Sec: 0.05,
|
| 1006 |
-
[[34m2026-01-25 21:27:40[39m] (step=0000795) Train Loss mse: 0.0214, Train Loss ce: 0.2519, Train Steps/Sec: 0.06,
|
| 1007 |
-
[[34m2026-01-25 21:27:59[39m] (step=0000796) Train Loss mse: 0.0252, Train Loss ce: 0.2716, Train Steps/Sec: 0.05,
|
| 1008 |
-
[[34m2026-01-25 21:28:15[39m] (step=0000797) Train Loss mse: 0.0191, Train Loss ce: 0.2663, Train Steps/Sec: 0.06,
|
| 1009 |
-
[[34m2026-01-25 21:28:36[39m] (step=0000798) Train Loss mse: 0.0243, Train Loss ce: 0.2813, Train Steps/Sec: 0.05,
|
| 1010 |
-
[[34m2026-01-25 21:28:58[39m] (step=0000799) Train Loss mse: 0.0207, Train Loss ce: 0.2609, Train Steps/Sec: 0.05,
|
| 1011 |
-
[[34m2026-01-25 21:29:21[39m] (step=0000800) Train Loss mse: 0.0376, Train Loss ce: 0.2635, Train Steps/Sec: 0.04,
|
| 1012 |
-
[[34m2026-01-25 21:29:39[39m] (step=0000801) Train Loss mse: 0.0176, Train Loss ce: 0.2571, Train Steps/Sec: 0.06,
|
| 1013 |
-
[[34m2026-01-25 21:30:00[39m] (step=0000802) Train Loss mse: 0.0268, Train Loss ce: 0.2634, Train Steps/Sec: 0.05,
|
| 1014 |
-
[[34m2026-01-25 21:30:18[39m] (step=0000803) Train Loss mse: 0.0166, Train Loss ce: 0.2634, Train Steps/Sec: 0.06,
|
| 1015 |
-
[[34m2026-01-25 21:30:41[39m] (step=0000804) Train Loss mse: 0.0260, Train Loss ce: 0.2712, Train Steps/Sec: 0.04,
|
| 1016 |
-
[[34m2026-01-25 21:31:02[39m] (step=0000805) Train Loss mse: 0.0198, Train Loss ce: 0.2557, Train Steps/Sec: 0.05,
|
| 1017 |
-
[[34m2026-01-25 21:31:20[39m] (step=0000806) Train Loss mse: 0.0182, Train Loss ce: 0.2784, Train Steps/Sec: 0.05,
|
| 1018 |
-
[[34m2026-01-25 21:31:34[39m] (step=0000807) Train Loss mse: 0.0287, Train Loss ce: 0.2681, Train Steps/Sec: 0.07,
|
| 1019 |
-
[[34m2026-01-25 21:31:55[39m] (step=0000808) Train Loss mse: 0.0165, Train Loss ce: 0.2520, Train Steps/Sec: 0.05,
|
| 1020 |
-
[[34m2026-01-25 21:32:10[39m] (step=0000809) Train Loss mse: 0.0386, Train Loss ce: 0.2666, Train Steps/Sec: 0.07,
|
| 1021 |
-
[[34m2026-01-25 21:32:27[39m] (step=0000810) Train Loss mse: 0.0144, Train Loss ce: 0.2755, Train Steps/Sec: 0.06,
|
| 1022 |
-
[[34m2026-01-25 21:32:44[39m] (step=0000811) Train Loss mse: 0.0192, Train Loss ce: 0.2767, Train Steps/Sec: 0.06,
|
| 1023 |
-
[[34m2026-01-25 21:33:04[39m] (step=0000812) Train Loss mse: 0.0235, Train Loss ce: 0.2813, Train Steps/Sec: 0.05,
|
| 1024 |
-
[[34m2026-01-25 21:33:19[39m] (step=0000813) Train Loss mse: 0.0263, Train Loss ce: 0.2622, Train Steps/Sec: 0.06,
|
| 1025 |
-
[[34m2026-01-25 21:33:41[39m] (step=0000814) Train Loss mse: 0.0219, Train Loss ce: 0.2812, Train Steps/Sec: 0.05,
|
| 1026 |
-
[[34m2026-01-25 21:33:57[39m] (step=0000815) Train Loss mse: 0.0234, Train Loss ce: 0.2941, Train Steps/Sec: 0.07,
|
| 1027 |
-
[[34m2026-01-25 21:34:16[39m] (step=0000816) Train Loss mse: 0.0220, Train Loss ce: 0.2732, Train Steps/Sec: 0.05,
|
| 1028 |
-
[[34m2026-01-25 21:34:34[39m] (step=0000817) Train Loss mse: 0.0237, Train Loss ce: 0.2544, Train Steps/Sec: 0.05,
|
| 1029 |
-
[[34m2026-01-25 21:34:57[39m] (step=0000818) Train Loss mse: 0.0201, Train Loss ce: 0.2899, Train Steps/Sec: 0.04,
|
| 1030 |
-
[[34m2026-01-25 21:35:18[39m] (step=0000819) Train Loss mse: 0.0205, Train Loss ce: 0.2655, Train Steps/Sec: 0.05,
|
| 1031 |
-
[[34m2026-01-25 21:35:34[39m] (step=0000820) Train Loss mse: 0.0316, Train Loss ce: 0.2858, Train Steps/Sec: 0.06,
|
| 1032 |
-
[[34m2026-01-25 21:35:52[39m] (step=0000821) Train Loss mse: 0.0224, Train Loss ce: 0.2598, Train Steps/Sec: 0.06,
|
| 1033 |
-
[[34m2026-01-25 21:36:16[39m] (step=0000822) Train Loss mse: 0.0216, Train Loss ce: 0.2680, Train Steps/Sec: 0.04,
|
| 1034 |
-
[[34m2026-01-25 21:36:35[39m] (step=0000823) Train Loss mse: 0.0319, Train Loss ce: 0.2619, Train Steps/Sec: 0.05,
|
| 1035 |
-
[[34m2026-01-25 21:36:54[39m] (step=0000824) Train Loss mse: 0.0220, Train Loss ce: 0.2565, Train Steps/Sec: 0.05,
|
| 1036 |
-
[[34m2026-01-25 21:37:11[39m] (step=0000825) Train Loss mse: 0.0507, Train Loss ce: 0.2990, Train Steps/Sec: 0.06,
|
| 1037 |
-
[[34m2026-01-25 21:37:31[39m] (step=0000826) Train Loss mse: 0.0275, Train Loss ce: 0.2735, Train Steps/Sec: 0.05,
|
| 1038 |
-
[[34m2026-01-25 21:37:54[39m] (step=0000827) Train Loss mse: 0.0266, Train Loss ce: 0.2762, Train Steps/Sec: 0.04,
|
| 1039 |
-
[[34m2026-01-25 21:38:15[39m] (step=0000828) Train Loss mse: 0.0188, Train Loss ce: 0.2666, Train Steps/Sec: 0.05,
|
| 1040 |
[[34m2026-01-25 21:38:35[39m] (step=0000829) Train Loss mse: 0.0242, Train Loss ce: 0.2744, Train Steps/Sec: 0.05,
|
| 1041 |
[[34m2026-01-25 21:38:57[39m] (step=0000830) Train Loss mse: 0.0351, Train Loss ce: 0.2716, Train Steps/Sec: 0.05,
|
| 1042 |
[[34m2026-01-25 21:39:15[39m] (step=0000831) Train Loss mse: 0.0151, Train Loss ce: 0.2599, Train Steps/Sec: 0.06,
|
|
@@ -2130,20 +2130,6 @@ ce_avg: 0.5879027843475342, mse_avg: 0.008525446057319641
|
|
| 2130 |
[[34m2026-01-26 03:25:32[39m] (step=0001919) Train Loss mse: 0.0294, Train Loss ce: 0.2876, Train Steps/Sec: 0.05,
|
| 2131 |
[[34m2026-01-26 03:25:50[39m] (step=0001920) Train Loss mse: 0.0426, Train Loss ce: 0.2808, Train Steps/Sec: 0.06,
|
| 2132 |
[[34m2026-01-26 03:26:07[39m] (step=0001921) Train Loss mse: 0.0266, Train Loss ce: 0.2536, Train Steps/Sec: 0.06,
|
| 2133 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins_step2000
|
| 2134 |
-
Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
| 2135 |
-
[eval debug] first 3 batch fingerprints:
|
| 2136 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 2137 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 2138 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 2139 |
-
ce_avg: 1.2331268787384033, mse_avg: 0.008829578757286072
|
| 2140 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins_step2500
|
| 2141 |
-
Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
| 2142 |
-
[eval debug] first 3 batch fingerprints:
|
| 2143 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 2144 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 2145 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 2146 |
-
ce_avg: 1.185328722000122, mse_avg: 0.008346919901669025
|
| 2147 |
[[34m2026-01-26 03:26:24[39m] (step=0001922) Train Loss mse: 0.0150, Train Loss ce: 0.2684, Train Steps/Sec: 0.06,
|
| 2148 |
[[34m2026-01-26 03:26:44[39m] (step=0001923) Train Loss mse: 0.0251, Train Loss ce: 0.2634, Train Steps/Sec: 0.05,
|
| 2149 |
[[34m2026-01-26 03:27:01[39m] (step=0001924) Train Loss mse: 0.0264, Train Loss ce: 0.2502, Train Steps/Sec: 0.06,
|
|
@@ -2164,6 +2150,20 @@ ce_avg: 1.185328722000122, mse_avg: 0.008346919901669025
|
|
| 2164 |
[[34m2026-01-26 03:31:52[39m] (step=0001939) Train Loss mse: 0.0276, Train Loss ce: 0.2518, Train Steps/Sec: 0.05,
|
| 2165 |
[[34m2026-01-26 03:32:06[39m] (step=0001940) Train Loss mse: 0.0379, Train Loss ce: 0.2623, Train Steps/Sec: 0.07,
|
| 2166 |
[[34m2026-01-26 03:32:26[39m] (step=0001941) Train Loss mse: 0.0189, Train Loss ce: 0.2602, Train Steps/Sec: 0.05,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2167 |
[[34m2026-01-26 03:32:46[39m] (step=0001942) Train Loss mse: 0.0175, Train Loss ce: 0.2652, Train Steps/Sec: 0.05,
|
| 2168 |
[[34m2026-01-26 03:33:03[39m] (step=0001943) Train Loss mse: 0.0184, Train Loss ce: 0.2582, Train Steps/Sec: 0.06,
|
| 2169 |
[[34m2026-01-26 03:33:25[39m] (step=0001944) Train Loss mse: 0.0178, Train Loss ce: 0.2633, Train Steps/Sec: 0.04,
|
|
@@ -3085,7 +3085,22 @@ ce_avg: 1.185328722000122, mse_avg: 0.008346919901669025
|
|
| 3085 |
[[34m2026-01-26 08:28:11[39m] (step=0002857) Train Loss mse: 0.0335, Train Loss ce: 0.2473, Train Steps/Sec: 0.06,
|
| 3086 |
[[34m2026-01-26 08:28:30[39m] (step=0002858) Train Loss mse: 0.0357, Train Loss ce: 0.2628, Train Steps/Sec: 0.05,
|
| 3087 |
[[34m2026-01-26 08:28:47[39m] (step=0002859) Train Loss mse: 0.0105, Train Loss ce: 0.2591, Train Steps/Sec: 0.06,
|
| 3088 |
-
[[34m2026-01-26 08:29:04
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3089 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins_step3000
|
| 3090 |
Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
| 3091 |
[eval debug] first 3 batch fingerprints:
|
|
@@ -3099,7 +3114,19 @@ Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
|
| 3099 |
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 3100 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 3101 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 3102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3103 |
[[34m2026-01-26 08:37:31[39m] (step=0002888) Train Loss mse: 0.0242, Train Loss ce: 0.2401, Train Steps/Sec: 0.05,
|
| 3104 |
[[34m2026-01-26 08:37:51[39m] (step=0002889) Train Loss mse: 0.0210, Train Loss ce: 0.2621, Train Steps/Sec: 0.05,
|
| 3105 |
[[34m2026-01-26 08:38:13[39m] (step=0002890) Train Loss mse: 0.0197, Train Loss ce: 0.2555, Train Steps/Sec: 0.04,
|
|
@@ -4064,18 +4091,6 @@ ce_avg: 0.2417636662721634, mse_avg: 0.007533130701631308
|
|
| 4064 |
[[34m2026-01-26 13:46:41[39m] (step=0003849) Train Loss mse: 0.0260, Train Loss ce: 0.2551, Train Steps/Sec: 0.07,
|
| 4065 |
[[34m2026-01-26 13:47:00[39m] (step=0003850) Train Loss mse: 0.0139, Train Loss ce: 0.2375, Train Steps/Sec: 0.05,
|
| 4066 |
[[34m2026-01-26 13:47:19[39m] (step=0003851) Train Loss mse: 0.0181, Train Loss ce: 0.2496, Train Steps/Sec: 0.05,
|
| 4067 |
-
[[34m2026-01-26 13:47:40[39m] (step=0003852) Train Loss mse: 0.0153, Train Loss ce: 0.2624, Train Steps/Sec: 0.05,
|
| 4068 |
-
[[34m2026-01-26 13:47:59[39m] (step=0003853) Train Loss mse: 0.0358, Train Loss ce: 0.2603, Train Steps/Sec: 0.05,
|
| 4069 |
-
[[34m2026-01-26 13:48:20[39m] (step=0003854) Train Loss mse: 0.0307, Train Loss ce: 0.2451, Train Steps/Sec: 0.05,
|
| 4070 |
-
[[34m2026-01-26 13:48:40[39m] (step=0003855) Train Loss mse: 0.0216, Train Loss ce: 0.2479, Train Steps/Sec: 0.05,
|
| 4071 |
-
[[34m2026-01-26 13:49:00[39m] (step=0003856) Train Loss mse: 0.0176, Train Loss ce: 0.2441, Train Steps/Sec: 0.05,
|
| 4072 |
-
[[34m2026-01-26 13:49:21[39m] (step=0003857) Train Loss mse: 0.0165, Train Loss ce: 0.2391, Train Steps/Sec: 0.05,
|
| 4073 |
-
[[34m2026-01-26 13:49:43[39m] (step=0003858) Train Loss mse: 0.0232, Train Loss ce: 0.2505, Train Steps/Sec: 0.05,
|
| 4074 |
-
[[34m2026-01-26 13:50:05[39m] (step=0003859) Train Loss mse: 0.0182, Train Loss ce: 0.2497, Train Steps/Sec: 0.04,
|
| 4075 |
-
[[34m2026-01-26 13:50:23[39m] (step=0003860) Train Loss mse: 0.0263, Train Loss ce: 0.2505, Train Steps/Sec: 0.06,
|
| 4076 |
-
[[34m2026-01-26 13:50:40[39m] (step=0003861) Train Loss mse: 0.0202, Train Loss ce: 0.2361, Train Steps/Sec: 0.06,
|
| 4077 |
-
[[34m2026-01-26 13:51:01[39m] (step=0003862) Train Loss mse: 0.0086, Train Loss ce: 0.2474, Train Steps/Sec: 0.05,
|
| 4078 |
-
[[34m2026-01-26 13:51:19[39m] (step=0003863) Train Loss mse: 0.0190, Train Loss ce: 0.2612, Train Steps/Sec: 0.05,
|
| 4079 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins_step4000
|
| 4080 |
Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
| 4081 |
[eval debug] first 3 batch fingerprints:
|
|
@@ -4090,6 +4105,18 @@ Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
|
| 4090 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 4091 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 4092 |
ce_avg: 0.23980583250522614, mse_avg: 0.007650755811482668
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4093 |
[[34m2026-01-26 13:51:37[39m] (step=0003864) Train Loss mse: 0.0254, Train Loss ce: 0.2425, Train Steps/Sec: 0.06,
|
| 4094 |
[[34m2026-01-26 13:51:54[39m] (step=0003865) Train Loss mse: 0.0189, Train Loss ce: 0.2616, Train Steps/Sec: 0.06,
|
| 4095 |
[[34m2026-01-26 13:52:12[39m] (step=0003866) Train Loss mse: 0.0487, Train Loss ce: 0.2488, Train Steps/Sec: 0.05,
|
|
@@ -5134,6 +5161,10 @@ ce_avg: 0.23980583250522614, mse_avg: 0.007650755811482668
|
|
| 5134 |
[[34m2026-01-26 19:26:53[39m] (step=0004905) Train Loss mse: 0.0182, Train Loss ce: 0.2456, Train Steps/Sec: 0.05,
|
| 5135 |
[[34m2026-01-26 19:27:11[39m] (step=0004906) Train Loss mse: 0.0186, Train Loss ce: 0.2570, Train Steps/Sec: 0.05,
|
| 5136 |
[[34m2026-01-26 19:27:28[39m] (step=0004907) Train Loss mse: 0.0279, Train Loss ce: 0.2629, Train Steps/Sec: 0.06,
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5137 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins_step5000
|
| 5138 |
Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
| 5139 |
[eval debug] first 3 batch fingerprints:
|
|
@@ -5141,10 +5172,6 @@ Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
|
| 5141 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 5142 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 5143 |
ce_avg: 0.23948809504508972, mse_avg: 0.007586085703223944
|
| 5144 |
-
[[34m2026-01-26 19:27:48[39m] (step=0004908) Train Loss mse: 0.0204, Train Loss ce: 0.2472, Train Steps/Sec: 0.05,
|
| 5145 |
-
[[34m2026-01-26 19:28:05[39m] (step=0004909) Train Loss mse: 0.0160, Train Loss ce: 0.2394, Train Steps/Sec: 0.06,
|
| 5146 |
-
[[34m2026-01-26 19:28:28[39m] (step=0004910) Train Loss mse: 0.0217, Train Loss ce: 0.2375, Train Steps/Sec: 0.04,
|
| 5147 |
-
[[34m2026-01-26 19:28:49[39m] (step=0004911) Train Loss mse: 0.0169, Train Loss ce: 0.2407, Train Steps/Sec: 0.05,
|
| 5148 |
[[34m2026-01-26 19:29:05[39m] (step=0004912) Train Loss mse: 0.0111, Train Loss ce: 0.2392, Train Steps/Sec: 0.06,
|
| 5149 |
[[34m2026-01-26 19:29:25[39m] (step=0004913) Train Loss mse: 0.0325, Train Loss ce: 0.2469, Train Steps/Sec: 0.05,
|
| 5150 |
[[34m2026-01-26 19:29:48[39m] (step=0004914) Train Loss mse: 0.0196, Train Loss ce: 0.2603, Train Steps/Sec: 0.04,
|
|
|
|
| 781 |
[[34m2026-01-25 21:19:28[39m] (step=0000770) Train Loss mse: 0.0214, Train Loss ce: 0.2890, Train Steps/Sec: 0.06,
|
| 782 |
[[34m2026-01-25 21:19:50[39m] (step=0000771) Train Loss mse: 0.0135, Train Loss ce: 0.2694, Train Steps/Sec: 0.05,
|
| 783 |
[[34m2026-01-25 21:20:06[39m] (step=0000772) Train Loss mse: 0.0331, Train Loss ce: 0.2532, Train Steps/Sec: 0.06,
|
| 784 |
+
[[34m2026-01-25 21:20:23[39m] (step=0000773) Train Loss mse: 0.0232, Train Loss ce: 0.2757, Train Steps/Sec: 0.06,
|
| 785 |
+
[[34m2026-01-25 21:20:44[39m] (step=0000774) Train Loss mse: 0.0237, Train Loss ce: 0.2745, Train Steps/Sec: 0.05,
|
| 786 |
+
[[34m2026-01-25 21:21:03[39m] (step=0000775) Train Loss mse: 0.0244, Train Loss ce: 0.2767, Train Steps/Sec: 0.05,
|
| 787 |
+
[[34m2026-01-25 21:21:23[39m] (step=0000776) Train Loss mse: 0.0153, Train Loss ce: 0.2571, Train Steps/Sec: 0.05,
|
| 788 |
+
[[34m2026-01-25 21:21:45[39m] (step=0000777) Train Loss mse: 0.0167, Train Loss ce: 0.2734, Train Steps/Sec: 0.05,
|
| 789 |
+
[[34m2026-01-25 21:22:05[39m] (step=0000778) Train Loss mse: 0.0242, Train Loss ce: 0.2577, Train Steps/Sec: 0.05,
|
| 790 |
+
[[34m2026-01-25 21:22:21[39m] (step=0000779) Train Loss mse: 0.0266, Train Loss ce: 0.2996, Train Steps/Sec: 0.06,
|
| 791 |
+
[[34m2026-01-25 21:22:41[39m] (step=0000780) Train Loss mse: 0.0120, Train Loss ce: 0.2880, Train Steps/Sec: 0.05,
|
| 792 |
+
[[34m2026-01-25 21:23:00[39m] (step=0000781) Train Loss mse: 0.0181, Train Loss ce: 0.2728, Train Steps/Sec: 0.05,
|
| 793 |
+
[[34m2026-01-25 21:23:21[39m] (step=0000782) Train Loss mse: 0.0171, Train Loss ce: 0.2540, Train Steps/Sec: 0.05,
|
| 794 |
+
[[34m2026-01-25 21:23:38[39m] (step=0000783) Train Loss mse: 0.0267, Train Loss ce: 0.2833, Train Steps/Sec: 0.06,
|
| 795 |
+
[[34m2026-01-25 21:23:57[39m] (step=0000784) Train Loss mse: 0.0150, Train Loss ce: 0.2550, Train Steps/Sec: 0.05,
|
| 796 |
+
[[34m2026-01-25 21:24:19[39m] (step=0000785) Train Loss mse: 0.0116, Train Loss ce: 0.2735, Train Steps/Sec: 0.05,
|
| 797 |
+
[[34m2026-01-25 21:24:41[39m] (step=0000786) Train Loss mse: 0.0338, Train Loss ce: 0.2884, Train Steps/Sec: 0.05,
|
| 798 |
+
[[34m2026-01-25 21:25:00[39m] (step=0000787) Train Loss mse: 0.0223, Train Loss ce: 0.2747, Train Steps/Sec: 0.05,
|
| 799 |
+
[[34m2026-01-25 21:25:21[39m] (step=0000788) Train Loss mse: 0.0205, Train Loss ce: 0.2710, Train Steps/Sec: 0.05,
|
| 800 |
+
[[34m2026-01-25 21:25:44[39m] (step=0000789) Train Loss mse: 0.0182, Train Loss ce: 0.2671, Train Steps/Sec: 0.04,
|
| 801 |
+
[[34m2026-01-25 21:26:03[39m] (step=0000790) Train Loss mse: 0.0252, Train Loss ce: 0.2724, Train Steps/Sec: 0.05,
|
| 802 |
+
[[34m2026-01-25 21:26:24[39m] (step=0000791) Train Loss mse: 0.0264, Train Loss ce: 0.2730, Train Steps/Sec: 0.05,
|
| 803 |
+
[[34m2026-01-25 21:26:43[39m] (step=0000792) Train Loss mse: 0.0222, Train Loss ce: 0.2753, Train Steps/Sec: 0.05,
|
| 804 |
+
[[34m2026-01-25 21:27:02[39m] (step=0000793) Train Loss mse: 0.0116, Train Loss ce: 0.2812, Train Steps/Sec: 0.05,
|
| 805 |
+
[[34m2026-01-25 21:27:24[39m] (step=0000794) Train Loss mse: 0.0301, Train Loss ce: 0.2509, Train Steps/Sec: 0.05,
|
| 806 |
+
[[34m2026-01-25 21:27:40[39m] (step=0000795) Train Loss mse: 0.0214, Train Loss ce: 0.2519, Train Steps/Sec: 0.06,
|
| 807 |
+
[[34m2026-01-25 21:27:59[39m] (step=0000796) Train Loss mse: 0.0252, Train Loss ce: 0.2716, Train Steps/Sec: 0.05,
|
| 808 |
+
[[34m2026-01-25 21:28:15[39m] (step=0000797) Train Loss mse: 0.0191, Train Loss ce: 0.2663, Train Steps/Sec: 0.06,
|
| 809 |
+
[[34m2026-01-25 21:28:36[39m] (step=0000798) Train Loss mse: 0.0243, Train Loss ce: 0.2813, Train Steps/Sec: 0.05,
|
| 810 |
+
[[34m2026-01-25 21:28:58[39m] (step=0000799) Train Loss mse: 0.0207, Train Loss ce: 0.2609, Train Steps/Sec: 0.05,
|
| 811 |
+
[[34m2026-01-25 21:29:21[39m] (step=0000800) Train Loss mse: 0.0376, Train Loss ce: 0.2635, Train Steps/Sec: 0.04,
|
| 812 |
+
[[34m2026-01-25 21:29:39[39m] (step=0000801) Train Loss mse: 0.0176, Train Loss ce: 0.2571, Train Steps/Sec: 0.06,
|
| 813 |
+
[[34m2026-01-25 21:30:00[39m] (step=0000802) Train Loss mse: 0.0268, Train Loss ce: 0.2634, Train Steps/Sec: 0.05,
|
| 814 |
+
[[34m2026-01-25 21:30:18[39m] (step=0000803) Train Loss mse: 0.0166, Train Loss ce: 0.2634, Train Steps/Sec: 0.06,
|
| 815 |
+
[[34m2026-01-25 21:30:41[39m] (step=0000804) Train Loss mse: 0.0260, Train Loss ce: 0.2712, Train Steps/Sec: 0.04,
|
| 816 |
+
[[34m2026-01-25 21:31:02[39m] (step=0000805) Train Loss mse: 0.0198, Train Loss ce: 0.2557, Train Steps/Sec: 0.05,
|
| 817 |
+
[[34m2026-01-25 21:31:20[39m] (step=0000806) Train Loss mse: 0.0182, Train Loss ce: 0.2784, Train Steps/Sec: 0.05,
|
| 818 |
+
[[34m2026-01-25 21:31:34[39m] (step=0000807) Train Loss mse: 0.0287, Train Loss ce: 0.2681, Train Steps/Sec: 0.07,
|
| 819 |
+
[[34m2026-01-25 21:31:55[39m] (step=0000808) Train Loss mse: 0.0165, Train Loss ce: 0.2520, Train Steps/Sec: 0.05,
|
| 820 |
+
[[34m2026-01-25 21:32:10[39m] (step=0000809) Train Loss mse: 0.0386, Train Loss ce: 0.2666, Train Steps/Sec: 0.07,
|
| 821 |
+
[[34m2026-01-25 21:32:27[39m] (step=0000810) Train Loss mse: 0.0144, Train Loss ce: 0.2755, Train Steps/Sec: 0.06,
|
| 822 |
+
[[34m2026-01-25 21:32:44[39m] (step=0000811) Train Loss mse: 0.0192, Train Loss ce: 0.2767, Train Steps/Sec: 0.06,
|
| 823 |
+
[[34m2026-01-25 21:33:04[39m] (step=0000812) Train Loss mse: 0.0235, Train Loss ce: 0.2813, Train Steps/Sec: 0.05,
|
| 824 |
+
[[34m2026-01-25 21:33:19[39m] (step=0000813) Train Loss mse: 0.0263, Train Loss ce: 0.2622, Train Steps/Sec: 0.06,
|
| 825 |
+
[[34m2026-01-25 21:33:41[39m] (step=0000814) Train Loss mse: 0.0219, Train Loss ce: 0.2812, Train Steps/Sec: 0.05,
|
| 826 |
+
[[34m2026-01-25 21:33:57[39m] (step=0000815) Train Loss mse: 0.0234, Train Loss ce: 0.2941, Train Steps/Sec: 0.07,
|
| 827 |
+
[[34m2026-01-25 21:34:16[39m] (step=0000816) Train Loss mse: 0.0220, Train Loss ce: 0.2732, Train Steps/Sec: 0.05,
|
| 828 |
+
[[34m2026-01-25 21:34:34[39m] (step=0000817) Train Loss mse: 0.0237, Train Loss ce: 0.2544, Train Steps/Sec: 0.05,
|
| 829 |
+
[[34m2026-01-25 21:34:57[39m] (step=0000818) Train Loss mse: 0.0201, Train Loss ce: 0.2899, Train Steps/Sec: 0.04,
|
| 830 |
+
[[34m2026-01-25 21:35:18[39m] (step=0000819) Train Loss mse: 0.0205, Train Loss ce: 0.2655, Train Steps/Sec: 0.05,
|
| 831 |
+
[[34m2026-01-25 21:35:34[39m] (step=0000820) Train Loss mse: 0.0316, Train Loss ce: 0.2858, Train Steps/Sec: 0.06,
|
| 832 |
+
[[34m2026-01-25 21:35:52[39m] (step=0000821) Train Loss mse: 0.0224, Train Loss ce: 0.2598, Train Steps/Sec: 0.06,
|
| 833 |
+
[[34m2026-01-25 21:36:16[39m] (step=0000822) Train Loss mse: 0.0216, Train Loss ce: 0.2680, Train Steps/Sec: 0.04,
|
| 834 |
+
[[34m2026-01-25 21:36:35[39m] (step=0000823) Train Loss mse: 0.0319, Train Loss ce: 0.2619, Train Steps/Sec: 0.05,
|
| 835 |
+
[[34m2026-01-25 21:36:54[39m] (step=0000824) Train Loss mse: 0.0220, Train Loss ce: 0.2565, Train Steps/Sec: 0.05,
|
| 836 |
+
[[34m2026-01-25 21:37:11[39m] (step=0000825) Train Loss mse: 0.0507, Train Loss ce: 0.2990, Train Steps/Sec: 0.06,
|
| 837 |
+
[[34m2026-01-25 21:37:31[39m] (step=0000826) Train Loss mse: 0.0275, Train Loss ce: 0.2735, Train Steps/Sec: 0.05,
|
| 838 |
+
[[34m2026-01-25 21:37:54[39m] (step=0000827) Train Loss mse: 0.0266, Train Loss ce: 0.2762, Train Steps/Sec: 0.04,
|
| 839 |
+
[[34m2026-01-25 21:38:15[39m] (step=0000828) Train Loss mse: 0.0188, Train Loss ce: 0.2666, Train Steps/Sec: 0.05,
|
| 840 |
FullyShardedDataParallel(
|
| 841 |
(_fsdp_wrapped_module): Bagel(
|
| 842 |
(language_model): Qwen2ForCausalLM(
|
|
|
|
| 1037 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 1038 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 1039 |
ce_avg: 0.5879027843475342, mse_avg: 0.008525446057319641
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1040 |
[[34m2026-01-25 21:38:35[39m] (step=0000829) Train Loss mse: 0.0242, Train Loss ce: 0.2744, Train Steps/Sec: 0.05,
|
| 1041 |
[[34m2026-01-25 21:38:57[39m] (step=0000830) Train Loss mse: 0.0351, Train Loss ce: 0.2716, Train Steps/Sec: 0.05,
|
| 1042 |
[[34m2026-01-25 21:39:15[39m] (step=0000831) Train Loss mse: 0.0151, Train Loss ce: 0.2599, Train Steps/Sec: 0.06,
|
|
|
|
| 2130 |
[[34m2026-01-26 03:25:32[39m] (step=0001919) Train Loss mse: 0.0294, Train Loss ce: 0.2876, Train Steps/Sec: 0.05,
|
| 2131 |
[[34m2026-01-26 03:25:50[39m] (step=0001920) Train Loss mse: 0.0426, Train Loss ce: 0.2808, Train Steps/Sec: 0.06,
|
| 2132 |
[[34m2026-01-26 03:26:07[39m] (step=0001921) Train Loss mse: 0.0266, Train Loss ce: 0.2536, Train Steps/Sec: 0.06,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2133 |
[[34m2026-01-26 03:26:24[39m] (step=0001922) Train Loss mse: 0.0150, Train Loss ce: 0.2684, Train Steps/Sec: 0.06,
|
| 2134 |
[[34m2026-01-26 03:26:44[39m] (step=0001923) Train Loss mse: 0.0251, Train Loss ce: 0.2634, Train Steps/Sec: 0.05,
|
| 2135 |
[[34m2026-01-26 03:27:01[39m] (step=0001924) Train Loss mse: 0.0264, Train Loss ce: 0.2502, Train Steps/Sec: 0.06,
|
|
|
|
| 2150 |
[[34m2026-01-26 03:31:52[39m] (step=0001939) Train Loss mse: 0.0276, Train Loss ce: 0.2518, Train Steps/Sec: 0.05,
|
| 2151 |
[[34m2026-01-26 03:32:06[39m] (step=0001940) Train Loss mse: 0.0379, Train Loss ce: 0.2623, Train Steps/Sec: 0.07,
|
| 2152 |
[[34m2026-01-26 03:32:26[39m] (step=0001941) Train Loss mse: 0.0189, Train Loss ce: 0.2602, Train Steps/Sec: 0.05,
|
| 2153 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins_step2000
|
| 2154 |
+
Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
| 2155 |
+
[eval debug] first 3 batch fingerprints:
|
| 2156 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 2157 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 2158 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 2159 |
+
ce_avg: 1.2331268787384033, mse_avg: 0.008829578757286072
|
| 2160 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins_step2500
|
| 2161 |
+
Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
| 2162 |
+
[eval debug] first 3 batch fingerprints:
|
| 2163 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 2164 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 2165 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 2166 |
+
ce_avg: 1.185328722000122, mse_avg: 0.008346919901669025
|
| 2167 |
[[34m2026-01-26 03:32:46[39m] (step=0001942) Train Loss mse: 0.0175, Train Loss ce: 0.2652, Train Steps/Sec: 0.05,
|
| 2168 |
[[34m2026-01-26 03:33:03[39m] (step=0001943) Train Loss mse: 0.0184, Train Loss ce: 0.2582, Train Steps/Sec: 0.06,
|
| 2169 |
[[34m2026-01-26 03:33:25[39m] (step=0001944) Train Loss mse: 0.0178, Train Loss ce: 0.2633, Train Steps/Sec: 0.04,
|
|
|
|
| 3085 |
[[34m2026-01-26 08:28:11[39m] (step=0002857) Train Loss mse: 0.0335, Train Loss ce: 0.2473, Train Steps/Sec: 0.06,
|
| 3086 |
[[34m2026-01-26 08:28:30[39m] (step=0002858) Train Loss mse: 0.0357, Train Loss ce: 0.2628, Train Steps/Sec: 0.05,
|
| 3087 |
[[34m2026-01-26 08:28:47[39m] (step=0002859) Train Loss mse: 0.0105, Train Loss ce: 0.2591, Train Steps/Sec: 0.06,
|
| 3088 |
+
[[34m2026-01-26 08:29:04[39m] (step=0002860) Train Loss mse: 0.0215, Train Loss ce: 0.2492, Train Steps/Sec: 0.06,
|
| 3089 |
+
[[34m2026-01-26 08:29:22[39m] (step=0002861) Train Loss mse: 0.0135, Train Loss ce: 0.2470, Train Steps/Sec: 0.05,
|
| 3090 |
+
[[34m2026-01-26 08:29:40[39m] (step=0002862) Train Loss mse: 0.0227, Train Loss ce: 0.2668, Train Steps/Sec: 0.06,
|
| 3091 |
+
[[34m2026-01-26 08:29:59[39m] (step=0002863) Train Loss mse: 0.0226, Train Loss ce: 0.2567, Train Steps/Sec: 0.05,
|
| 3092 |
+
[[34m2026-01-26 08:30:16[39m] (step=0002864) Train Loss mse: 0.0198, Train Loss ce: 0.2684, Train Steps/Sec: 0.06,
|
| 3093 |
+
[[34m2026-01-26 08:30:38[39m] (step=0002865) Train Loss mse: 0.0179, Train Loss ce: 0.2545, Train Steps/Sec: 0.05,
|
| 3094 |
+
[[34m2026-01-26 08:30:56[39m] (step=0002866) Train Loss mse: 0.0368, Train Loss ce: 0.2568, Train Steps/Sec: 0.05,
|
| 3095 |
+
[[34m2026-01-26 08:31:16[39m] (step=0002867) Train Loss mse: 0.0119, Train Loss ce: 0.2393, Train Steps/Sec: 0.05,
|
| 3096 |
+
[[34m2026-01-26 08:31:35[39m] (step=0002868) Train Loss mse: 0.0218, Train Loss ce: 0.2533, Train Steps/Sec: 0.05,
|
| 3097 |
+
[[34m2026-01-26 08:31:52[39m] (step=0002869) Train Loss mse: 0.0175, Train Loss ce: 0.2578, Train Steps/Sec: 0.06,
|
| 3098 |
+
[[34m2026-01-26 08:32:10[39m] (step=0002870) Train Loss mse: 0.0153, Train Loss ce: 0.2477, Train Steps/Sec: 0.06,
|
| 3099 |
+
[[34m2026-01-26 08:32:28[39m] (step=0002871) Train Loss mse: 0.0252, Train Loss ce: 0.2478, Train Steps/Sec: 0.06,
|
| 3100 |
+
[[34m2026-01-26 08:32:45[39m] (step=0002872) Train Loss mse: 0.0386, Train Loss ce: 0.2694, Train Steps/Sec: 0.06,
|
| 3101 |
+
[[34m2026-01-26 08:33:02[39m] (step=0002873) Train Loss mse: 0.0198, Train Loss ce: 0.2483, Train Steps/Sec: 0.06,
|
| 3102 |
+
[[34m2026-01-26 08:33:20[39m] (step=0002874) Train Loss mse: 0.0182, Train Loss ce: 0.2428, Train Steps/Sec: 0.06,
|
| 3103 |
+
[[34m2026-01-26 08:33:40
|
| 3104 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins_step3000
|
| 3105 |
Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
| 3106 |
[eval debug] first 3 batch fingerprints:
|
|
|
|
| 3114 |
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 3115 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 3116 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 3117 |
+
[[34m2026-01-26 08:33:40[39m] (step=0002875) Train Loss mse: 0.0141, Train Loss ce: 0.2511, Train Steps/Sec: 0.05,
|
| 3118 |
+
[[34m2026-01-26 08:33:56[39m] (step=0002876) Train Loss mse: 0.0191, Train Loss ce: 0.2676, Train Steps/Sec: 0.06,
|
| 3119 |
+
[[34m2026-01-26 08:34:16[39m] (step=0002877) Train Loss mse: 0.0193, Train Loss ce: 0.2443, Train Steps/Sec: 0.05,
|
| 3120 |
+
[[34m2026-01-26 08:34:35[39m] (step=0002878) Train Loss mse: 0.0272, Train Loss ce: 0.2668, Train Steps/Sec: 0.05,
|
| 3121 |
+
[[34m2026-01-26 08:34:49[39m] (step=0002879) Train Loss mse: 0.0296, Train Loss ce: 0.2523, Train Steps/Sec: 0.07,
|
| 3122 |
+
[[34m2026-01-26 08:35:07[39m] (step=0002880) Train Loss mse: 0.0135, Train Loss ce: 0.2614, Train Steps/Sec: 0.06,
|
| 3123 |
+
[[34m2026-01-26 08:35:26[39m] (step=0002881) Train Loss mse: 0.0178, Train Loss ce: 0.2501, Train Steps/Sec: 0.05,
|
| 3124 |
+
[[34m2026-01-26 08:35:42[39m] (step=0002882) Train Loss mse: 0.0128, Train Loss ce: 0.2528, Train Steps/Sec: 0.06,
|
| 3125 |
+
[[34m2026-01-26 08:36:01[39m] (step=0002883) Train Loss mse: 0.0314, Train Loss ce: 0.2806, Train Steps/Sec: 0.06,
|
| 3126 |
+
[[34m2026-01-26 08:36:15[39m] (step=0002884) Train Loss mse: 0.0234, Train Loss ce: 0.2533, Train Steps/Sec: 0.07,
|
| 3127 |
+
[[34m2026-01-26 08:36:33[39m] (step=0002885) Train Loss mse: 0.0144, Train Loss ce: 0.2517, Train Steps/Sec: 0.05,
|
| 3128 |
+
[[34m2026-01-26 08:36:52[39m] (step=0002886) Train Loss mse: 0.0205, Train Loss ce: 0.2331, Train Steps/Sec: 0.06,
|
| 3129 |
+
[[34m2026-01-26 08:37:11[39m] (step=0002887) Train Loss mse: 0.0206, Train Loss ce: 0.2640, Train Steps/Sec: 0.05,
|
| 3130 |
[[34m2026-01-26 08:37:31[39m] (step=0002888) Train Loss mse: 0.0242, Train Loss ce: 0.2401, Train Steps/Sec: 0.05,
|
| 3131 |
[[34m2026-01-26 08:37:51[39m] (step=0002889) Train Loss mse: 0.0210, Train Loss ce: 0.2621, Train Steps/Sec: 0.05,
|
| 3132 |
[[34m2026-01-26 08:38:13[39m] (step=0002890) Train Loss mse: 0.0197, Train Loss ce: 0.2555, Train Steps/Sec: 0.04,
|
|
|
|
| 4091 |
[[34m2026-01-26 13:46:41[39m] (step=0003849) Train Loss mse: 0.0260, Train Loss ce: 0.2551, Train Steps/Sec: 0.07,
|
| 4092 |
[[34m2026-01-26 13:47:00[39m] (step=0003850) Train Loss mse: 0.0139, Train Loss ce: 0.2375, Train Steps/Sec: 0.05,
|
| 4093 |
[[34m2026-01-26 13:47:19[39m] (step=0003851) Train Loss mse: 0.0181, Train Loss ce: 0.2496, Train Steps/Sec: 0.05,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4094 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins_step4000
|
| 4095 |
Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
| 4096 |
[eval debug] first 3 batch fingerprints:
|
|
|
|
| 4105 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 4106 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 4107 |
ce_avg: 0.23980583250522614, mse_avg: 0.007650755811482668
|
| 4108 |
+
[[34m2026-01-26 13:47:40[39m] (step=0003852) Train Loss mse: 0.0153, Train Loss ce: 0.2624, Train Steps/Sec: 0.05,
|
| 4109 |
+
[[34m2026-01-26 13:47:59[39m] (step=0003853) Train Loss mse: 0.0358, Train Loss ce: 0.2603, Train Steps/Sec: 0.05,
|
| 4110 |
+
[[34m2026-01-26 13:48:20[39m] (step=0003854) Train Loss mse: 0.0307, Train Loss ce: 0.2451, Train Steps/Sec: 0.05,
|
| 4111 |
+
[[34m2026-01-26 13:48:40[39m] (step=0003855) Train Loss mse: 0.0216, Train Loss ce: 0.2479, Train Steps/Sec: 0.05,
|
| 4112 |
+
[[34m2026-01-26 13:49:00[39m] (step=0003856) Train Loss mse: 0.0176, Train Loss ce: 0.2441, Train Steps/Sec: 0.05,
|
| 4113 |
+
[[34m2026-01-26 13:49:21[39m] (step=0003857) Train Loss mse: 0.0165, Train Loss ce: 0.2391, Train Steps/Sec: 0.05,
|
| 4114 |
+
[[34m2026-01-26 13:49:43[39m] (step=0003858) Train Loss mse: 0.0232, Train Loss ce: 0.2505, Train Steps/Sec: 0.05,
|
| 4115 |
+
[[34m2026-01-26 13:50:05[39m] (step=0003859) Train Loss mse: 0.0182, Train Loss ce: 0.2497, Train Steps/Sec: 0.04,
|
| 4116 |
+
[[34m2026-01-26 13:50:23[39m] (step=0003860) Train Loss mse: 0.0263, Train Loss ce: 0.2505, Train Steps/Sec: 0.06,
|
| 4117 |
+
[[34m2026-01-26 13:50:40[39m] (step=0003861) Train Loss mse: 0.0202, Train Loss ce: 0.2361, Train Steps/Sec: 0.06,
|
| 4118 |
+
[[34m2026-01-26 13:51:01[39m] (step=0003862) Train Loss mse: 0.0086, Train Loss ce: 0.2474, Train Steps/Sec: 0.05,
|
| 4119 |
+
[[34m2026-01-26 13:51:19[39m] (step=0003863) Train Loss mse: 0.0190, Train Loss ce: 0.2612, Train Steps/Sec: 0.05,
|
| 4120 |
[[34m2026-01-26 13:51:37[39m] (step=0003864) Train Loss mse: 0.0254, Train Loss ce: 0.2425, Train Steps/Sec: 0.06,
|
| 4121 |
[[34m2026-01-26 13:51:54[39m] (step=0003865) Train Loss mse: 0.0189, Train Loss ce: 0.2616, Train Steps/Sec: 0.06,
|
| 4122 |
[[34m2026-01-26 13:52:12[39m] (step=0003866) Train Loss mse: 0.0487, Train Loss ce: 0.2488, Train Steps/Sec: 0.05,
|
|
|
|
| 5161 |
[[34m2026-01-26 19:26:53[39m] (step=0004905) Train Loss mse: 0.0182, Train Loss ce: 0.2456, Train Steps/Sec: 0.05,
|
| 5162 |
[[34m2026-01-26 19:27:11[39m] (step=0004906) Train Loss mse: 0.0186, Train Loss ce: 0.2570, Train Steps/Sec: 0.05,
|
| 5163 |
[[34m2026-01-26 19:27:28[39m] (step=0004907) Train Loss mse: 0.0279, Train Loss ce: 0.2629, Train Steps/Sec: 0.06,
|
| 5164 |
+
[[34m2026-01-26 19:27:48[39m] (step=0004908) Train Loss mse: 0.0204, Train Loss ce: 0.2472, Train Steps/Sec: 0.05,
|
| 5165 |
+
[[34m2026-01-26 19:28:05[39m] (step=0004909) Train Loss mse: 0.0160, Train Loss ce: 0.2394, Train Steps/Sec: 0.06,
|
| 5166 |
+
[[34m2026-01-26 19:28:28[39m] (step=0004910) Train Loss mse: 0.0217, Train Loss ce: 0.2375, Train Steps/Sec: 0.04,
|
| 5167 |
+
[[34m2026-01-26 19:28:49[39m] (step=0004911) Train Loss mse: 0.0169, Train Loss ce: 0.2407, Train Steps/Sec: 0.05,
|
| 5168 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_colorization_one_image_lr2e_5_ce_ins_step5000
|
| 5169 |
Preparing Dataset vlm_gym_colorization_celoss_evalonce/vlm_gym_colorization_val
|
| 5170 |
[eval debug] first 3 batch fingerprints:
|
|
|
|
| 5172 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 5173 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_colorization_celoss_evalonce'}]
|
| 5174 |
ce_avg: 0.23948809504508972, mse_avg: 0.007586085703223944
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5175 |
[[34m2026-01-26 19:29:05[39m] (step=0004912) Train Loss mse: 0.0111, Train Loss ce: 0.2392, Train Steps/Sec: 0.06,
|
| 5176 |
[[34m2026-01-26 19:29:25[39m] (step=0004913) Train Loss mse: 0.0325, Train Loss ce: 0.2469, Train Steps/Sec: 0.05,
|
| 5177 |
[[34m2026-01-26 19:29:48[39m] (step=0004914) Train Loss mse: 0.0196, Train Loss ce: 0.2603, Train Steps/Sec: 0.04,
|