Training in progress, step 16510
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +160 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 8388736
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef2399513eb12713e12f10982b0359cd719e392bcb1a78b2bbc82855ba01533c
|
| 3 |
size 8388736
|
trainer_log.jsonl
CHANGED
|
@@ -3160,3 +3160,163 @@
|
|
| 3160 |
{"current_steps": 15705, "total_steps": 16510, "loss": 0.0001, "lr": 3.6211744204203703e-07, "epoch": 9.512416717141127, "percentage": 95.12, "elapsed_time": "0:24:33", "remaining_time": "0:01:15", "throughput": 2056.06, "total_tokens": 3030544}
|
| 3161 |
{"current_steps": 15710, "total_steps": 16510, "loss": 0.0, "lr": 3.576493150982074e-07, "epoch": 9.515445184736523, "percentage": 95.15, "elapsed_time": "0:24:34", "remaining_time": "0:01:15", "throughput": 2056.1, "total_tokens": 3031488}
|
| 3162 |
{"current_steps": 15715, "total_steps": 16510, "loss": 0.0, "lr": 3.532087268066281e-07, "epoch": 9.51847365233192, "percentage": 95.18, "elapsed_time": "0:24:34", "remaining_time": "0:01:14", "throughput": 2056.14, "total_tokens": 3032400}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3160 |
{"current_steps": 15705, "total_steps": 16510, "loss": 0.0001, "lr": 3.6211744204203703e-07, "epoch": 9.512416717141127, "percentage": 95.12, "elapsed_time": "0:24:33", "remaining_time": "0:01:15", "throughput": 2056.06, "total_tokens": 3030544}
|
| 3161 |
{"current_steps": 15710, "total_steps": 16510, "loss": 0.0, "lr": 3.576493150982074e-07, "epoch": 9.515445184736523, "percentage": 95.15, "elapsed_time": "0:24:34", "remaining_time": "0:01:15", "throughput": 2056.1, "total_tokens": 3031488}
|
| 3162 |
{"current_steps": 15715, "total_steps": 16510, "loss": 0.0, "lr": 3.532087268066281e-07, "epoch": 9.51847365233192, "percentage": 95.18, "elapsed_time": "0:24:34", "remaining_time": "0:01:14", "throughput": 2056.14, "total_tokens": 3032400}
|
| 3163 |
+
{"current_steps": 15720, "total_steps": 16510, "loss": 0.0, "lr": 3.4879568212979886e-07, "epoch": 9.521502119927316, "percentage": 95.22, "elapsed_time": "0:24:35", "remaining_time": "0:01:14", "throughput": 2056.23, "total_tokens": 3033408}
|
| 3164 |
+
{"current_steps": 15725, "total_steps": 16510, "loss": 0.0, "lr": 3.444101859994553e-07, "epoch": 9.524530587522714, "percentage": 95.25, "elapsed_time": "0:24:35", "remaining_time": "0:01:13", "throughput": 2056.32, "total_tokens": 3034384}
|
| 3165 |
+
{"current_steps": 15730, "total_steps": 16510, "loss": 0.0, "lr": 3.4005224331653263e-07, "epoch": 9.527559055118111, "percentage": 95.28, "elapsed_time": "0:24:36", "remaining_time": "0:01:13", "throughput": 2056.42, "total_tokens": 3035392}
|
| 3166 |
+
{"current_steps": 15735, "total_steps": 16510, "loss": 0.0, "lr": 3.35721858951174e-07, "epoch": 9.530587522713507, "percentage": 95.31, "elapsed_time": "0:24:36", "remaining_time": "0:01:12", "throughput": 2056.45, "total_tokens": 3036288}
|
| 3167 |
+
{"current_steps": 15740, "total_steps": 16510, "loss": 0.0001, "lr": 3.314190377427334e-07, "epoch": 9.533615990308904, "percentage": 95.34, "elapsed_time": "0:24:36", "remaining_time": "0:01:12", "throughput": 2056.53, "total_tokens": 3037264}
|
| 3168 |
+
{"current_steps": 15745, "total_steps": 16510, "loss": 0.0, "lr": 3.2714378449975327e-07, "epoch": 9.5366444579043, "percentage": 95.37, "elapsed_time": "0:24:37", "remaining_time": "0:01:11", "throughput": 2056.56, "total_tokens": 3038160}
|
| 3169 |
+
{"current_steps": 15750, "total_steps": 16510, "loss": 0.0, "lr": 3.228961039999756e-07, "epoch": 9.539672925499698, "percentage": 95.4, "elapsed_time": "0:24:37", "remaining_time": "0:01:11", "throughput": 2056.61, "total_tokens": 3039088}
|
| 3170 |
+
{"current_steps": 15755, "total_steps": 16510, "loss": 0.0, "lr": 3.1867600099032555e-07, "epoch": 9.542701393095093, "percentage": 95.43, "elapsed_time": "0:24:38", "remaining_time": "0:01:10", "throughput": 2056.64, "total_tokens": 3039984}
|
| 3171 |
+
{"current_steps": 15760, "total_steps": 16510, "loss": 0.0, "lr": 3.144834801869084e-07, "epoch": 9.545729860690491, "percentage": 95.46, "elapsed_time": "0:24:38", "remaining_time": "0:01:10", "throughput": 2056.74, "total_tokens": 3040992}
|
| 3172 |
+
{"current_steps": 15765, "total_steps": 16510, "loss": 0.0, "lr": 3.103185462750097e-07, "epoch": 9.548758328285887, "percentage": 95.49, "elapsed_time": "0:24:38", "remaining_time": "0:01:09", "throughput": 2056.8, "total_tokens": 3041936}
|
| 3173 |
+
{"current_steps": 15770, "total_steps": 16510, "loss": 0.0, "lr": 3.0618120390907555e-07, "epoch": 9.551786795881284, "percentage": 95.52, "elapsed_time": "0:24:39", "remaining_time": "0:01:09", "throughput": 2056.85, "total_tokens": 3042864}
|
| 3174 |
+
{"current_steps": 15775, "total_steps": 16510, "loss": 0.0, "lr": 3.020714577127326e-07, "epoch": 9.55481526347668, "percentage": 95.55, "elapsed_time": "0:24:39", "remaining_time": "0:01:08", "throughput": 2056.9, "total_tokens": 3043792}
|
| 3175 |
+
{"current_steps": 15780, "total_steps": 16510, "loss": 0.0, "lr": 2.9798931227875693e-07, "epoch": 9.557843731072078, "percentage": 95.58, "elapsed_time": "0:24:40", "remaining_time": "0:01:08", "throughput": 2056.93, "total_tokens": 3044688}
|
| 3176 |
+
{"current_steps": 15785, "total_steps": 16510, "loss": 0.0, "lr": 2.939347721690772e-07, "epoch": 9.560872198667475, "percentage": 95.61, "elapsed_time": "0:24:40", "remaining_time": "0:01:08", "throughput": 2057.03, "total_tokens": 3045696}
|
| 3177 |
+
{"current_steps": 15790, "total_steps": 16510, "loss": 0.0, "lr": 2.8990784191478826e-07, "epoch": 9.563900666262871, "percentage": 95.64, "elapsed_time": "0:24:41", "remaining_time": "0:01:07", "throughput": 2057.05, "total_tokens": 3046576}
|
| 3178 |
+
{"current_steps": 15795, "total_steps": 16510, "loss": 0.0, "lr": 2.8590852601611263e-07, "epoch": 9.566929133858268, "percentage": 95.67, "elapsed_time": "0:24:41", "remaining_time": "0:01:07", "throughput": 2057.15, "total_tokens": 3047584}
|
| 3179 |
+
{"current_steps": 15800, "total_steps": 16510, "loss": 0.0, "lr": 2.819368289424196e-07, "epoch": 9.569957601453664, "percentage": 95.7, "elapsed_time": "0:24:41", "remaining_time": "0:01:06", "throughput": 2057.21, "total_tokens": 3048528}
|
| 3180 |
+
{"current_steps": 15805, "total_steps": 16510, "loss": 0.0001, "lr": 2.7799275513221703e-07, "epoch": 9.572986069049062, "percentage": 95.73, "elapsed_time": "0:24:42", "remaining_time": "0:01:06", "throughput": 2057.32, "total_tokens": 3049536}
|
| 3181 |
+
{"current_steps": 15810, "total_steps": 16510, "loss": 0.0, "lr": 2.740763089931347e-07, "epoch": 9.576014536644458, "percentage": 95.76, "elapsed_time": "0:24:42", "remaining_time": "0:01:05", "throughput": 2057.42, "total_tokens": 3050544}
|
| 3182 |
+
{"current_steps": 15815, "total_steps": 16510, "loss": 0.0, "lr": 2.701874949019384e-07, "epoch": 9.579043004239855, "percentage": 95.79, "elapsed_time": "0:24:43", "remaining_time": "0:01:05", "throughput": 2057.51, "total_tokens": 3051536}
|
| 3183 |
+
{"current_steps": 15820, "total_steps": 16510, "loss": 0.0, "lr": 2.663263172045016e-07, "epoch": 9.58207147183525, "percentage": 95.82, "elapsed_time": "0:24:43", "remaining_time": "0:01:04", "throughput": 2057.62, "total_tokens": 3052560}
|
| 3184 |
+
{"current_steps": 15825, "total_steps": 16510, "loss": 0.0, "lr": 2.624927802158283e-07, "epoch": 9.585099939430648, "percentage": 95.85, "elapsed_time": "0:24:43", "remaining_time": "0:01:04", "throughput": 2057.68, "total_tokens": 3053520}
|
| 3185 |
+
{"current_steps": 15830, "total_steps": 16510, "loss": 0.0001, "lr": 2.5868688822001663e-07, "epoch": 9.588128407026044, "percentage": 95.88, "elapsed_time": "0:24:44", "remaining_time": "0:01:03", "throughput": 2057.8, "total_tokens": 3054544}
|
| 3186 |
+
{"current_steps": 15835, "total_steps": 16510, "loss": 0.0, "lr": 2.549086454702837e-07, "epoch": 9.591156874621442, "percentage": 95.91, "elapsed_time": "0:24:44", "remaining_time": "0:01:03", "throughput": 2057.92, "total_tokens": 3055584}
|
| 3187 |
+
{"current_steps": 15840, "total_steps": 16510, "loss": 0.0, "lr": 2.511580561889354e-07, "epoch": 9.594185342216837, "percentage": 95.94, "elapsed_time": "0:24:45", "remaining_time": "0:01:02", "throughput": 2057.98, "total_tokens": 3056528}
|
| 3188 |
+
{"current_steps": 15845, "total_steps": 16510, "loss": 0.0, "lr": 2.474351245673884e-07, "epoch": 9.597213809812235, "percentage": 95.97, "elapsed_time": "0:24:45", "remaining_time": "0:01:02", "throughput": 2058.01, "total_tokens": 3057424}
|
| 3189 |
+
{"current_steps": 15850, "total_steps": 16510, "loss": 0.0, "lr": 2.437398547661396e-07, "epoch": 9.60024227740763, "percentage": 96.0, "elapsed_time": "0:24:46", "remaining_time": "0:01:01", "throughput": 2058.13, "total_tokens": 3058464}
|
| 3190 |
+
{"current_steps": 15855, "total_steps": 16510, "loss": 0.0, "lr": 2.400722509147774e-07, "epoch": 9.603270745003028, "percentage": 96.03, "elapsed_time": "0:24:46", "remaining_time": "0:01:01", "throughput": 2058.24, "total_tokens": 3059472}
|
| 3191 |
+
{"current_steps": 15860, "total_steps": 16510, "loss": 0.0, "lr": 2.3643231711197312e-07, "epoch": 9.606299212598426, "percentage": 96.06, "elapsed_time": "0:24:46", "remaining_time": "0:01:00", "throughput": 2058.29, "total_tokens": 3060400}
|
| 3192 |
+
{"current_steps": 15865, "total_steps": 16510, "loss": 0.0, "lr": 2.3282005742547297e-07, "epoch": 9.609327680193822, "percentage": 96.09, "elapsed_time": "0:24:47", "remaining_time": "0:01:00", "throughput": 2058.35, "total_tokens": 3061344}
|
| 3193 |
+
{"current_steps": 15870, "total_steps": 16510, "loss": 0.0, "lr": 2.2923547589209783e-07, "epoch": 9.61235614778922, "percentage": 96.12, "elapsed_time": "0:24:47", "remaining_time": "0:00:59", "throughput": 2058.41, "total_tokens": 3062288}
|
| 3194 |
+
{"current_steps": 15875, "total_steps": 16510, "loss": 0.0, "lr": 2.256785765177377e-07, "epoch": 9.615384615384615, "percentage": 96.15, "elapsed_time": "0:24:48", "remaining_time": "0:00:59", "throughput": 2058.49, "total_tokens": 3063248}
|
| 3195 |
+
{"current_steps": 15880, "total_steps": 16510, "loss": 0.0, "lr": 2.2214936327735192e-07, "epoch": 9.618413082980013, "percentage": 96.18, "elapsed_time": "0:24:48", "remaining_time": "0:00:59", "throughput": 2058.6, "total_tokens": 3064256}
|
| 3196 |
+
{"current_steps": 15885, "total_steps": 16510, "loss": 0.0, "lr": 2.1864784011494665e-07, "epoch": 9.621441550575408, "percentage": 96.21, "elapsed_time": "0:24:48", "remaining_time": "0:00:58", "throughput": 2058.64, "total_tokens": 3065168}
|
| 3197 |
+
{"current_steps": 15890, "total_steps": 16510, "loss": 0.0, "lr": 2.1517401094359457e-07, "epoch": 9.624470018170806, "percentage": 96.24, "elapsed_time": "0:24:49", "remaining_time": "0:00:58", "throughput": 2058.68, "total_tokens": 3066080}
|
| 3198 |
+
{"current_steps": 15895, "total_steps": 16510, "loss": 0.0001, "lr": 2.1172787964541808e-07, "epoch": 9.627498485766202, "percentage": 96.27, "elapsed_time": "0:24:49", "remaining_time": "0:00:57", "throughput": 2058.74, "total_tokens": 3067024}
|
| 3199 |
+
{"current_steps": 15900, "total_steps": 16510, "loss": 0.0023, "lr": 2.08309450071581e-07, "epoch": 9.6305269533616, "percentage": 96.31, "elapsed_time": "0:24:50", "remaining_time": "0:00:57", "throughput": 2058.84, "total_tokens": 3068032}
|
| 3200 |
+
{"current_steps": 15905, "total_steps": 16510, "loss": 0.0, "lr": 2.049187260422969e-07, "epoch": 9.633555420956995, "percentage": 96.34, "elapsed_time": "0:24:50", "remaining_time": "0:00:56", "throughput": 2058.93, "total_tokens": 3069024}
|
| 3201 |
+
{"current_steps": 15910, "total_steps": 16510, "loss": 0.0, "lr": 2.0155571134680972e-07, "epoch": 9.636583888552392, "percentage": 96.37, "elapsed_time": "0:24:51", "remaining_time": "0:00:56", "throughput": 2058.96, "total_tokens": 3069920}
|
| 3202 |
+
{"current_steps": 15915, "total_steps": 16510, "loss": 0.2, "lr": 1.9822040974340205e-07, "epoch": 9.63961235614779, "percentage": 96.4, "elapsed_time": "0:24:51", "remaining_time": "0:00:55", "throughput": 2059.06, "total_tokens": 3070912}
|
| 3203 |
+
{"current_steps": 15920, "total_steps": 16510, "loss": 0.0, "lr": 1.9491282495938678e-07, "epoch": 9.642640823743186, "percentage": 96.43, "elapsed_time": "0:24:51", "remaining_time": "0:00:55", "throughput": 2059.12, "total_tokens": 3071856}
|
| 3204 |
+
{"current_steps": 15925, "total_steps": 16510, "loss": 0.0001, "lr": 1.916329606910988e-07, "epoch": 9.645669291338583, "percentage": 96.46, "elapsed_time": "0:24:52", "remaining_time": "0:00:54", "throughput": 2059.19, "total_tokens": 3072816}
|
| 3205 |
+
{"current_steps": 15930, "total_steps": 16510, "loss": 0.0, "lr": 1.8838082060389784e-07, "epoch": 9.648697758933979, "percentage": 96.49, "elapsed_time": "0:24:52", "remaining_time": "0:00:54", "throughput": 2059.29, "total_tokens": 3073824}
|
| 3206 |
+
{"current_steps": 15935, "total_steps": 16510, "loss": 0.0, "lr": 1.8515640833215719e-07, "epoch": 9.651726226529377, "percentage": 96.52, "elapsed_time": "0:24:53", "remaining_time": "0:00:53", "throughput": 2059.32, "total_tokens": 3074720}
|
| 3207 |
+
{"current_steps": 15940, "total_steps": 16510, "loss": 0.0, "lr": 1.819597274792667e-07, "epoch": 9.654754694124772, "percentage": 96.55, "elapsed_time": "0:24:53", "remaining_time": "0:00:53", "throughput": 2059.37, "total_tokens": 3075648}
|
| 3208 |
+
{"current_steps": 15945, "total_steps": 16510, "loss": 0.0, "lr": 1.787907816176243e-07, "epoch": 9.65778316172017, "percentage": 96.58, "elapsed_time": "0:24:53", "remaining_time": "0:00:52", "throughput": 2059.47, "total_tokens": 3076656}
|
| 3209 |
+
{"current_steps": 15950, "total_steps": 16510, "loss": 0.0001, "lr": 1.7564957428863326e-07, "epoch": 9.660811629315566, "percentage": 96.61, "elapsed_time": "0:24:54", "remaining_time": "0:00:52", "throughput": 2059.57, "total_tokens": 3077648}
|
| 3210 |
+
{"current_steps": 15955, "total_steps": 16510, "loss": 0.0, "lr": 1.725361090026967e-07, "epoch": 9.663840096910963, "percentage": 96.64, "elapsed_time": "0:24:54", "remaining_time": "0:00:51", "throughput": 2059.66, "total_tokens": 3078640}
|
| 3211 |
+
{"current_steps": 15960, "total_steps": 16510, "loss": 0.0, "lr": 1.694503892392202e-07, "epoch": 9.666868564506359, "percentage": 96.67, "elapsed_time": "0:24:55", "remaining_time": "0:00:51", "throughput": 2059.76, "total_tokens": 3079648}
|
| 3212 |
+
{"current_steps": 15965, "total_steps": 16510, "loss": 0.0, "lr": 1.6639241844659537e-07, "epoch": 9.669897032101757, "percentage": 96.7, "elapsed_time": "0:24:55", "remaining_time": "0:00:51", "throughput": 2059.84, "total_tokens": 3080608}
|
| 3213 |
+
{"current_steps": 15970, "total_steps": 16510, "loss": 0.0487, "lr": 1.6336220004221082e-07, "epoch": 9.672925499697154, "percentage": 96.73, "elapsed_time": "0:24:55", "remaining_time": "0:00:50", "throughput": 2059.91, "total_tokens": 3081568}
|
| 3214 |
+
{"current_steps": 15975, "total_steps": 16510, "loss": 0.0, "lr": 1.6035973741242994e-07, "epoch": 9.67595396729255, "percentage": 96.76, "elapsed_time": "0:24:56", "remaining_time": "0:00:50", "throughput": 2059.95, "total_tokens": 3082480}
|
| 3215 |
+
{"current_steps": 15980, "total_steps": 16510, "loss": 0.0, "lr": 1.573850339126104e-07, "epoch": 9.678982434887947, "percentage": 96.79, "elapsed_time": "0:24:56", "remaining_time": "0:00:49", "throughput": 2059.99, "total_tokens": 3083392}
|
| 3216 |
+
{"current_steps": 15985, "total_steps": 16510, "loss": 0.0, "lr": 1.5443809286708466e-07, "epoch": 9.682010902483343, "percentage": 96.82, "elapsed_time": "0:24:57", "remaining_time": "0:00:49", "throughput": 2060.06, "total_tokens": 3084352}
|
| 3217 |
+
{"current_steps": 15990, "total_steps": 16510, "loss": 0.0, "lr": 1.5151891756915448e-07, "epoch": 9.68503937007874, "percentage": 96.85, "elapsed_time": "0:24:57", "remaining_time": "0:00:48", "throughput": 2060.1, "total_tokens": 3085264}
|
| 3218 |
+
{"current_steps": 15995, "total_steps": 16510, "loss": 0.094, "lr": 1.4862751128109643e-07, "epoch": 9.688067837674136, "percentage": 96.88, "elapsed_time": "0:24:58", "remaining_time": "0:00:48", "throughput": 2060.17, "total_tokens": 3086224}
|
| 3219 |
+
{"current_steps": 16000, "total_steps": 16510, "loss": 0.0, "lr": 1.4576387723415353e-07, "epoch": 9.691096305269534, "percentage": 96.91, "elapsed_time": "0:24:58", "remaining_time": "0:00:47", "throughput": 2060.21, "total_tokens": 3087136}
|
| 3220 |
+
{"current_steps": 16005, "total_steps": 16510, "loss": 0.0, "lr": 1.4292801862853533e-07, "epoch": 9.69412477286493, "percentage": 96.94, "elapsed_time": "0:24:58", "remaining_time": "0:00:47", "throughput": 2060.29, "total_tokens": 3088112}
|
| 3221 |
+
{"current_steps": 16010, "total_steps": 16510, "loss": 0.0, "lr": 1.4011993863340676e-07, "epoch": 9.697153240460327, "percentage": 96.97, "elapsed_time": "0:24:59", "remaining_time": "0:00:46", "throughput": 2060.38, "total_tokens": 3089104}
|
| 3222 |
+
{"current_steps": 16015, "total_steps": 16510, "loss": 0.0, "lr": 1.3733964038689374e-07, "epoch": 9.700181708055723, "percentage": 97.0, "elapsed_time": "0:24:59", "remaining_time": "0:00:46", "throughput": 2060.44, "total_tokens": 3090048}
|
| 3223 |
+
{"current_steps": 16020, "total_steps": 16510, "loss": 0.0001, "lr": 1.3458712699606635e-07, "epoch": 9.70321017565112, "percentage": 97.03, "elapsed_time": "0:25:00", "remaining_time": "0:00:45", "throughput": 2060.55, "total_tokens": 3091072}
|
| 3224 |
+
{"current_steps": 16025, "total_steps": 16510, "loss": 0.0, "lr": 1.318624015369585e-07, "epoch": 9.706238643246518, "percentage": 97.06, "elapsed_time": "0:25:00", "remaining_time": "0:00:45", "throughput": 2060.59, "total_tokens": 3091984}
|
| 3225 |
+
{"current_steps": 16030, "total_steps": 16510, "loss": 0.0, "lr": 1.2916546705453725e-07, "epoch": 9.709267110841914, "percentage": 97.09, "elapsed_time": "0:25:00", "remaining_time": "0:00:44", "throughput": 2060.69, "total_tokens": 3092992}
|
| 3226 |
+
{"current_steps": 16035, "total_steps": 16510, "loss": 0.0, "lr": 1.2649632656271947e-07, "epoch": 9.712295578437312, "percentage": 97.12, "elapsed_time": "0:25:01", "remaining_time": "0:00:44", "throughput": 2060.84, "total_tokens": 3094080}
|
| 3227 |
+
{"current_steps": 16040, "total_steps": 16510, "loss": 0.0, "lr": 1.238549830443553e-07, "epoch": 9.715324046032707, "percentage": 97.15, "elapsed_time": "0:25:01", "remaining_time": "0:00:44", "throughput": 2060.88, "total_tokens": 3094992}
|
| 3228 |
+
{"current_steps": 16045, "total_steps": 16510, "loss": 0.0, "lr": 1.2124143945124177e-07, "epoch": 9.718352513628105, "percentage": 97.18, "elapsed_time": "0:25:02", "remaining_time": "0:00:43", "throughput": 2060.97, "total_tokens": 3095984}
|
| 3229 |
+
{"current_steps": 16050, "total_steps": 16510, "loss": 0.0001, "lr": 1.1865569870409543e-07, "epoch": 9.7213809812235, "percentage": 97.21, "elapsed_time": "0:25:02", "remaining_time": "0:00:43", "throughput": 2061.03, "total_tokens": 3096912}
|
| 3230 |
+
{"current_steps": 16055, "total_steps": 16510, "loss": 0.0, "lr": 1.1609776369256586e-07, "epoch": 9.724409448818898, "percentage": 97.24, "elapsed_time": "0:25:03", "remaining_time": "0:00:42", "throughput": 2061.08, "total_tokens": 3097840}
|
| 3231 |
+
{"current_steps": 16060, "total_steps": 16510, "loss": 0.0003, "lr": 1.1356763727523867e-07, "epoch": 9.727437916414294, "percentage": 97.27, "elapsed_time": "0:25:03", "remaining_time": "0:00:42", "throughput": 2061.11, "total_tokens": 3098736}
|
| 3232 |
+
{"current_steps": 16065, "total_steps": 16510, "loss": 0.0, "lr": 1.110653222796132e-07, "epoch": 9.730466384009691, "percentage": 97.3, "elapsed_time": "0:25:03", "remaining_time": "0:00:41", "throughput": 2061.19, "total_tokens": 3099712}
|
| 3233 |
+
{"current_steps": 16070, "total_steps": 16510, "loss": 0.0, "lr": 1.0859082150210253e-07, "epoch": 9.733494851605087, "percentage": 97.33, "elapsed_time": "0:25:04", "remaining_time": "0:00:41", "throughput": 2061.27, "total_tokens": 3100688}
|
| 3234 |
+
{"current_steps": 16075, "total_steps": 16510, "loss": 0.0, "lr": 1.0614413770805298e-07, "epoch": 9.736523319200485, "percentage": 97.37, "elapsed_time": "0:25:04", "remaining_time": "0:00:40", "throughput": 2061.39, "total_tokens": 3101712}
|
| 3235 |
+
{"current_steps": 16080, "total_steps": 16510, "loss": 0.0001, "lr": 1.0372527363171347e-07, "epoch": 9.73955178679588, "percentage": 97.4, "elapsed_time": "0:25:05", "remaining_time": "0:00:40", "throughput": 2061.43, "total_tokens": 3102624}
|
| 3236 |
+
{"current_steps": 16085, "total_steps": 16510, "loss": 0.0, "lr": 1.0133423197624392e-07, "epoch": 9.742580254391278, "percentage": 97.43, "elapsed_time": "0:25:05", "remaining_time": "0:00:39", "throughput": 2061.46, "total_tokens": 3103520}
|
| 3237 |
+
{"current_steps": 16090, "total_steps": 16510, "loss": 0.0001, "lr": 9.897101541371246e-08, "epoch": 9.745608721986674, "percentage": 97.46, "elapsed_time": "0:25:05", "remaining_time": "0:00:39", "throughput": 2061.53, "total_tokens": 3104480}
|
| 3238 |
+
{"current_steps": 16095, "total_steps": 16510, "loss": 0.0, "lr": 9.663562658509817e-08, "epoch": 9.748637189582071, "percentage": 97.49, "elapsed_time": "0:25:06", "remaining_time": "0:00:38", "throughput": 2061.64, "total_tokens": 3105504}
|
| 3239 |
+
{"current_steps": 16100, "total_steps": 16510, "loss": 0.0, "lr": 9.432806810026895e-08, "epoch": 9.751665657177469, "percentage": 97.52, "elapsed_time": "0:25:06", "remaining_time": "0:00:38", "throughput": 2061.73, "total_tokens": 3106496}
|
| 3240 |
+
{"current_steps": 16105, "total_steps": 16510, "loss": 0.0, "lr": 9.204834253800365e-08, "epoch": 9.754694124772865, "percentage": 97.55, "elapsed_time": "0:25:07", "remaining_time": "0:00:37", "throughput": 2061.85, "total_tokens": 3107520}
|
| 3241 |
+
{"current_steps": 16110, "total_steps": 16510, "loss": 0.0, "lr": 8.97964524459699e-08, "epoch": 9.757722592368262, "percentage": 97.58, "elapsed_time": "0:25:07", "remaining_time": "0:00:37", "throughput": 2061.93, "total_tokens": 3108496}
|
| 3242 |
+
{"current_steps": 16115, "total_steps": 16510, "loss": 0.0, "lr": 8.757240034073244e-08, "epoch": 9.760751059963658, "percentage": 97.61, "elapsed_time": "0:25:07", "remaining_time": "0:00:36", "throughput": 2062.04, "total_tokens": 3109520}
|
| 3243 |
+
{"current_steps": 16120, "total_steps": 16510, "loss": 0.0, "lr": 8.537618870774477e-08, "epoch": 9.763779527559056, "percentage": 97.64, "elapsed_time": "0:25:08", "remaining_time": "0:00:36", "throughput": 2062.15, "total_tokens": 3110544}
|
| 3244 |
+
{"current_steps": 16125, "total_steps": 16510, "loss": 0.0, "lr": 8.320782000134086e-08, "epoch": 9.766807995154451, "percentage": 97.67, "elapsed_time": "0:25:08", "remaining_time": "0:00:36", "throughput": 2062.25, "total_tokens": 3111552}
|
| 3245 |
+
{"current_steps": 16130, "total_steps": 16510, "loss": 0.0, "lr": 8.106729664475176e-08, "epoch": 9.769836462749849, "percentage": 97.7, "elapsed_time": "0:25:09", "remaining_time": "0:00:35", "throughput": 2062.31, "total_tokens": 3112496}
|
| 3246 |
+
{"current_steps": 16135, "total_steps": 16510, "loss": 0.0, "lr": 7.895462103008066e-08, "epoch": 9.772864930345245, "percentage": 97.73, "elapsed_time": "0:25:09", "remaining_time": "0:00:35", "throughput": 2062.37, "total_tokens": 3113440}
|
| 3247 |
+
{"current_steps": 16140, "total_steps": 16510, "loss": 0.0, "lr": 7.68697955183112e-08, "epoch": 9.775893397940642, "percentage": 97.76, "elapsed_time": "0:25:10", "remaining_time": "0:00:34", "throughput": 2062.43, "total_tokens": 3114384}
|
| 3248 |
+
{"current_steps": 16145, "total_steps": 16510, "loss": 0.0, "lr": 7.481282243931298e-08, "epoch": 9.778921865536038, "percentage": 97.79, "elapsed_time": "0:25:10", "remaining_time": "0:00:34", "throughput": 2062.5, "total_tokens": 3115344}
|
| 3249 |
+
{"current_steps": 16150, "total_steps": 16510, "loss": 0.0001, "lr": 7.278370409181667e-08, "epoch": 9.781950333131435, "percentage": 97.82, "elapsed_time": "0:25:10", "remaining_time": "0:00:33", "throughput": 2062.56, "total_tokens": 3116288}
|
| 3250 |
+
{"current_steps": 16155, "total_steps": 16510, "loss": 0.0, "lr": 7.07824427434306e-08, "epoch": 9.784978800726833, "percentage": 97.85, "elapsed_time": "0:25:11", "remaining_time": "0:00:33", "throughput": 2062.65, "total_tokens": 3117280}
|
| 3251 |
+
{"current_steps": 16160, "total_steps": 16510, "loss": 0.0017, "lr": 6.880904063063243e-08, "epoch": 9.788007268322229, "percentage": 97.88, "elapsed_time": "0:25:11", "remaining_time": "0:00:32", "throughput": 2062.78, "total_tokens": 3118320}
|
| 3252 |
+
{"current_steps": 16165, "total_steps": 16510, "loss": 0.0, "lr": 6.686349995876639e-08, "epoch": 9.791035735917626, "percentage": 97.91, "elapsed_time": "0:25:12", "remaining_time": "0:00:32", "throughput": 2062.9, "total_tokens": 3119360}
|
| 3253 |
+
{"current_steps": 16170, "total_steps": 16510, "loss": 0.0, "lr": 6.494582290203777e-08, "epoch": 9.794064203513022, "percentage": 97.94, "elapsed_time": "0:25:12", "remaining_time": "0:00:31", "throughput": 2062.98, "total_tokens": 3120336}
|
| 3254 |
+
{"current_steps": 16175, "total_steps": 16510, "loss": 0.0, "lr": 6.305601160351282e-08, "epoch": 9.79709267110842, "percentage": 97.97, "elapsed_time": "0:25:12", "remaining_time": "0:00:31", "throughput": 2063.1, "total_tokens": 3121376}
|
| 3255 |
+
{"current_steps": 16180, "total_steps": 16510, "loss": 0.0, "lr": 6.119406817512163e-08, "epoch": 9.800121138703815, "percentage": 98.0, "elapsed_time": "0:25:13", "remaining_time": "0:00:30", "throughput": 2063.16, "total_tokens": 3122320}
|
| 3256 |
+
{"current_steps": 16185, "total_steps": 16510, "loss": 0.0, "lr": 5.935999469764697e-08, "epoch": 9.803149606299213, "percentage": 98.03, "elapsed_time": "0:25:13", "remaining_time": "0:00:30", "throughput": 2063.21, "total_tokens": 3123264}
|
| 3257 |
+
{"current_steps": 16190, "total_steps": 16510, "loss": 0.0001, "lr": 5.755379322072985e-08, "epoch": 9.806178073894609, "percentage": 98.06, "elapsed_time": "0:25:14", "remaining_time": "0:00:29", "throughput": 2063.28, "total_tokens": 3124224}
|
| 3258 |
+
{"current_steps": 16195, "total_steps": 16510, "loss": 0.0, "lr": 5.5775465762858414e-08, "epoch": 9.809206541490006, "percentage": 98.09, "elapsed_time": "0:25:14", "remaining_time": "0:00:29", "throughput": 2063.4, "total_tokens": 3125264}
|
| 3259 |
+
{"current_steps": 16200, "total_steps": 16510, "loss": 0.0, "lr": 5.40250143113763e-08, "epoch": 9.812235009085402, "percentage": 98.12, "elapsed_time": "0:25:15", "remaining_time": "0:00:28", "throughput": 2063.45, "total_tokens": 3126192}
|
| 3260 |
+
{"current_steps": 16205, "total_steps": 16510, "loss": 0.0001, "lr": 5.2302440822465937e-08, "epoch": 9.8152634766808, "percentage": 98.15, "elapsed_time": "0:25:15", "remaining_time": "0:00:28", "throughput": 2063.55, "total_tokens": 3127200}
|
| 3261 |
+
{"current_steps": 16210, "total_steps": 16510, "loss": 0.0, "lr": 5.0607747221165234e-08, "epoch": 9.818291944276197, "percentage": 98.18, "elapsed_time": "0:25:15", "remaining_time": "0:00:28", "throughput": 2063.65, "total_tokens": 3128208}
|
| 3262 |
+
{"current_steps": 16215, "total_steps": 16510, "loss": 0.0, "lr": 4.8940935401350916e-08, "epoch": 9.821320411871593, "percentage": 98.21, "elapsed_time": "0:25:16", "remaining_time": "0:00:27", "throughput": 2063.75, "total_tokens": 3129200}
|
| 3263 |
+
{"current_steps": 16220, "total_steps": 16510, "loss": 0.0, "lr": 4.7302007225741296e-08, "epoch": 9.82434887946699, "percentage": 98.24, "elapsed_time": "0:25:16", "remaining_time": "0:00:27", "throughput": 2063.85, "total_tokens": 3130224}
|
| 3264 |
+
{"current_steps": 16225, "total_steps": 16510, "loss": 0.0001, "lr": 4.5690964525890726e-08, "epoch": 9.827377347062386, "percentage": 98.27, "elapsed_time": "0:25:17", "remaining_time": "0:00:26", "throughput": 2063.92, "total_tokens": 3131184}
|
| 3265 |
+
{"current_steps": 16230, "total_steps": 16510, "loss": 0.0001, "lr": 4.410780910219514e-08, "epoch": 9.830405814657784, "percentage": 98.3, "elapsed_time": "0:25:17", "remaining_time": "0:00:26", "throughput": 2064.01, "total_tokens": 3132176}
|
| 3266 |
+
{"current_steps": 16235, "total_steps": 16510, "loss": 0.0, "lr": 4.255254272388376e-08, "epoch": 9.83343428225318, "percentage": 98.33, "elapsed_time": "0:25:17", "remaining_time": "0:00:25", "throughput": 2064.14, "total_tokens": 3133216}
|
| 3267 |
+
{"current_steps": 16240, "total_steps": 16510, "loss": 0.0, "lr": 4.102516712901905e-08, "epoch": 9.836462749848577, "percentage": 98.36, "elapsed_time": "0:25:18", "remaining_time": "0:00:25", "throughput": 2064.23, "total_tokens": 3134208}
|
| 3268 |
+
{"current_steps": 16245, "total_steps": 16510, "loss": 0.0, "lr": 3.952568402449397e-08, "epoch": 9.839491217443973, "percentage": 98.39, "elapsed_time": "0:25:18", "remaining_time": "0:00:24", "throughput": 2064.31, "total_tokens": 3135184}
|
| 3269 |
+
{"current_steps": 16250, "total_steps": 16510, "loss": 0.0, "lr": 3.80540950860292e-08, "epoch": 9.84251968503937, "percentage": 98.43, "elapsed_time": "0:25:19", "remaining_time": "0:00:24", "throughput": 2064.38, "total_tokens": 3136144}
|
| 3270 |
+
{"current_steps": 16255, "total_steps": 16510, "loss": 0.0, "lr": 3.661040195818144e-08, "epoch": 9.845548152634766, "percentage": 98.46, "elapsed_time": "0:25:19", "remaining_time": "0:00:23", "throughput": 2064.51, "total_tokens": 3137200}
|
| 3271 |
+
{"current_steps": 16260, "total_steps": 16510, "loss": 0.0, "lr": 3.51946062543157e-08, "epoch": 9.848576620230164, "percentage": 98.49, "elapsed_time": "0:25:20", "remaining_time": "0:00:23", "throughput": 2064.57, "total_tokens": 3138144}
|
| 3272 |
+
{"current_steps": 16265, "total_steps": 16510, "loss": 0.0, "lr": 3.3806709556641336e-08, "epoch": 9.85160508782556, "percentage": 98.52, "elapsed_time": "0:25:20", "remaining_time": "0:00:22", "throughput": 2064.6, "total_tokens": 3139056}
|
| 3273 |
+
{"current_steps": 16270, "total_steps": 16510, "loss": 0.0, "lr": 3.2446713416173225e-08, "epoch": 9.854633555420957, "percentage": 98.55, "elapsed_time": "0:25:20", "remaining_time": "0:00:22", "throughput": 2064.68, "total_tokens": 3140032}
|
| 3274 |
+
{"current_steps": 16275, "total_steps": 16510, "loss": 0.0001, "lr": 3.111461935275395e-08, "epoch": 9.857662023016355, "percentage": 98.58, "elapsed_time": "0:25:21", "remaining_time": "0:00:21", "throughput": 2064.76, "total_tokens": 3141008}
|
| 3275 |
+
{"current_steps": 16280, "total_steps": 16510, "loss": 0.0001, "lr": 2.981042885504548e-08, "epoch": 9.86069049061175, "percentage": 98.61, "elapsed_time": "0:25:21", "remaining_time": "0:00:21", "throughput": 2064.83, "total_tokens": 3141968}
|
| 3276 |
+
{"current_steps": 16285, "total_steps": 16510, "loss": 0.0, "lr": 2.853414338052085e-08, "epoch": 9.863718958207148, "percentage": 98.64, "elapsed_time": "0:25:22", "remaining_time": "0:00:21", "throughput": 2064.92, "total_tokens": 3142960}
|
| 3277 |
+
{"current_steps": 16290, "total_steps": 16510, "loss": 0.1009, "lr": 2.7285764355472475e-08, "epoch": 9.866747425802544, "percentage": 98.67, "elapsed_time": "0:25:22", "remaining_time": "0:00:20", "throughput": 2065.01, "total_tokens": 3143952}
|
| 3278 |
+
{"current_steps": 16295, "total_steps": 16510, "loss": 0.0001, "lr": 2.6065293175006612e-08, "epoch": 9.869775893397941, "percentage": 98.7, "elapsed_time": "0:25:22", "remaining_time": "0:00:20", "throughput": 2065.09, "total_tokens": 3144928}
|
| 3279 |
+
{"current_steps": 16300, "total_steps": 16510, "loss": 0.0, "lr": 2.4872731203043344e-08, "epoch": 9.872804360993337, "percentage": 98.73, "elapsed_time": "0:25:23", "remaining_time": "0:00:19", "throughput": 2065.16, "total_tokens": 3145888}
|
| 3280 |
+
{"current_steps": 16305, "total_steps": 16510, "loss": 0.0, "lr": 2.3708079772305492e-08, "epoch": 9.875832828588734, "percentage": 98.76, "elapsed_time": "0:25:23", "remaining_time": "0:00:19", "throughput": 2065.17, "total_tokens": 3146752}
|
| 3281 |
+
{"current_steps": 16310, "total_steps": 16510, "loss": 0.0, "lr": 2.257134018433249e-08, "epoch": 9.87886129618413, "percentage": 98.79, "elapsed_time": "0:25:24", "remaining_time": "0:00:18", "throughput": 2065.22, "total_tokens": 3147680}
|
| 3282 |
+
{"current_steps": 16315, "total_steps": 16510, "loss": 0.0001, "lr": 2.1462513709472056e-08, "epoch": 9.881889763779528, "percentage": 98.82, "elapsed_time": "0:25:24", "remaining_time": "0:00:18", "throughput": 2065.28, "total_tokens": 3148624}
|
| 3283 |
+
{"current_steps": 16320, "total_steps": 16510, "loss": 0.0, "lr": 2.0381601586869082e-08, "epoch": 9.884918231374924, "percentage": 98.85, "elapsed_time": "0:25:24", "remaining_time": "0:00:17", "throughput": 2065.41, "total_tokens": 3149680}
|
| 3284 |
+
{"current_steps": 16325, "total_steps": 16510, "loss": 0.0001, "lr": 1.9328605024482303e-08, "epoch": 9.887946698970321, "percentage": 98.88, "elapsed_time": "0:25:25", "remaining_time": "0:00:17", "throughput": 2065.46, "total_tokens": 3150608}
|
| 3285 |
+
{"current_steps": 16330, "total_steps": 16510, "loss": 0.0001, "lr": 1.8303525199070415e-08, "epoch": 9.890975166565717, "percentage": 98.91, "elapsed_time": "0:25:25", "remaining_time": "0:00:16", "throughput": 2065.52, "total_tokens": 3151552}
|
| 3286 |
+
{"current_steps": 16335, "total_steps": 16510, "loss": 0.0, "lr": 1.7306363256194835e-08, "epoch": 9.894003634161114, "percentage": 98.94, "elapsed_time": "0:25:26", "remaining_time": "0:00:16", "throughput": 2065.63, "total_tokens": 3152576}
|
| 3287 |
+
{"current_steps": 16340, "total_steps": 16510, "loss": 0.0, "lr": 1.633712031021417e-08, "epoch": 9.897032101756512, "percentage": 98.97, "elapsed_time": "0:25:26", "remaining_time": "0:00:15", "throughput": 2065.74, "total_tokens": 3153600}
|
| 3288 |
+
{"current_steps": 16345, "total_steps": 16510, "loss": 0.0, "lr": 1.5395797444295313e-08, "epoch": 9.900060569351908, "percentage": 99.0, "elapsed_time": "0:25:27", "remaining_time": "0:00:15", "throughput": 2065.8, "total_tokens": 3154544}
|
| 3289 |
+
{"current_steps": 16350, "total_steps": 16510, "loss": 0.0, "lr": 1.4482395710394003e-08, "epoch": 9.903089036947305, "percentage": 99.03, "elapsed_time": "0:25:27", "remaining_time": "0:00:14", "throughput": 2065.82, "total_tokens": 3155424}
|
| 3290 |
+
{"current_steps": 16355, "total_steps": 16510, "loss": 0.0, "lr": 1.3596916129268721e-08, "epoch": 9.906117504542701, "percentage": 99.06, "elapsed_time": "0:25:27", "remaining_time": "0:00:14", "throughput": 2065.85, "total_tokens": 3156336}
|
| 3291 |
+
{"current_steps": 16360, "total_steps": 16510, "loss": 0.0, "lr": 1.2739359690472353e-08, "epoch": 9.909145972138099, "percentage": 99.09, "elapsed_time": "0:25:28", "remaining_time": "0:00:14", "throughput": 2065.84, "total_tokens": 3157168}
|
| 3292 |
+
{"current_steps": 16365, "total_steps": 16510, "loss": 0.0001, "lr": 1.1909727352352184e-08, "epoch": 9.912174439733494, "percentage": 99.12, "elapsed_time": "0:25:28", "remaining_time": "0:00:13", "throughput": 2065.91, "total_tokens": 3158128}
|
| 3293 |
+
{"current_steps": 16370, "total_steps": 16510, "loss": 0.0001, "lr": 1.1108020042052692e-08, "epoch": 9.915202907328892, "percentage": 99.15, "elapsed_time": "0:25:29", "remaining_time": "0:00:13", "throughput": 2065.99, "total_tokens": 3159104}
|
| 3294 |
+
{"current_steps": 16375, "total_steps": 16510, "loss": 0.0, "lr": 1.03342386555072e-08, "epoch": 9.918231374924288, "percentage": 99.18, "elapsed_time": "0:25:29", "remaining_time": "0:00:12", "throughput": 2066.1, "total_tokens": 3160128}
|
| 3295 |
+
{"current_steps": 16380, "total_steps": 16510, "loss": 0.0, "lr": 9.588384057437894e-09, "epoch": 9.921259842519685, "percentage": 99.21, "elapsed_time": "0:25:29", "remaining_time": "0:00:12", "throughput": 2066.15, "total_tokens": 3161056}
|
| 3296 |
+
{"current_steps": 16385, "total_steps": 16510, "loss": 0.1918, "lr": 8.870457081369687e-09, "epoch": 9.924288310115081, "percentage": 99.24, "elapsed_time": "0:25:30", "remaining_time": "0:00:11", "throughput": 2066.22, "total_tokens": 3162032}
|
| 3297 |
+
{"current_steps": 16390, "total_steps": 16510, "loss": 0.0, "lr": 8.180458529608027e-09, "epoch": 9.927316777710478, "percentage": 99.27, "elapsed_time": "0:25:30", "remaining_time": "0:00:11", "throughput": 2066.23, "total_tokens": 3162928}
|
| 3298 |
+
{"current_steps": 16395, "total_steps": 16510, "loss": 0.0, "lr": 7.518389173244433e-09, "epoch": 9.930345245305876, "percentage": 99.3, "elapsed_time": "0:25:31", "remaining_time": "0:00:10", "throughput": 2066.28, "total_tokens": 3163856}
|
| 3299 |
+
{"current_steps": 16400, "total_steps": 16510, "loss": 0.0, "lr": 6.88424975217039e-09, "epoch": 9.933373712901272, "percentage": 99.33, "elapsed_time": "0:25:31", "remaining_time": "0:00:10", "throughput": 2066.33, "total_tokens": 3164784}
|
| 3300 |
+
{"current_steps": 16405, "total_steps": 16510, "loss": 0.0, "lr": 6.2780409750523575e-09, "epoch": 9.93640218049667, "percentage": 99.36, "elapsed_time": "0:25:32", "remaining_time": "0:00:09", "throughput": 2066.44, "total_tokens": 3165808}
|
| 3301 |
+
{"current_steps": 16410, "total_steps": 16510, "loss": 0.0, "lr": 5.699763519353974e-09, "epoch": 9.939430648092065, "percentage": 99.39, "elapsed_time": "0:25:32", "remaining_time": "0:00:09", "throughput": 2066.57, "total_tokens": 3166864}
|
| 3302 |
+
{"current_steps": 16415, "total_steps": 16510, "loss": 0.0001, "lr": 5.149418031316633e-09, "epoch": 9.942459115687463, "percentage": 99.42, "elapsed_time": "0:25:32", "remaining_time": "0:00:08", "throughput": 2066.67, "total_tokens": 3167888}
|
| 3303 |
+
{"current_steps": 16420, "total_steps": 16510, "loss": 0.0, "lr": 4.627005125967809e-09, "epoch": 9.945487583282858, "percentage": 99.45, "elapsed_time": "0:25:33", "remaining_time": "0:00:08", "throughput": 2066.76, "total_tokens": 3168880}
|
| 3304 |
+
{"current_steps": 16425, "total_steps": 16510, "loss": 0.0, "lr": 4.132525387126607e-09, "epoch": 9.948516050878256, "percentage": 99.49, "elapsed_time": "0:25:33", "remaining_time": "0:00:07", "throughput": 2066.85, "total_tokens": 3169872}
|
| 3305 |
+
{"current_steps": 16430, "total_steps": 16510, "loss": 0.0, "lr": 3.665979367387107e-09, "epoch": 9.951544518473652, "percentage": 99.52, "elapsed_time": "0:25:34", "remaining_time": "0:00:07", "throughput": 2066.98, "total_tokens": 3170928}
|
| 3306 |
+
{"current_steps": 16435, "total_steps": 16510, "loss": 0.0, "lr": 3.227367588129471e-09, "epoch": 9.95457298606905, "percentage": 99.55, "elapsed_time": "0:25:34", "remaining_time": "0:00:07", "throughput": 2067.06, "total_tokens": 3171904}
|
| 3307 |
+
{"current_steps": 16440, "total_steps": 16510, "loss": 0.1644, "lr": 2.8166905395199396e-09, "epoch": 9.957601453664445, "percentage": 99.58, "elapsed_time": "0:25:34", "remaining_time": "0:00:06", "throughput": 2067.07, "total_tokens": 3172768}
|
| 3308 |
+
{"current_steps": 16445, "total_steps": 16510, "loss": 0.0, "lr": 2.433948680502507e-09, "epoch": 9.960629921259843, "percentage": 99.61, "elapsed_time": "0:25:35", "remaining_time": "0:00:06", "throughput": 2067.13, "total_tokens": 3173728}
|
| 3309 |
+
{"current_steps": 16450, "total_steps": 16510, "loss": 0.0, "lr": 2.079142438804471e-09, "epoch": 9.96365838885524, "percentage": 99.64, "elapsed_time": "0:25:35", "remaining_time": "0:00:05", "throughput": 2067.19, "total_tokens": 3174672}
|
| 3310 |
+
{"current_steps": 16455, "total_steps": 16510, "loss": 0.0, "lr": 1.7522722109336587e-09, "epoch": 9.966686856450636, "percentage": 99.67, "elapsed_time": "0:25:36", "remaining_time": "0:00:05", "throughput": 2067.26, "total_tokens": 3175632}
|
| 3311 |
+
{"current_steps": 16460, "total_steps": 16510, "loss": 0.0, "lr": 1.4533383621756492e-09, "epoch": 9.969715324046033, "percentage": 99.7, "elapsed_time": "0:25:36", "remaining_time": "0:00:04", "throughput": 2067.38, "total_tokens": 3176672}
|
| 3312 |
+
{"current_steps": 16465, "total_steps": 16510, "loss": 0.0, "lr": 1.1823412266021017e-09, "epoch": 9.97274379164143, "percentage": 99.73, "elapsed_time": "0:25:36", "remaining_time": "0:00:04", "throughput": 2067.39, "total_tokens": 3177552}
|
| 3313 |
+
{"current_steps": 16470, "total_steps": 16510, "loss": 0.0, "lr": 9.392811070624286e-10, "epoch": 9.975772259236827, "percentage": 99.76, "elapsed_time": "0:25:37", "remaining_time": "0:00:03", "throughput": 2067.49, "total_tokens": 3178560}
|
| 3314 |
+
{"current_steps": 16475, "total_steps": 16510, "loss": 0.0001, "lr": 7.241582751810195e-10, "epoch": 9.978800726832223, "percentage": 99.79, "elapsed_time": "0:25:37", "remaining_time": "0:00:03", "throughput": 2067.59, "total_tokens": 3179584}
|
| 3315 |
+
{"current_steps": 16480, "total_steps": 16510, "loss": 0.0, "lr": 5.369729713683435e-10, "epoch": 9.98182919442762, "percentage": 99.82, "elapsed_time": "0:25:38", "remaining_time": "0:00:02", "throughput": 2067.65, "total_tokens": 3180528}
|
| 3316 |
+
{"current_steps": 16485, "total_steps": 16510, "loss": 0.0001, "lr": 3.7772540480707217e-10, "epoch": 9.984857662023016, "percentage": 99.85, "elapsed_time": "0:25:38", "remaining_time": "0:00:02", "throughput": 2067.75, "total_tokens": 3181536}
|
| 3317 |
+
{"current_steps": 16490, "total_steps": 16510, "loss": 0.0, "lr": 2.4641575346595615e-10, "epoch": 9.987886129618413, "percentage": 99.88, "elapsed_time": "0:25:39", "remaining_time": "0:00:01", "throughput": 2067.81, "total_tokens": 3182480}
|
| 3318 |
+
{"current_steps": 16495, "total_steps": 16510, "loss": 0.0001, "lr": 1.43044164080397e-10, "epoch": 9.99091459721381, "percentage": 99.91, "elapsed_time": "0:25:39", "remaining_time": "0:00:01", "throughput": 2067.86, "total_tokens": 3183424}
|
| 3319 |
+
{"current_steps": 16500, "total_steps": 16510, "loss": 0.0, "lr": 6.761075218020274e-11, "epoch": 9.993943064809207, "percentage": 99.94, "elapsed_time": "0:25:39", "remaining_time": "0:00:00", "throughput": 2067.9, "total_tokens": 3184336}
|
| 3320 |
+
{"current_steps": 16505, "total_steps": 16510, "loss": 0.0, "lr": 2.0115602059056494e-11, "epoch": 9.996971532404602, "percentage": 99.97, "elapsed_time": "0:25:40", "remaining_time": "0:00:00", "throughput": 2068.0, "total_tokens": 3185328}
|
| 3321 |
+
{"current_steps": 16510, "total_steps": 16510, "loss": 0.0, "lr": 5.587667967210664e-13, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:25:40", "remaining_time": "0:00:00", "throughput": 2067.9, "total_tokens": 3186272}
|
| 3322 |
+
{"current_steps": 16510, "total_steps": 16510, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:25:42", "remaining_time": "0:00:00", "throughput": 2065.92, "total_tokens": 3186272}
|