rbelanec commited on
Commit
fd6273a
·
verified ·
1 Parent(s): 5d4423e

Training in progress, step 15694

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +166 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e06bece3918b3f7614c1580d0b66215f1b67850dc7499136e4834b46f6d45a9b
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7d51d738cbb5a567a4826fbdc9266200e484b0ef298364fc0489ea62e61b09a
3
  size 798032
trainer_log.jsonl CHANGED
@@ -2992,3 +2992,169 @@
2992
  {"current_steps": 14870, "total_steps": 16520, "loss": 0.1555, "lr": 1.5058782924478431e-06, "epoch": 18.002421307506054, "percentage": 90.01, "elapsed_time": "0:40:39", "remaining_time": "0:04:30", "throughput": 2498.97, "total_tokens": 6096512}
2993
  {"current_steps": 14875, "total_steps": 16520, "loss": 0.0721, "lr": 1.4968630887529339e-06, "epoch": 18.008474576271187, "percentage": 90.04, "elapsed_time": "0:40:40", "remaining_time": "0:04:29", "throughput": 2498.88, "total_tokens": 6098624}
2994
  {"current_steps": 14880, "total_steps": 16520, "loss": 0.0541, "lr": 1.4878741188118744e-06, "epoch": 18.01452784503632, "percentage": 90.07, "elapsed_time": "0:40:41", "remaining_time": "0:04:29", "throughput": 2498.88, "total_tokens": 6100736}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2992
  {"current_steps": 14870, "total_steps": 16520, "loss": 0.1555, "lr": 1.5058782924478431e-06, "epoch": 18.002421307506054, "percentage": 90.01, "elapsed_time": "0:40:39", "remaining_time": "0:04:30", "throughput": 2498.97, "total_tokens": 6096512}
2993
  {"current_steps": 14875, "total_steps": 16520, "loss": 0.0721, "lr": 1.4968630887529339e-06, "epoch": 18.008474576271187, "percentage": 90.04, "elapsed_time": "0:40:40", "remaining_time": "0:04:29", "throughput": 2498.88, "total_tokens": 6098624}
2994
  {"current_steps": 14880, "total_steps": 16520, "loss": 0.0541, "lr": 1.4878741188118744e-06, "epoch": 18.01452784503632, "percentage": 90.07, "elapsed_time": "0:40:41", "remaining_time": "0:04:29", "throughput": 2498.88, "total_tokens": 6100736}
2995
+ {"current_steps": 14885, "total_steps": 16520, "loss": 0.1131, "lr": 1.478911392657989e-06, "epoch": 18.020581113801452, "percentage": 90.1, "elapsed_time": "0:40:42", "remaining_time": "0:04:28", "throughput": 2498.91, "total_tokens": 6102880}
2996
+ {"current_steps": 14890, "total_steps": 16520, "loss": 0.115, "lr": 1.469974920295289e-06, "epoch": 18.026634382566584, "percentage": 90.13, "elapsed_time": "0:40:42", "remaining_time": "0:04:27", "throughput": 2498.92, "total_tokens": 6104864}
2997
+ {"current_steps": 14895, "total_steps": 16520, "loss": 0.0471, "lr": 1.4610647116985037e-06, "epoch": 18.03268765133172, "percentage": 90.16, "elapsed_time": "0:40:43", "remaining_time": "0:04:26", "throughput": 2498.94, "total_tokens": 6106944}
2998
+ {"current_steps": 14900, "total_steps": 16520, "loss": 0.0833, "lr": 1.4521807768130364e-06, "epoch": 18.038740920096853, "percentage": 90.19, "elapsed_time": "0:40:44", "remaining_time": "0:04:25", "throughput": 2499.0, "total_tokens": 6109056}
2999
+ {"current_steps": 14905, "total_steps": 16520, "loss": 0.1159, "lr": 1.4433231255549655e-06, "epoch": 18.044794188861985, "percentage": 90.22, "elapsed_time": "0:40:45", "remaining_time": "0:04:24", "throughput": 2499.03, "total_tokens": 6111168}
3000
+ {"current_steps": 14910, "total_steps": 16520, "loss": 0.0544, "lr": 1.4344917678110303e-06, "epoch": 18.050847457627118, "percentage": 90.25, "elapsed_time": "0:40:46", "remaining_time": "0:04:24", "throughput": 2499.04, "total_tokens": 6113152}
3001
+ {"current_steps": 14915, "total_steps": 16520, "loss": 0.0354, "lr": 1.4256867134386288e-06, "epoch": 18.05690072639225, "percentage": 90.28, "elapsed_time": "0:40:46", "remaining_time": "0:04:23", "throughput": 2499.11, "total_tokens": 6115104}
3002
+ {"current_steps": 14920, "total_steps": 16520, "loss": 0.1142, "lr": 1.416907972265788e-06, "epoch": 18.062953995157386, "percentage": 90.31, "elapsed_time": "0:40:47", "remaining_time": "0:04:22", "throughput": 2499.15, "total_tokens": 6117088}
3003
+ {"current_steps": 14925, "total_steps": 16520, "loss": 0.1497, "lr": 1.408155554091184e-06, "epoch": 18.06900726392252, "percentage": 90.35, "elapsed_time": "0:40:48", "remaining_time": "0:04:21", "throughput": 2499.2, "total_tokens": 6119200}
3004
+ {"current_steps": 14930, "total_steps": 16520, "loss": 0.0493, "lr": 1.3994294686840853e-06, "epoch": 18.07506053268765, "percentage": 90.38, "elapsed_time": "0:40:49", "remaining_time": "0:04:20", "throughput": 2499.22, "total_tokens": 6121280}
3005
+ {"current_steps": 14935, "total_steps": 16520, "loss": 0.1669, "lr": 1.3907297257843898e-06, "epoch": 18.081113801452783, "percentage": 90.41, "elapsed_time": "0:40:50", "remaining_time": "0:04:20", "throughput": 2499.24, "total_tokens": 6123456}
3006
+ {"current_steps": 14940, "total_steps": 16520, "loss": 0.0881, "lr": 1.3820563351025884e-06, "epoch": 18.087167070217916, "percentage": 90.44, "elapsed_time": "0:40:50", "remaining_time": "0:04:19", "throughput": 2499.24, "total_tokens": 6125568}
3007
+ {"current_steps": 14945, "total_steps": 16520, "loss": 0.1437, "lr": 1.3734093063197424e-06, "epoch": 18.093220338983052, "percentage": 90.47, "elapsed_time": "0:40:51", "remaining_time": "0:04:18", "throughput": 2499.26, "total_tokens": 6127488}
3008
+ {"current_steps": 14950, "total_steps": 16520, "loss": 0.1176, "lr": 1.3647886490875144e-06, "epoch": 18.099273607748184, "percentage": 90.5, "elapsed_time": "0:40:52", "remaining_time": "0:04:17", "throughput": 2499.31, "total_tokens": 6129472}
3009
+ {"current_steps": 14955, "total_steps": 16520, "loss": 0.0291, "lr": 1.3561943730281052e-06, "epoch": 18.105326876513317, "percentage": 90.53, "elapsed_time": "0:40:53", "remaining_time": "0:04:16", "throughput": 2499.3, "total_tokens": 6131488}
3010
+ {"current_steps": 14960, "total_steps": 16520, "loss": 0.1079, "lr": 1.3476264877342908e-06, "epoch": 18.11138014527845, "percentage": 90.56, "elapsed_time": "0:40:54", "remaining_time": "0:04:15", "throughput": 2499.28, "total_tokens": 6133408}
3011
+ {"current_steps": 14965, "total_steps": 16520, "loss": 0.0565, "lr": 1.3390850027693802e-06, "epoch": 18.11743341404358, "percentage": 90.59, "elapsed_time": "0:40:54", "remaining_time": "0:04:15", "throughput": 2499.33, "total_tokens": 6135424}
3012
+ {"current_steps": 14970, "total_steps": 16520, "loss": 0.0637, "lr": 1.3305699276672134e-06, "epoch": 18.123486682808718, "percentage": 90.62, "elapsed_time": "0:40:55", "remaining_time": "0:04:14", "throughput": 2499.37, "total_tokens": 6137472}
3013
+ {"current_steps": 14975, "total_steps": 16520, "loss": 0.1952, "lr": 1.3220812719321601e-06, "epoch": 18.12953995157385, "percentage": 90.65, "elapsed_time": "0:40:56", "remaining_time": "0:04:13", "throughput": 2499.42, "total_tokens": 6139552}
3014
+ {"current_steps": 14980, "total_steps": 16520, "loss": 0.1425, "lr": 1.3136190450390912e-06, "epoch": 18.135593220338983, "percentage": 90.68, "elapsed_time": "0:40:57", "remaining_time": "0:04:12", "throughput": 2499.45, "total_tokens": 6141536}
3015
+ {"current_steps": 14985, "total_steps": 16520, "loss": 0.0818, "lr": 1.3051832564333815e-06, "epoch": 18.141646489104115, "percentage": 90.71, "elapsed_time": "0:40:57", "remaining_time": "0:04:11", "throughput": 2499.5, "total_tokens": 6143552}
3016
+ {"current_steps": 14990, "total_steps": 16520, "loss": 0.1504, "lr": 1.2967739155309077e-06, "epoch": 18.147699757869248, "percentage": 90.74, "elapsed_time": "0:40:58", "remaining_time": "0:04:10", "throughput": 2499.55, "total_tokens": 6145632}
3017
+ {"current_steps": 14995, "total_steps": 16520, "loss": 0.0479, "lr": 1.2883910317180004e-06, "epoch": 18.153753026634384, "percentage": 90.77, "elapsed_time": "0:40:59", "remaining_time": "0:04:10", "throughput": 2499.55, "total_tokens": 6147680}
3018
+ {"current_steps": 15000, "total_steps": 16520, "loss": 0.1224, "lr": 1.2800346143514914e-06, "epoch": 18.159806295399516, "percentage": 90.8, "elapsed_time": "0:41:00", "remaining_time": "0:04:09", "throughput": 2499.61, "total_tokens": 6149792}
3019
+ {"current_steps": 15005, "total_steps": 16520, "loss": 0.0789, "lr": 1.2717046727586447e-06, "epoch": 18.16585956416465, "percentage": 90.83, "elapsed_time": "0:41:01", "remaining_time": "0:04:08", "throughput": 2499.64, "total_tokens": 6151744}
3020
+ {"current_steps": 15010, "total_steps": 16520, "loss": 0.1184, "lr": 1.2634012162371839e-06, "epoch": 18.17191283292978, "percentage": 90.86, "elapsed_time": "0:41:01", "remaining_time": "0:04:07", "throughput": 2499.67, "total_tokens": 6153856}
3021
+ {"current_steps": 15015, "total_steps": 16520, "loss": 0.0821, "lr": 1.2551242540552733e-06, "epoch": 18.177966101694917, "percentage": 90.89, "elapsed_time": "0:41:02", "remaining_time": "0:04:06", "throughput": 2499.68, "total_tokens": 6155840}
3022
+ {"current_steps": 15020, "total_steps": 16520, "loss": 0.113, "lr": 1.2468737954514948e-06, "epoch": 18.18401937046005, "percentage": 90.92, "elapsed_time": "0:41:03", "remaining_time": "0:04:06", "throughput": 2499.73, "total_tokens": 6157952}
3023
+ {"current_steps": 15025, "total_steps": 16520, "loss": 0.1893, "lr": 1.2386498496348541e-06, "epoch": 18.190072639225182, "percentage": 90.95, "elapsed_time": "0:41:04", "remaining_time": "0:04:05", "throughput": 2499.78, "total_tokens": 6159936}
3024
+ {"current_steps": 15030, "total_steps": 16520, "loss": 0.1303, "lr": 1.2304524257847672e-06, "epoch": 18.196125907990314, "percentage": 90.98, "elapsed_time": "0:41:04", "remaining_time": "0:04:04", "throughput": 2499.83, "total_tokens": 6162016}
3025
+ {"current_steps": 15035, "total_steps": 16520, "loss": 0.0892, "lr": 1.2222815330510367e-06, "epoch": 18.202179176755447, "percentage": 91.01, "elapsed_time": "0:41:05", "remaining_time": "0:04:03", "throughput": 2499.85, "total_tokens": 6164128}
3026
+ {"current_steps": 15040, "total_steps": 16520, "loss": 0.0638, "lr": 1.2141371805538593e-06, "epoch": 18.208232445520583, "percentage": 91.04, "elapsed_time": "0:41:06", "remaining_time": "0:04:02", "throughput": 2499.92, "total_tokens": 6166336}
3027
+ {"current_steps": 15045, "total_steps": 16520, "loss": 0.0868, "lr": 1.206019377383813e-06, "epoch": 18.214285714285715, "percentage": 91.07, "elapsed_time": "0:41:07", "remaining_time": "0:04:01", "throughput": 2499.96, "total_tokens": 6168384}
3028
+ {"current_steps": 15050, "total_steps": 16520, "loss": 0.1727, "lr": 1.197928132601825e-06, "epoch": 18.220338983050848, "percentage": 91.1, "elapsed_time": "0:41:08", "remaining_time": "0:04:01", "throughput": 2500.0, "total_tokens": 6170432}
3029
+ {"current_steps": 15055, "total_steps": 16520, "loss": 0.1405, "lr": 1.189863455239193e-06, "epoch": 18.22639225181598, "percentage": 91.13, "elapsed_time": "0:41:08", "remaining_time": "0:04:00", "throughput": 2500.04, "total_tokens": 6172416}
3030
+ {"current_steps": 15060, "total_steps": 16520, "loss": 0.0561, "lr": 1.1818253542975584e-06, "epoch": 18.232445520581113, "percentage": 91.16, "elapsed_time": "0:41:09", "remaining_time": "0:03:59", "throughput": 2500.1, "total_tokens": 6174336}
3031
+ {"current_steps": 15065, "total_steps": 16520, "loss": 0.0776, "lr": 1.173813838748894e-06, "epoch": 18.23849878934625, "percentage": 91.19, "elapsed_time": "0:41:10", "remaining_time": "0:03:58", "throughput": 2500.13, "total_tokens": 6176352}
3032
+ {"current_steps": 15070, "total_steps": 16520, "loss": 0.107, "lr": 1.1658289175354996e-06, "epoch": 18.24455205811138, "percentage": 91.22, "elapsed_time": "0:41:11", "remaining_time": "0:03:57", "throughput": 2500.18, "total_tokens": 6178272}
3033
+ {"current_steps": 15075, "total_steps": 16520, "loss": 0.1425, "lr": 1.1578705995699961e-06, "epoch": 18.250605326876514, "percentage": 91.25, "elapsed_time": "0:41:11", "remaining_time": "0:03:56", "throughput": 2500.19, "total_tokens": 6180320}
3034
+ {"current_steps": 15080, "total_steps": 16520, "loss": 0.1206, "lr": 1.1499388937352974e-06, "epoch": 18.256658595641646, "percentage": 91.28, "elapsed_time": "0:41:12", "remaining_time": "0:03:56", "throughput": 2500.23, "total_tokens": 6182368}
3035
+ {"current_steps": 15085, "total_steps": 16520, "loss": 0.1395, "lr": 1.1420338088846404e-06, "epoch": 18.26271186440678, "percentage": 91.31, "elapsed_time": "0:41:13", "remaining_time": "0:03:55", "throughput": 2500.23, "total_tokens": 6184480}
3036
+ {"current_steps": 15090, "total_steps": 16520, "loss": 0.123, "lr": 1.1341553538415095e-06, "epoch": 18.268765133171915, "percentage": 91.34, "elapsed_time": "0:41:14", "remaining_time": "0:03:54", "throughput": 2500.28, "total_tokens": 6186656}
3037
+ {"current_steps": 15095, "total_steps": 16520, "loss": 0.0486, "lr": 1.1263035373997033e-06, "epoch": 18.274818401937047, "percentage": 91.37, "elapsed_time": "0:41:15", "remaining_time": "0:03:53", "throughput": 2500.28, "total_tokens": 6188704}
3038
+ {"current_steps": 15100, "total_steps": 16520, "loss": 0.0732, "lr": 1.1184783683232585e-06, "epoch": 18.28087167070218, "percentage": 91.4, "elapsed_time": "0:41:15", "remaining_time": "0:03:52", "throughput": 2500.35, "total_tokens": 6190656}
3039
+ {"current_steps": 15105, "total_steps": 16520, "loss": 0.0979, "lr": 1.1106798553464804e-06, "epoch": 18.286924939467312, "percentage": 91.43, "elapsed_time": "0:41:16", "remaining_time": "0:03:52", "throughput": 2500.38, "total_tokens": 6192768}
3040
+ {"current_steps": 15110, "total_steps": 16520, "loss": 0.134, "lr": 1.1029080071739333e-06, "epoch": 18.292978208232444, "percentage": 91.46, "elapsed_time": "0:41:17", "remaining_time": "0:03:51", "throughput": 2500.4, "total_tokens": 6194784}
3041
+ {"current_steps": 15115, "total_steps": 16520, "loss": 0.0796, "lr": 1.095162832480387e-06, "epoch": 18.29903147699758, "percentage": 91.5, "elapsed_time": "0:41:18", "remaining_time": "0:03:50", "throughput": 2500.43, "total_tokens": 6196896}
3042
+ {"current_steps": 15120, "total_steps": 16520, "loss": 0.1114, "lr": 1.0874443399108702e-06, "epoch": 18.305084745762713, "percentage": 91.53, "elapsed_time": "0:41:19", "remaining_time": "0:03:49", "throughput": 2500.49, "total_tokens": 6198944}
3043
+ {"current_steps": 15125, "total_steps": 16520, "loss": 0.1043, "lr": 1.0797525380806168e-06, "epoch": 18.311138014527845, "percentage": 91.56, "elapsed_time": "0:41:19", "remaining_time": "0:03:48", "throughput": 2500.52, "total_tokens": 6201056}
3044
+ {"current_steps": 15130, "total_steps": 16520, "loss": 0.0658, "lr": 1.072087435575067e-06, "epoch": 18.317191283292978, "percentage": 91.59, "elapsed_time": "0:41:20", "remaining_time": "0:03:47", "throughput": 2500.58, "total_tokens": 6203168}
3045
+ {"current_steps": 15135, "total_steps": 16520, "loss": 0.1016, "lr": 1.0644490409498637e-06, "epoch": 18.32324455205811, "percentage": 91.62, "elapsed_time": "0:41:21", "remaining_time": "0:03:47", "throughput": 2500.6, "total_tokens": 6205248}
3046
+ {"current_steps": 15140, "total_steps": 16520, "loss": 0.0944, "lr": 1.0568373627308365e-06, "epoch": 18.329297820823246, "percentage": 91.65, "elapsed_time": "0:41:22", "remaining_time": "0:03:46", "throughput": 2500.6, "total_tokens": 6207296}
3047
+ {"current_steps": 15145, "total_steps": 16520, "loss": 0.1063, "lr": 1.0492524094139921e-06, "epoch": 18.33535108958838, "percentage": 91.68, "elapsed_time": "0:41:23", "remaining_time": "0:03:45", "throughput": 2500.65, "total_tokens": 6209472}
3048
+ {"current_steps": 15150, "total_steps": 16520, "loss": 0.1474, "lr": 1.0416941894655224e-06, "epoch": 18.34140435835351, "percentage": 91.71, "elapsed_time": "0:41:23", "remaining_time": "0:03:44", "throughput": 2500.71, "total_tokens": 6211488}
3049
+ {"current_steps": 15155, "total_steps": 16520, "loss": 0.1026, "lr": 1.0341627113217539e-06, "epoch": 18.347457627118644, "percentage": 91.74, "elapsed_time": "0:41:24", "remaining_time": "0:03:43", "throughput": 2500.75, "total_tokens": 6213536}
3050
+ {"current_steps": 15160, "total_steps": 16520, "loss": 0.1285, "lr": 1.026657983389187e-06, "epoch": 18.353510895883776, "percentage": 91.77, "elapsed_time": "0:41:25", "remaining_time": "0:03:42", "throughput": 2500.79, "total_tokens": 6215744}
3051
+ {"current_steps": 15165, "total_steps": 16520, "loss": 0.0783, "lr": 1.0191800140444574e-06, "epoch": 18.359564164648912, "percentage": 91.8, "elapsed_time": "0:41:26", "remaining_time": "0:03:42", "throughput": 2500.81, "total_tokens": 6217824}
3052
+ {"current_steps": 15170, "total_steps": 16520, "loss": 0.1054, "lr": 1.0117288116343298e-06, "epoch": 18.365617433414045, "percentage": 91.83, "elapsed_time": "0:41:27", "remaining_time": "0:03:41", "throughput": 2500.81, "total_tokens": 6219872}
3053
+ {"current_steps": 15175, "total_steps": 16520, "loss": 0.055, "lr": 1.0043043844756934e-06, "epoch": 18.371670702179177, "percentage": 91.86, "elapsed_time": "0:41:27", "remaining_time": "0:03:40", "throughput": 2500.84, "total_tokens": 6221888}
3054
+ {"current_steps": 15180, "total_steps": 16520, "loss": 0.2029, "lr": 9.96906740855555e-07, "epoch": 18.37772397094431, "percentage": 91.89, "elapsed_time": "0:41:28", "remaining_time": "0:03:39", "throughput": 2500.91, "total_tokens": 6223936}
3055
+ {"current_steps": 15185, "total_steps": 16520, "loss": 0.0654, "lr": 9.895358890310208e-07, "epoch": 18.383777239709442, "percentage": 91.92, "elapsed_time": "0:41:29", "remaining_time": "0:03:38", "throughput": 2500.95, "total_tokens": 6225920}
3056
+ {"current_steps": 15190, "total_steps": 16520, "loss": 0.1272, "lr": 9.821918372292959e-07, "epoch": 18.389830508474578, "percentage": 91.95, "elapsed_time": "0:41:30", "remaining_time": "0:03:38", "throughput": 2500.99, "total_tokens": 6228064}
3057
+ {"current_steps": 15195, "total_steps": 16520, "loss": 0.0596, "lr": 9.748745936476734e-07, "epoch": 18.39588377723971, "percentage": 91.98, "elapsed_time": "0:41:30", "remaining_time": "0:03:37", "throughput": 2501.02, "total_tokens": 6230016}
3058
+ {"current_steps": 15200, "total_steps": 16520, "loss": 0.1211, "lr": 9.675841664535167e-07, "epoch": 18.401937046004843, "percentage": 92.01, "elapsed_time": "0:41:31", "remaining_time": "0:03:36", "throughput": 2501.08, "total_tokens": 6232064}
3059
+ {"current_steps": 15205, "total_steps": 16520, "loss": 0.1236, "lr": 9.6032056378427e-07, "epoch": 18.407990314769975, "percentage": 92.04, "elapsed_time": "0:41:32", "remaining_time": "0:03:35", "throughput": 2501.12, "total_tokens": 6234112}
3060
+ {"current_steps": 15210, "total_steps": 16520, "loss": 0.0951, "lr": 9.53083793747414e-07, "epoch": 18.414043583535108, "percentage": 92.07, "elapsed_time": "0:41:33", "remaining_time": "0:03:34", "throughput": 2501.16, "total_tokens": 6236192}
3061
+ {"current_steps": 15215, "total_steps": 16520, "loss": 0.0411, "lr": 9.458738644205129e-07, "epoch": 18.420096852300244, "percentage": 92.1, "elapsed_time": "0:41:34", "remaining_time": "0:03:33", "throughput": 2501.24, "total_tokens": 6238368}
3062
+ {"current_steps": 15220, "total_steps": 16520, "loss": 0.1364, "lr": 9.386907838511344e-07, "epoch": 18.426150121065376, "percentage": 92.13, "elapsed_time": "0:41:34", "remaining_time": "0:03:33", "throughput": 2501.29, "total_tokens": 6240384}
3063
+ {"current_steps": 15225, "total_steps": 16520, "loss": 0.0797, "lr": 9.31534560056907e-07, "epoch": 18.43220338983051, "percentage": 92.16, "elapsed_time": "0:41:35", "remaining_time": "0:03:32", "throughput": 2501.32, "total_tokens": 6242496}
3064
+ {"current_steps": 15230, "total_steps": 16520, "loss": 0.1392, "lr": 9.244052010254662e-07, "epoch": 18.43825665859564, "percentage": 92.19, "elapsed_time": "0:41:36", "remaining_time": "0:03:31", "throughput": 2501.38, "total_tokens": 6244512}
3065
+ {"current_steps": 15235, "total_steps": 16520, "loss": 0.1566, "lr": 9.173027147144714e-07, "epoch": 18.444309927360774, "percentage": 92.22, "elapsed_time": "0:41:37", "remaining_time": "0:03:30", "throughput": 2501.43, "total_tokens": 6246688}
3066
+ {"current_steps": 15240, "total_steps": 16520, "loss": 0.0876, "lr": 9.102271090515784e-07, "epoch": 18.45036319612591, "percentage": 92.25, "elapsed_time": "0:41:38", "remaining_time": "0:03:29", "throughput": 2501.47, "total_tokens": 6248736}
3067
+ {"current_steps": 15245, "total_steps": 16520, "loss": 0.0659, "lr": 9.031783919344478e-07, "epoch": 18.456416464891042, "percentage": 92.28, "elapsed_time": "0:41:38", "remaining_time": "0:03:28", "throughput": 2501.53, "total_tokens": 6250944}
3068
+ {"current_steps": 15250, "total_steps": 16520, "loss": 0.0804, "lr": 8.961565712307163e-07, "epoch": 18.462469733656174, "percentage": 92.31, "elapsed_time": "0:41:39", "remaining_time": "0:03:28", "throughput": 2501.55, "total_tokens": 6253120}
3069
+ {"current_steps": 15255, "total_steps": 16520, "loss": 0.0788, "lr": 8.891616547780174e-07, "epoch": 18.468523002421307, "percentage": 92.34, "elapsed_time": "0:41:40", "remaining_time": "0:03:27", "throughput": 2501.6, "total_tokens": 6255264}
3070
+ {"current_steps": 15260, "total_steps": 16520, "loss": 0.1169, "lr": 8.821936503839334e-07, "epoch": 18.47457627118644, "percentage": 92.37, "elapsed_time": "0:41:41", "remaining_time": "0:03:26", "throughput": 2501.62, "total_tokens": 6257344}
3071
+ {"current_steps": 15265, "total_steps": 16520, "loss": 0.0922, "lr": 8.75252565826018e-07, "epoch": 18.480629539951575, "percentage": 92.4, "elapsed_time": "0:41:42", "remaining_time": "0:03:25", "throughput": 2501.66, "total_tokens": 6259328}
3072
+ {"current_steps": 15270, "total_steps": 16520, "loss": 0.0588, "lr": 8.683384088517904e-07, "epoch": 18.486682808716708, "percentage": 92.43, "elapsed_time": "0:41:42", "remaining_time": "0:03:24", "throughput": 2501.65, "total_tokens": 6261248}
3073
+ {"current_steps": 15275, "total_steps": 16520, "loss": 0.0759, "lr": 8.614511871786829e-07, "epoch": 18.49273607748184, "percentage": 92.46, "elapsed_time": "0:41:43", "remaining_time": "0:03:24", "throughput": 2501.7, "total_tokens": 6263168}
3074
+ {"current_steps": 15280, "total_steps": 16520, "loss": 0.157, "lr": 8.545909084940962e-07, "epoch": 18.498789346246973, "percentage": 92.49, "elapsed_time": "0:41:44", "remaining_time": "0:03:23", "throughput": 2501.73, "total_tokens": 6265216}
3075
+ {"current_steps": 15285, "total_steps": 16520, "loss": 0.0867, "lr": 8.477575804553356e-07, "epoch": 18.504842615012105, "percentage": 92.52, "elapsed_time": "0:41:45", "remaining_time": "0:03:22", "throughput": 2501.77, "total_tokens": 6267264}
3076
+ {"current_steps": 15290, "total_steps": 16520, "loss": 0.1281, "lr": 8.409512106896334e-07, "epoch": 18.51089588377724, "percentage": 92.55, "elapsed_time": "0:41:45", "remaining_time": "0:03:21", "throughput": 2501.81, "total_tokens": 6269344}
3077
+ {"current_steps": 15295, "total_steps": 16520, "loss": 0.0674, "lr": 8.34171806794129e-07, "epoch": 18.516949152542374, "percentage": 92.58, "elapsed_time": "0:41:46", "remaining_time": "0:03:20", "throughput": 2501.86, "total_tokens": 6271328}
3078
+ {"current_steps": 15300, "total_steps": 16520, "loss": 0.0681, "lr": 8.27419376335864e-07, "epoch": 18.523002421307506, "percentage": 92.62, "elapsed_time": "0:41:47", "remaining_time": "0:03:19", "throughput": 2501.92, "total_tokens": 6273248}
3079
+ {"current_steps": 15305, "total_steps": 16520, "loss": 0.111, "lr": 8.206939268517705e-07, "epoch": 18.52905569007264, "percentage": 92.65, "elapsed_time": "0:41:48", "remaining_time": "0:03:19", "throughput": 2501.94, "total_tokens": 6275328}
3080
+ {"current_steps": 15310, "total_steps": 16520, "loss": 0.1406, "lr": 8.139954658486771e-07, "epoch": 18.53510895883777, "percentage": 92.68, "elapsed_time": "0:41:48", "remaining_time": "0:03:18", "throughput": 2501.98, "total_tokens": 6277408}
3081
+ {"current_steps": 15315, "total_steps": 16520, "loss": 0.1418, "lr": 8.07324000803264e-07, "epoch": 18.541162227602907, "percentage": 92.71, "elapsed_time": "0:41:49", "remaining_time": "0:03:17", "throughput": 2502.01, "total_tokens": 6279328}
3082
+ {"current_steps": 15320, "total_steps": 16520, "loss": 0.0458, "lr": 8.006795391621053e-07, "epoch": 18.54721549636804, "percentage": 92.74, "elapsed_time": "0:41:50", "remaining_time": "0:03:16", "throughput": 2502.05, "total_tokens": 6281376}
3083
+ {"current_steps": 15325, "total_steps": 16520, "loss": 0.1008, "lr": 7.940620883416155e-07, "epoch": 18.553268765133172, "percentage": 92.77, "elapsed_time": "0:41:51", "remaining_time": "0:03:15", "throughput": 2502.07, "total_tokens": 6283392}
3084
+ {"current_steps": 15330, "total_steps": 16520, "loss": 0.1188, "lr": 7.874716557280698e-07, "epoch": 18.559322033898304, "percentage": 92.8, "elapsed_time": "0:41:52", "remaining_time": "0:03:15", "throughput": 2502.14, "total_tokens": 6285632}
3085
+ {"current_steps": 15335, "total_steps": 16520, "loss": 0.1077, "lr": 7.809082486775838e-07, "epoch": 18.565375302663437, "percentage": 92.83, "elapsed_time": "0:41:52", "remaining_time": "0:03:14", "throughput": 2502.17, "total_tokens": 6287680}
3086
+ {"current_steps": 15340, "total_steps": 16520, "loss": 0.1117, "lr": 7.743718745161083e-07, "epoch": 18.571428571428573, "percentage": 92.86, "elapsed_time": "0:41:53", "remaining_time": "0:03:13", "throughput": 2502.25, "total_tokens": 6289760}
3087
+ {"current_steps": 15345, "total_steps": 16520, "loss": 0.0793, "lr": 7.678625405394157e-07, "epoch": 18.577481840193705, "percentage": 92.89, "elapsed_time": "0:41:54", "remaining_time": "0:03:12", "throughput": 2502.29, "total_tokens": 6291808}
3088
+ {"current_steps": 15350, "total_steps": 16520, "loss": 0.1208, "lr": 7.613802540131054e-07, "epoch": 18.583535108958838, "percentage": 92.92, "elapsed_time": "0:41:55", "remaining_time": "0:03:11", "throughput": 2502.32, "total_tokens": 6293920}
3089
+ {"current_steps": 15355, "total_steps": 16520, "loss": 0.1002, "lr": 7.549250221725784e-07, "epoch": 18.58958837772397, "percentage": 92.95, "elapsed_time": "0:41:56", "remaining_time": "0:03:10", "throughput": 2502.33, "total_tokens": 6295904}
3090
+ {"current_steps": 15360, "total_steps": 16520, "loss": 0.0285, "lr": 7.484968522230434e-07, "epoch": 18.595641646489103, "percentage": 92.98, "elapsed_time": "0:41:56", "remaining_time": "0:03:10", "throughput": 2502.36, "total_tokens": 6297920}
3091
+ {"current_steps": 15365, "total_steps": 16520, "loss": 0.0718, "lr": 7.420957513395027e-07, "epoch": 18.60169491525424, "percentage": 93.01, "elapsed_time": "0:41:57", "remaining_time": "0:03:09", "throughput": 2502.39, "total_tokens": 6300064}
3092
+ {"current_steps": 15370, "total_steps": 16520, "loss": 0.0574, "lr": 7.357217266667355e-07, "epoch": 18.60774818401937, "percentage": 93.04, "elapsed_time": "0:41:58", "remaining_time": "0:03:08", "throughput": 2502.43, "total_tokens": 6302016}
3093
+ {"current_steps": 15375, "total_steps": 16520, "loss": 0.0671, "lr": 7.293747853193201e-07, "epoch": 18.613801452784504, "percentage": 93.07, "elapsed_time": "0:41:59", "remaining_time": "0:03:07", "throughput": 2502.49, "total_tokens": 6304064}
3094
+ {"current_steps": 15380, "total_steps": 16520, "loss": 0.1402, "lr": 7.230549343815813e-07, "epoch": 18.619854721549636, "percentage": 93.1, "elapsed_time": "0:41:59", "remaining_time": "0:03:06", "throughput": 2502.5, "total_tokens": 6305888}
3095
+ {"current_steps": 15385, "total_steps": 16520, "loss": 0.0822, "lr": 7.16762180907618e-07, "epoch": 18.62590799031477, "percentage": 93.13, "elapsed_time": "0:42:00", "remaining_time": "0:03:05", "throughput": 2502.51, "total_tokens": 6307872}
3096
+ {"current_steps": 15390, "total_steps": 16520, "loss": 0.0771, "lr": 7.10496531921287e-07, "epoch": 18.631961259079905, "percentage": 93.16, "elapsed_time": "0:42:01", "remaining_time": "0:03:05", "throughput": 2502.56, "total_tokens": 6310048}
3097
+ {"current_steps": 15395, "total_steps": 16520, "loss": 0.1386, "lr": 7.042579944161797e-07, "epoch": 18.638014527845037, "percentage": 93.19, "elapsed_time": "0:42:02", "remaining_time": "0:03:04", "throughput": 2502.57, "total_tokens": 6312128}
3098
+ {"current_steps": 15400, "total_steps": 16520, "loss": 0.0731, "lr": 6.980465753556376e-07, "epoch": 18.64406779661017, "percentage": 93.22, "elapsed_time": "0:42:03", "remaining_time": "0:03:03", "throughput": 2502.63, "total_tokens": 6314144}
3099
+ {"current_steps": 15405, "total_steps": 16520, "loss": 0.1487, "lr": 6.918622816727255e-07, "epoch": 18.650121065375302, "percentage": 93.25, "elapsed_time": "0:42:03", "remaining_time": "0:03:02", "throughput": 2502.69, "total_tokens": 6316192}
3100
+ {"current_steps": 15410, "total_steps": 16520, "loss": 0.0903, "lr": 6.85705120270233e-07, "epoch": 18.656174334140434, "percentage": 93.28, "elapsed_time": "0:42:04", "remaining_time": "0:03:01", "throughput": 2502.71, "total_tokens": 6318112}
3101
+ {"current_steps": 15415, "total_steps": 16520, "loss": 0.1183, "lr": 6.795750980206711e-07, "epoch": 18.66222760290557, "percentage": 93.31, "elapsed_time": "0:42:05", "remaining_time": "0:03:01", "throughput": 2502.72, "total_tokens": 6320256}
3102
+ {"current_steps": 15420, "total_steps": 16520, "loss": 0.1214, "lr": 6.734722217662526e-07, "epoch": 18.668280871670703, "percentage": 93.34, "elapsed_time": "0:42:06", "remaining_time": "0:03:00", "throughput": 2502.74, "total_tokens": 6322336}
3103
+ {"current_steps": 15425, "total_steps": 16520, "loss": 0.1038, "lr": 6.673964983188868e-07, "epoch": 18.674334140435835, "percentage": 93.37, "elapsed_time": "0:42:06", "remaining_time": "0:02:59", "throughput": 2502.8, "total_tokens": 6324288}
3104
+ {"current_steps": 15430, "total_steps": 16520, "loss": 0.0616, "lr": 6.613479344601881e-07, "epoch": 18.680387409200968, "percentage": 93.4, "elapsed_time": "0:42:07", "remaining_time": "0:02:58", "throughput": 2502.86, "total_tokens": 6326336}
3105
+ {"current_steps": 15435, "total_steps": 16520, "loss": 0.077, "lr": 6.553265369414419e-07, "epoch": 18.6864406779661, "percentage": 93.43, "elapsed_time": "0:42:08", "remaining_time": "0:02:57", "throughput": 2502.93, "total_tokens": 6328608}
3106
+ {"current_steps": 15440, "total_steps": 16520, "loss": 0.1096, "lr": 6.493323124836193e-07, "epoch": 18.692493946731236, "percentage": 93.46, "elapsed_time": "0:42:09", "remaining_time": "0:02:56", "throughput": 2502.94, "total_tokens": 6330592}
3107
+ {"current_steps": 15445, "total_steps": 16520, "loss": 0.0953, "lr": 6.433652677773627e-07, "epoch": 18.69854721549637, "percentage": 93.49, "elapsed_time": "0:42:09", "remaining_time": "0:02:56", "throughput": 2502.96, "total_tokens": 6332352}
3108
+ {"current_steps": 15450, "total_steps": 16520, "loss": 0.1658, "lr": 6.374254094829723e-07, "epoch": 18.7046004842615, "percentage": 93.52, "elapsed_time": "0:42:10", "remaining_time": "0:02:55", "throughput": 2502.96, "total_tokens": 6334464}
3109
+ {"current_steps": 15455, "total_steps": 16520, "loss": 0.0888, "lr": 6.315127442304003e-07, "epoch": 18.710653753026634, "percentage": 93.55, "elapsed_time": "0:42:11", "remaining_time": "0:02:54", "throughput": 2502.99, "total_tokens": 6336576}
3110
+ {"current_steps": 15460, "total_steps": 16520, "loss": 0.1595, "lr": 6.256272786192563e-07, "epoch": 18.716707021791766, "percentage": 93.58, "elapsed_time": "0:42:12", "remaining_time": "0:02:53", "throughput": 2503.02, "total_tokens": 6338720}
3111
+ {"current_steps": 15465, "total_steps": 16520, "loss": 0.0484, "lr": 6.197690192187827e-07, "epoch": 18.722760290556902, "percentage": 93.61, "elapsed_time": "0:42:13", "remaining_time": "0:02:52", "throughput": 2503.06, "total_tokens": 6340608}
3112
+ {"current_steps": 15470, "total_steps": 16520, "loss": 0.1598, "lr": 6.139379725678602e-07, "epoch": 18.728813559322035, "percentage": 93.64, "elapsed_time": "0:42:13", "remaining_time": "0:02:51", "throughput": 2503.08, "total_tokens": 6342624}
3113
+ {"current_steps": 15475, "total_steps": 16520, "loss": 0.078, "lr": 6.08134145174985e-07, "epoch": 18.734866828087167, "percentage": 93.67, "elapsed_time": "0:42:14", "remaining_time": "0:02:51", "throughput": 2503.15, "total_tokens": 6344672}
3114
+ {"current_steps": 15480, "total_steps": 16520, "loss": 0.1623, "lr": 6.023575435182865e-07, "epoch": 18.7409200968523, "percentage": 93.7, "elapsed_time": "0:42:15", "remaining_time": "0:02:50", "throughput": 2503.19, "total_tokens": 6346816}
3115
+ {"current_steps": 15485, "total_steps": 16520, "loss": 0.0856, "lr": 5.966081740454932e-07, "epoch": 18.746973365617432, "percentage": 93.73, "elapsed_time": "0:42:16", "remaining_time": "0:02:49", "throughput": 2503.18, "total_tokens": 6348768}
3116
+ {"current_steps": 15490, "total_steps": 16520, "loss": 0.176, "lr": 5.90886043173941e-07, "epoch": 18.753026634382568, "percentage": 93.77, "elapsed_time": "0:42:17", "remaining_time": "0:02:48", "throughput": 2503.2, "total_tokens": 6350848}
3117
+ {"current_steps": 15495, "total_steps": 16520, "loss": 0.1083, "lr": 5.851911572905711e-07, "epoch": 18.7590799031477, "percentage": 93.8, "elapsed_time": "0:42:17", "remaining_time": "0:02:47", "throughput": 2503.2, "total_tokens": 6352960}
3118
+ {"current_steps": 15500, "total_steps": 16520, "loss": 0.1052, "lr": 5.79523522751893e-07, "epoch": 18.765133171912833, "percentage": 93.83, "elapsed_time": "0:42:18", "remaining_time": "0:02:47", "throughput": 2503.24, "total_tokens": 6355200}
3119
+ {"current_steps": 15505, "total_steps": 16520, "loss": 0.1273, "lr": 5.738831458840243e-07, "epoch": 18.771186440677965, "percentage": 93.86, "elapsed_time": "0:42:19", "remaining_time": "0:02:46", "throughput": 2503.26, "total_tokens": 6357280}
3120
+ {"current_steps": 15510, "total_steps": 16520, "loss": 0.1265, "lr": 5.682700329826401e-07, "epoch": 18.777239709443098, "percentage": 93.89, "elapsed_time": "0:42:20", "remaining_time": "0:02:45", "throughput": 2503.3, "total_tokens": 6359328}
3121
+ {"current_steps": 15515, "total_steps": 16520, "loss": 0.0685, "lr": 5.626841903129954e-07, "epoch": 18.783292978208234, "percentage": 93.92, "elapsed_time": "0:42:21", "remaining_time": "0:02:44", "throughput": 2503.35, "total_tokens": 6361312}
3122
+ {"current_steps": 15520, "total_steps": 16520, "loss": 0.1262, "lr": 5.571256241098943e-07, "epoch": 18.789346246973366, "percentage": 93.95, "elapsed_time": "0:42:21", "remaining_time": "0:02:43", "throughput": 2503.35, "total_tokens": 6363360}
3123
+ {"current_steps": 15525, "total_steps": 16520, "loss": 0.074, "lr": 5.515943405777102e-07, "epoch": 18.7953995157385, "percentage": 93.98, "elapsed_time": "0:42:22", "remaining_time": "0:02:42", "throughput": 2503.43, "total_tokens": 6365344}
3124
+ {"current_steps": 15530, "total_steps": 16520, "loss": 0.1522, "lr": 5.460903458903488e-07, "epoch": 18.80145278450363, "percentage": 94.01, "elapsed_time": "0:42:23", "remaining_time": "0:02:42", "throughput": 2503.47, "total_tokens": 6367328}
3125
+ {"current_steps": 15535, "total_steps": 16520, "loss": 0.0787, "lr": 5.406136461912709e-07, "epoch": 18.807506053268764, "percentage": 94.04, "elapsed_time": "0:42:24", "remaining_time": "0:02:41", "throughput": 2503.49, "total_tokens": 6369408}
3126
+ {"current_steps": 15540, "total_steps": 16520, "loss": 0.1408, "lr": 5.351642475934587e-07, "epoch": 18.8135593220339, "percentage": 94.07, "elapsed_time": "0:42:25", "remaining_time": "0:02:40", "throughput": 2503.54, "total_tokens": 6371584}
3127
+ {"current_steps": 15545, "total_steps": 16520, "loss": 0.0928, "lr": 5.29742156179433e-07, "epoch": 18.819612590799032, "percentage": 94.1, "elapsed_time": "0:42:25", "remaining_time": "0:02:39", "throughput": 2503.58, "total_tokens": 6373664}
3128
+ {"current_steps": 15550, "total_steps": 16520, "loss": 0.0964, "lr": 5.243473780012248e-07, "epoch": 18.825665859564165, "percentage": 94.13, "elapsed_time": "0:42:26", "remaining_time": "0:02:38", "throughput": 2503.63, "total_tokens": 6375648}
3129
+ {"current_steps": 15555, "total_steps": 16520, "loss": 0.0714, "lr": 5.18979919080384e-07, "epoch": 18.831719128329297, "percentage": 94.16, "elapsed_time": "0:42:27", "remaining_time": "0:02:38", "throughput": 2503.66, "total_tokens": 6377792}
3130
+ {"current_steps": 15560, "total_steps": 16520, "loss": 0.1585, "lr": 5.136397854079655e-07, "epoch": 18.83777239709443, "percentage": 94.19, "elapsed_time": "0:42:28", "remaining_time": "0:02:37", "throughput": 2503.71, "total_tokens": 6379968}
3131
+ {"current_steps": 15565, "total_steps": 16520, "loss": 0.1043, "lr": 5.083269829445236e-07, "epoch": 18.843825665859566, "percentage": 94.22, "elapsed_time": "0:42:28", "remaining_time": "0:02:36", "throughput": 2503.77, "total_tokens": 6382080}
3132
+ {"current_steps": 15570, "total_steps": 16520, "loss": 0.1085, "lr": 5.030415176201093e-07, "epoch": 18.849878934624698, "percentage": 94.25, "elapsed_time": "0:42:29", "remaining_time": "0:02:35", "throughput": 2503.8, "total_tokens": 6384288}
3133
+ {"current_steps": 15575, "total_steps": 16520, "loss": 0.1129, "lr": 4.977833953342615e-07, "epoch": 18.85593220338983, "percentage": 94.28, "elapsed_time": "0:42:30", "remaining_time": "0:02:34", "throughput": 2503.82, "total_tokens": 6386304}
3134
+ {"current_steps": 15580, "total_steps": 16520, "loss": 0.139, "lr": 4.925526219559912e-07, "epoch": 18.861985472154963, "percentage": 94.31, "elapsed_time": "0:42:31", "remaining_time": "0:02:33", "throughput": 2503.89, "total_tokens": 6388544}
3135
+ {"current_steps": 15585, "total_steps": 16520, "loss": 0.0162, "lr": 4.873492033237864e-07, "epoch": 18.868038740920095, "percentage": 94.34, "elapsed_time": "0:42:32", "remaining_time": "0:02:33", "throughput": 2503.9, "total_tokens": 6390528}
3136
+ {"current_steps": 15590, "total_steps": 16520, "loss": 0.0686, "lr": 4.821731452456125e-07, "epoch": 18.87409200968523, "percentage": 94.37, "elapsed_time": "0:42:33", "remaining_time": "0:02:32", "throughput": 2503.95, "total_tokens": 6392608}
3137
+ {"current_steps": 15595, "total_steps": 16520, "loss": 0.0786, "lr": 4.770244534988754e-07, "epoch": 18.880145278450364, "percentage": 94.4, "elapsed_time": "0:42:33", "remaining_time": "0:02:31", "throughput": 2504.0, "total_tokens": 6394784}
3138
+ {"current_steps": 15600, "total_steps": 16520, "loss": 0.1351, "lr": 4.7190313383045637e-07, "epoch": 18.886198547215496, "percentage": 94.43, "elapsed_time": "0:42:34", "remaining_time": "0:02:30", "throughput": 2504.05, "total_tokens": 6396800}
3139
+ {"current_steps": 15605, "total_steps": 16520, "loss": 0.1016, "lr": 4.6680919195667137e-07, "epoch": 18.89225181598063, "percentage": 94.46, "elapsed_time": "0:42:35", "remaining_time": "0:02:29", "throughput": 2504.09, "total_tokens": 6398688}
3140
+ {"current_steps": 15610, "total_steps": 16520, "loss": 0.0627, "lr": 4.6174263356328075e-07, "epoch": 18.89830508474576, "percentage": 94.49, "elapsed_time": "0:42:36", "remaining_time": "0:02:29", "throughput": 2504.13, "total_tokens": 6400736}
3141
+ {"current_steps": 15615, "total_steps": 16520, "loss": 0.1248, "lr": 4.567034643054802e-07, "epoch": 18.904358353510897, "percentage": 94.52, "elapsed_time": "0:42:36", "remaining_time": "0:02:28", "throughput": 2504.18, "total_tokens": 6402720}
3142
+ {"current_steps": 15620, "total_steps": 16520, "loss": 0.1576, "lr": 4.5169168980789545e-07, "epoch": 18.91041162227603, "percentage": 94.55, "elapsed_time": "0:42:37", "remaining_time": "0:02:27", "throughput": 2504.21, "total_tokens": 6404832}
3143
+ {"current_steps": 15625, "total_steps": 16520, "loss": 0.0326, "lr": 4.4670731566457126e-07, "epoch": 18.916464891041162, "percentage": 94.58, "elapsed_time": "0:42:38", "remaining_time": "0:02:26", "throughput": 2504.22, "total_tokens": 6406816}
3144
+ {"current_steps": 15630, "total_steps": 16520, "loss": 0.0862, "lr": 4.4175034743897947e-07, "epoch": 18.922518159806295, "percentage": 94.61, "elapsed_time": "0:42:39", "remaining_time": "0:02:25", "throughput": 2504.21, "total_tokens": 6408832}
3145
+ {"current_steps": 15635, "total_steps": 16520, "loss": 0.0548, "lr": 4.368207906639804e-07, "epoch": 18.928571428571427, "percentage": 94.64, "elapsed_time": "0:42:39", "remaining_time": "0:02:24", "throughput": 2504.24, "total_tokens": 6410848}
3146
+ {"current_steps": 15640, "total_steps": 16520, "loss": 0.1499, "lr": 4.319186508418671e-07, "epoch": 18.934624697336563, "percentage": 94.67, "elapsed_time": "0:42:40", "remaining_time": "0:02:24", "throughput": 2504.29, "total_tokens": 6413024}
3147
+ {"current_steps": 15645, "total_steps": 16520, "loss": 0.0994, "lr": 4.270439334442988e-07, "epoch": 18.940677966101696, "percentage": 94.7, "elapsed_time": "0:42:41", "remaining_time": "0:02:23", "throughput": 2504.34, "total_tokens": 6415040}
3148
+ {"current_steps": 15650, "total_steps": 16520, "loss": 0.0711, "lr": 4.221966439123509e-07, "epoch": 18.946731234866828, "percentage": 94.73, "elapsed_time": "0:42:42", "remaining_time": "0:02:22", "throughput": 2504.36, "total_tokens": 6417216}
3149
+ {"current_steps": 15655, "total_steps": 16520, "loss": 0.0636, "lr": 4.173767876564788e-07, "epoch": 18.95278450363196, "percentage": 94.76, "elapsed_time": "0:42:43", "remaining_time": "0:02:21", "throughput": 2504.44, "total_tokens": 6419200}
3150
+ {"current_steps": 15660, "total_steps": 16520, "loss": 0.1007, "lr": 4.1258437005650687e-07, "epoch": 18.958837772397093, "percentage": 94.79, "elapsed_time": "0:42:43", "remaining_time": "0:02:20", "throughput": 2504.44, "total_tokens": 6421152}
3151
+ {"current_steps": 15665, "total_steps": 16520, "loss": 0.1656, "lr": 4.0781939646164226e-07, "epoch": 18.96489104116223, "percentage": 94.82, "elapsed_time": "0:42:44", "remaining_time": "0:02:19", "throughput": 2504.45, "total_tokens": 6423200}
3152
+ {"current_steps": 15670, "total_steps": 16520, "loss": 0.1192, "lr": 4.030818721904611e-07, "epoch": 18.97094430992736, "percentage": 94.85, "elapsed_time": "0:42:45", "remaining_time": "0:02:19", "throughput": 2504.47, "total_tokens": 6425216}
3153
+ {"current_steps": 15675, "total_steps": 16520, "loss": 0.0817, "lr": 3.983718025308947e-07, "epoch": 18.976997578692494, "percentage": 94.88, "elapsed_time": "0:42:46", "remaining_time": "0:02:18", "throughput": 2504.49, "total_tokens": 6427200}
3154
+ {"current_steps": 15680, "total_steps": 16520, "loss": 0.1085, "lr": 3.9368919274023475e-07, "epoch": 18.983050847457626, "percentage": 94.92, "elapsed_time": "0:42:47", "remaining_time": "0:02:17", "throughput": 2504.51, "total_tokens": 6429216}
3155
+ {"current_steps": 15685, "total_steps": 16520, "loss": 0.09, "lr": 3.890340480451199e-07, "epoch": 18.98910411622276, "percentage": 94.95, "elapsed_time": "0:42:47", "remaining_time": "0:02:16", "throughput": 2504.58, "total_tokens": 6431168}
3156
+ {"current_steps": 15690, "total_steps": 16520, "loss": 0.1607, "lr": 3.8440637364153265e-07, "epoch": 18.995157384987895, "percentage": 94.98, "elapsed_time": "0:42:48", "remaining_time": "0:02:15", "throughput": 2504.59, "total_tokens": 6433152}
3157
+ {"current_steps": 15694, "total_steps": 16520, "eval_loss": 0.14278624951839447, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "0:42:53", "remaining_time": "0:02:15", "throughput": 2499.89, "total_tokens": 6434448}
3158
+ {"current_steps": 15695, "total_steps": 16520, "loss": 0.0971, "lr": 3.7980617469479953e-07, "epoch": 19.001210653753027, "percentage": 95.01, "elapsed_time": "0:42:54", "remaining_time": "0:02:15", "throughput": 2499.06, "total_tokens": 6434832}
3159
+ {"current_steps": 15700, "total_steps": 16520, "loss": 0.1391, "lr": 3.7523345633957153e-07, "epoch": 19.00726392251816, "percentage": 95.04, "elapsed_time": "0:42:55", "remaining_time": "0:02:14", "throughput": 2498.98, "total_tokens": 6436976}
3160
+ {"current_steps": 15705, "total_steps": 16520, "loss": 0.0693, "lr": 3.706882236798298e-07, "epoch": 19.013317191283292, "percentage": 95.07, "elapsed_time": "0:42:56", "remaining_time": "0:02:13", "throughput": 2499.03, "total_tokens": 6438896}