rbelanec commited on
Commit
34f95da
verified
1 Parent(s): 123fda7

Training in progress, step 36252

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +383 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:839f9343ab4c570769fd780d2043c03d5be3cbdcfc8af44444f537653df8ea78
3
  size 8388736
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b9bcb811e3bf6de432235111024192ae250c7e08b865f831efba6026dd8c0d8
3
  size 8388736
trainer_log.jsonl CHANGED
@@ -6890,3 +6890,386 @@
6890
  {"current_steps": 34360, "total_steps": 38150, "loss": 0.1973, "lr": 1.488964589940489e-06, "epoch": 9.006553079947576, "percentage": 90.07, "elapsed_time": "0:57:00", "remaining_time": "0:06:17", "throughput": 2693.79, "total_tokens": 9215224}
6891
  {"current_steps": 34365, "total_steps": 38150, "loss": 0.0915, "lr": 1.4850788801528653e-06, "epoch": 9.00786369593709, "percentage": 90.08, "elapsed_time": "0:57:01", "remaining_time": "0:06:16", "throughput": 2693.77, "total_tokens": 9216280}
6892
  {"current_steps": 34370, "total_steps": 38150, "loss": 0.0635, "lr": 1.4811980919976043e-06, "epoch": 9.009174311926605, "percentage": 90.09, "elapsed_time": "0:57:01", "remaining_time": "0:06:16", "throughput": 2693.8, "total_tokens": 9217608}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6890
  {"current_steps": 34360, "total_steps": 38150, "loss": 0.1973, "lr": 1.488964589940489e-06, "epoch": 9.006553079947576, "percentage": 90.07, "elapsed_time": "0:57:00", "remaining_time": "0:06:17", "throughput": 2693.79, "total_tokens": 9215224}
6891
  {"current_steps": 34365, "total_steps": 38150, "loss": 0.0915, "lr": 1.4850788801528653e-06, "epoch": 9.00786369593709, "percentage": 90.08, "elapsed_time": "0:57:01", "remaining_time": "0:06:16", "throughput": 2693.77, "total_tokens": 9216280}
6892
  {"current_steps": 34370, "total_steps": 38150, "loss": 0.0635, "lr": 1.4811980919976043e-06, "epoch": 9.009174311926605, "percentage": 90.09, "elapsed_time": "0:57:01", "remaining_time": "0:06:16", "throughput": 2693.8, "total_tokens": 9217608}
6893
+ {"current_steps": 34375, "total_steps": 38150, "loss": 0.1578, "lr": 1.4773222262869423e-06, "epoch": 9.01048492791612, "percentage": 90.1, "elapsed_time": "0:57:02", "remaining_time": "0:06:15", "throughput": 2693.85, "total_tokens": 9219000}
6894
+ {"current_steps": 34380, "total_steps": 38150, "loss": 0.0768, "lr": 1.4734512838320974e-06, "epoch": 9.011795543905636, "percentage": 90.12, "elapsed_time": "0:57:02", "remaining_time": "0:06:15", "throughput": 2693.93, "total_tokens": 9220488}
6895
+ {"current_steps": 34385, "total_steps": 38150, "loss": 0.1303, "lr": 1.4695852654432463e-06, "epoch": 9.01310615989515, "percentage": 90.13, "elapsed_time": "0:57:03", "remaining_time": "0:06:14", "throughput": 2693.94, "total_tokens": 9221640}
6896
+ {"current_steps": 34390, "total_steps": 38150, "loss": 0.1586, "lr": 1.4657241719295362e-06, "epoch": 9.014416775884666, "percentage": 90.14, "elapsed_time": "0:57:03", "remaining_time": "0:06:14", "throughput": 2693.98, "total_tokens": 9223064}
6897
+ {"current_steps": 34395, "total_steps": 38150, "loss": 0.1394, "lr": 1.4618680040990983e-06, "epoch": 9.015727391874181, "percentage": 90.16, "elapsed_time": "0:57:03", "remaining_time": "0:06:13", "throughput": 2693.98, "total_tokens": 9224184}
6898
+ {"current_steps": 34400, "total_steps": 38150, "loss": 0.1608, "lr": 1.4580167627590153e-06, "epoch": 9.017038007863697, "percentage": 90.17, "elapsed_time": "0:57:04", "remaining_time": "0:06:13", "throughput": 2694.08, "total_tokens": 9225816}
6899
+ {"current_steps": 34405, "total_steps": 38150, "loss": 0.079, "lr": 1.4541704487153474e-06, "epoch": 9.01834862385321, "percentage": 90.18, "elapsed_time": "0:57:04", "remaining_time": "0:06:12", "throughput": 2694.07, "total_tokens": 9226888}
6900
+ {"current_steps": 34410, "total_steps": 38150, "loss": 0.18, "lr": 1.4503290627731175e-06, "epoch": 9.019659239842726, "percentage": 90.2, "elapsed_time": "0:57:05", "remaining_time": "0:06:12", "throughput": 2694.24, "total_tokens": 9229096}
6901
+ {"current_steps": 34415, "total_steps": 38150, "loss": 0.0974, "lr": 1.4464926057363298e-06, "epoch": 9.020969855832242, "percentage": 90.21, "elapsed_time": "0:57:05", "remaining_time": "0:06:11", "throughput": 2694.16, "total_tokens": 9229896}
6902
+ {"current_steps": 34420, "total_steps": 38150, "loss": 0.1185, "lr": 1.4426610784079391e-06, "epoch": 9.022280471821757, "percentage": 90.22, "elapsed_time": "0:57:06", "remaining_time": "0:06:11", "throughput": 2694.12, "total_tokens": 9230872}
6903
+ {"current_steps": 34425, "total_steps": 38150, "loss": 0.1647, "lr": 1.438834481589882e-06, "epoch": 9.02359108781127, "percentage": 90.24, "elapsed_time": "0:57:06", "remaining_time": "0:06:10", "throughput": 2694.19, "total_tokens": 9232296}
6904
+ {"current_steps": 34430, "total_steps": 38150, "loss": 0.1677, "lr": 1.4350128160830562e-06, "epoch": 9.024901703800786, "percentage": 90.25, "elapsed_time": "0:57:07", "remaining_time": "0:06:10", "throughput": 2694.26, "total_tokens": 9233880}
6905
+ {"current_steps": 34435, "total_steps": 38150, "loss": 0.1668, "lr": 1.4311960826873305e-06, "epoch": 9.026212319790302, "percentage": 90.26, "elapsed_time": "0:57:07", "remaining_time": "0:06:09", "throughput": 2694.27, "total_tokens": 9235000}
6906
+ {"current_steps": 34440, "total_steps": 38150, "loss": 0.1543, "lr": 1.427384282201541e-06, "epoch": 9.027522935779816, "percentage": 90.28, "elapsed_time": "0:57:08", "remaining_time": "0:06:09", "throughput": 2694.29, "total_tokens": 9236232}
6907
+ {"current_steps": 34445, "total_steps": 38150, "loss": 0.1273, "lr": 1.4235774154234855e-06, "epoch": 9.028833551769331, "percentage": 90.29, "elapsed_time": "0:57:08", "remaining_time": "0:06:08", "throughput": 2694.35, "total_tokens": 9237656}
6908
+ {"current_steps": 34450, "total_steps": 38150, "loss": 0.1291, "lr": 1.4197754831499488e-06, "epoch": 9.030144167758847, "percentage": 90.3, "elapsed_time": "0:57:08", "remaining_time": "0:06:08", "throughput": 2694.4, "total_tokens": 9238984}
6909
+ {"current_steps": 34455, "total_steps": 38150, "loss": 0.1061, "lr": 1.4159784861766584e-06, "epoch": 9.031454783748362, "percentage": 90.31, "elapsed_time": "0:57:09", "remaining_time": "0:06:07", "throughput": 2694.38, "total_tokens": 9240024}
6910
+ {"current_steps": 34460, "total_steps": 38150, "loss": 0.1514, "lr": 1.4121864252983174e-06, "epoch": 9.032765399737876, "percentage": 90.33, "elapsed_time": "0:57:09", "remaining_time": "0:06:07", "throughput": 2694.42, "total_tokens": 9241416}
6911
+ {"current_steps": 34465, "total_steps": 38150, "loss": 0.0981, "lr": 1.408399301308605e-06, "epoch": 9.034076015727392, "percentage": 90.34, "elapsed_time": "0:57:10", "remaining_time": "0:06:06", "throughput": 2694.45, "total_tokens": 9242712}
6912
+ {"current_steps": 34470, "total_steps": 38150, "loss": 0.213, "lr": 1.4046171150001508e-06, "epoch": 9.035386631716907, "percentage": 90.35, "elapsed_time": "0:57:10", "remaining_time": "0:06:06", "throughput": 2694.57, "total_tokens": 9244584}
6913
+ {"current_steps": 34475, "total_steps": 38150, "loss": 0.1191, "lr": 1.4008398671645688e-06, "epoch": 9.036697247706423, "percentage": 90.37, "elapsed_time": "0:57:11", "remaining_time": "0:06:05", "throughput": 2694.62, "total_tokens": 9246040}
6914
+ {"current_steps": 34480, "total_steps": 38150, "loss": 0.1186, "lr": 1.3970675585924241e-06, "epoch": 9.038007863695936, "percentage": 90.38, "elapsed_time": "0:57:11", "remaining_time": "0:06:05", "throughput": 2694.56, "total_tokens": 9246936}
6915
+ {"current_steps": 34485, "total_steps": 38150, "loss": 0.1932, "lr": 1.3933001900732572e-06, "epoch": 9.039318479685452, "percentage": 90.39, "elapsed_time": "0:57:12", "remaining_time": "0:06:04", "throughput": 2694.55, "total_tokens": 9248104}
6916
+ {"current_steps": 34490, "total_steps": 38150, "loss": 0.101, "lr": 1.3895377623955707e-06, "epoch": 9.040629095674968, "percentage": 90.41, "elapsed_time": "0:57:12", "remaining_time": "0:06:04", "throughput": 2694.64, "total_tokens": 9249848}
6917
+ {"current_steps": 34495, "total_steps": 38150, "loss": 0.1304, "lr": 1.3857802763468403e-06, "epoch": 9.041939711664483, "percentage": 90.42, "elapsed_time": "0:57:13", "remaining_time": "0:06:03", "throughput": 2694.68, "total_tokens": 9251144}
6918
+ {"current_steps": 34500, "total_steps": 38150, "loss": 0.1539, "lr": 1.3820277327134866e-06, "epoch": 9.043250327653997, "percentage": 90.43, "elapsed_time": "0:57:13", "remaining_time": "0:06:03", "throughput": 2694.7, "total_tokens": 9252472}
6919
+ {"current_steps": 34505, "total_steps": 38150, "loss": 0.0882, "lr": 1.3782801322809262e-06, "epoch": 9.044560943643512, "percentage": 90.45, "elapsed_time": "0:57:13", "remaining_time": "0:06:02", "throughput": 2694.7, "total_tokens": 9253576}
6920
+ {"current_steps": 34510, "total_steps": 38150, "loss": 0.0872, "lr": 1.374537475833515e-06, "epoch": 9.045871559633028, "percentage": 90.46, "elapsed_time": "0:57:14", "remaining_time": "0:06:02", "throughput": 2694.76, "total_tokens": 9254968}
6921
+ {"current_steps": 34515, "total_steps": 38150, "loss": 0.2852, "lr": 1.37079976415459e-06, "epoch": 9.047182175622543, "percentage": 90.47, "elapsed_time": "0:57:14", "remaining_time": "0:06:01", "throughput": 2694.84, "total_tokens": 9256440}
6922
+ {"current_steps": 34520, "total_steps": 38150, "loss": 0.1108, "lr": 1.3670669980264477e-06, "epoch": 9.048492791612057, "percentage": 90.48, "elapsed_time": "0:57:15", "remaining_time": "0:06:01", "throughput": 2694.87, "total_tokens": 9257688}
6923
+ {"current_steps": 34525, "total_steps": 38150, "loss": 0.0785, "lr": 1.3633391782303468e-06, "epoch": 9.049803407601573, "percentage": 90.5, "elapsed_time": "0:57:15", "remaining_time": "0:06:00", "throughput": 2694.83, "total_tokens": 9258664}
6924
+ {"current_steps": 34530, "total_steps": 38150, "loss": 0.1338, "lr": 1.3596163055465154e-06, "epoch": 9.051114023591088, "percentage": 90.51, "elapsed_time": "0:57:16", "remaining_time": "0:06:00", "throughput": 2694.93, "total_tokens": 9260344}
6925
+ {"current_steps": 34535, "total_steps": 38150, "loss": 0.2299, "lr": 1.3558983807541476e-06, "epoch": 9.052424639580602, "percentage": 90.52, "elapsed_time": "0:57:16", "remaining_time": "0:05:59", "throughput": 2695.15, "total_tokens": 9263144}
6926
+ {"current_steps": 34540, "total_steps": 38150, "loss": 0.1693, "lr": 1.3521854046313897e-06, "epoch": 9.053735255570118, "percentage": 90.54, "elapsed_time": "0:57:17", "remaining_time": "0:05:59", "throughput": 2695.11, "total_tokens": 9264136}
6927
+ {"current_steps": 34545, "total_steps": 38150, "loss": 0.2379, "lr": 1.3484773779553677e-06, "epoch": 9.055045871559633, "percentage": 90.55, "elapsed_time": "0:57:17", "remaining_time": "0:05:58", "throughput": 2695.32, "total_tokens": 9266360}
6928
+ {"current_steps": 34550, "total_steps": 38150, "loss": 0.1891, "lr": 1.3447743015021636e-06, "epoch": 9.056356487549149, "percentage": 90.56, "elapsed_time": "0:57:18", "remaining_time": "0:05:58", "throughput": 2695.33, "total_tokens": 9267624}
6929
+ {"current_steps": 34555, "total_steps": 38150, "loss": 0.1979, "lr": 1.3410761760468265e-06, "epoch": 9.057667103538662, "percentage": 90.58, "elapsed_time": "0:57:18", "remaining_time": "0:05:57", "throughput": 2695.4, "total_tokens": 9269112}
6930
+ {"current_steps": 34560, "total_steps": 38150, "loss": 0.1239, "lr": 1.3373830023633598e-06, "epoch": 9.058977719528178, "percentage": 90.59, "elapsed_time": "0:57:19", "remaining_time": "0:05:57", "throughput": 2695.45, "total_tokens": 9270440}
6931
+ {"current_steps": 34565, "total_steps": 38150, "loss": 0.0569, "lr": 1.3336947812247507e-06, "epoch": 9.060288335517694, "percentage": 90.6, "elapsed_time": "0:57:19", "remaining_time": "0:05:56", "throughput": 2695.47, "total_tokens": 9271704}
6932
+ {"current_steps": 34570, "total_steps": 38150, "loss": 0.0781, "lr": 1.3300115134029345e-06, "epoch": 9.061598951507209, "percentage": 90.62, "elapsed_time": "0:57:20", "remaining_time": "0:05:56", "throughput": 2695.5, "total_tokens": 9272984}
6933
+ {"current_steps": 34575, "total_steps": 38150, "loss": 0.0586, "lr": 1.3263331996688055e-06, "epoch": 9.062909567496723, "percentage": 90.63, "elapsed_time": "0:57:20", "remaining_time": "0:05:55", "throughput": 2695.57, "total_tokens": 9274488}
6934
+ {"current_steps": 34580, "total_steps": 38150, "loss": 0.1368, "lr": 1.3226598407922342e-06, "epoch": 9.064220183486238, "percentage": 90.64, "elapsed_time": "0:57:21", "remaining_time": "0:05:55", "throughput": 2695.72, "total_tokens": 9276504}
6935
+ {"current_steps": 34585, "total_steps": 38150, "loss": 0.1066, "lr": 1.318991437542047e-06, "epoch": 9.065530799475754, "percentage": 90.66, "elapsed_time": "0:57:21", "remaining_time": "0:05:54", "throughput": 2695.82, "total_tokens": 9278200}
6936
+ {"current_steps": 34590, "total_steps": 38150, "loss": 0.1826, "lr": 1.3153279906860355e-06, "epoch": 9.06684141546527, "percentage": 90.67, "elapsed_time": "0:57:22", "remaining_time": "0:05:54", "throughput": 2695.9, "total_tokens": 9279880}
6937
+ {"current_steps": 34595, "total_steps": 38150, "loss": 0.0608, "lr": 1.3116695009909474e-06, "epoch": 9.068152031454783, "percentage": 90.68, "elapsed_time": "0:57:22", "remaining_time": "0:05:53", "throughput": 2695.88, "total_tokens": 9280952}
6938
+ {"current_steps": 34600, "total_steps": 38150, "loss": 0.1406, "lr": 1.3080159692225063e-06, "epoch": 9.069462647444299, "percentage": 90.69, "elapsed_time": "0:57:23", "remaining_time": "0:05:53", "throughput": 2695.86, "total_tokens": 9282008}
6939
+ {"current_steps": 34605, "total_steps": 38150, "loss": 0.1662, "lr": 1.304367396145384e-06, "epoch": 9.070773263433814, "percentage": 90.71, "elapsed_time": "0:57:23", "remaining_time": "0:05:52", "throughput": 2695.89, "total_tokens": 9283304}
6940
+ {"current_steps": 34610, "total_steps": 38150, "loss": 0.1283, "lr": 1.300723782523225e-06, "epoch": 9.07208387942333, "percentage": 90.72, "elapsed_time": "0:57:23", "remaining_time": "0:05:52", "throughput": 2695.95, "total_tokens": 9284696}
6941
+ {"current_steps": 34615, "total_steps": 38150, "loss": 0.0908, "lr": 1.2970851291186276e-06, "epoch": 9.073394495412844, "percentage": 90.73, "elapsed_time": "0:57:24", "remaining_time": "0:05:51", "throughput": 2695.94, "total_tokens": 9285880}
6942
+ {"current_steps": 34620, "total_steps": 38150, "loss": 0.0626, "lr": 1.2934514366931578e-06, "epoch": 9.07470511140236, "percentage": 90.75, "elapsed_time": "0:57:24", "remaining_time": "0:05:51", "throughput": 2695.96, "total_tokens": 9287112}
6943
+ {"current_steps": 34625, "total_steps": 38150, "loss": 0.1082, "lr": 1.2898227060073403e-06, "epoch": 9.076015727391875, "percentage": 90.76, "elapsed_time": "0:57:25", "remaining_time": "0:05:50", "throughput": 2696.03, "total_tokens": 9288488}
6944
+ {"current_steps": 34630, "total_steps": 38150, "loss": 0.1496, "lr": 1.2861989378206624e-06, "epoch": 9.077326343381388, "percentage": 90.77, "elapsed_time": "0:57:25", "remaining_time": "0:05:50", "throughput": 2696.06, "total_tokens": 9290072}
6945
+ {"current_steps": 34635, "total_steps": 38150, "loss": 0.1152, "lr": 1.2825801328915699e-06, "epoch": 9.078636959370904, "percentage": 90.79, "elapsed_time": "0:57:26", "remaining_time": "0:05:49", "throughput": 2696.05, "total_tokens": 9291160}
6946
+ {"current_steps": 34640, "total_steps": 38150, "loss": 0.1418, "lr": 1.2789662919774792e-06, "epoch": 9.07994757536042, "percentage": 90.8, "elapsed_time": "0:57:26", "remaining_time": "0:05:49", "throughput": 2696.07, "total_tokens": 9292408}
6947
+ {"current_steps": 34645, "total_steps": 38150, "loss": 0.2247, "lr": 1.2753574158347547e-06, "epoch": 9.081258191349935, "percentage": 90.81, "elapsed_time": "0:57:27", "remaining_time": "0:05:48", "throughput": 2696.2, "total_tokens": 9294296}
6948
+ {"current_steps": 34650, "total_steps": 38150, "loss": 0.1494, "lr": 1.271753505218734e-06, "epoch": 9.082568807339449, "percentage": 90.83, "elapsed_time": "0:57:27", "remaining_time": "0:05:48", "throughput": 2696.16, "total_tokens": 9295256}
6949
+ {"current_steps": 34655, "total_steps": 38150, "loss": 0.1617, "lr": 1.2681545608837026e-06, "epoch": 9.083879423328964, "percentage": 90.84, "elapsed_time": "0:57:28", "remaining_time": "0:05:47", "throughput": 2696.22, "total_tokens": 9296696}
6950
+ {"current_steps": 34660, "total_steps": 38150, "loss": 0.1594, "lr": 1.2645605835829132e-06, "epoch": 9.08519003931848, "percentage": 90.85, "elapsed_time": "0:57:28", "remaining_time": "0:05:47", "throughput": 2696.38, "total_tokens": 9298792}
6951
+ {"current_steps": 34665, "total_steps": 38150, "loss": 0.1629, "lr": 1.2609715740685869e-06, "epoch": 9.086500655307995, "percentage": 90.87, "elapsed_time": "0:57:29", "remaining_time": "0:05:46", "throughput": 2696.45, "total_tokens": 9300456}
6952
+ {"current_steps": 34670, "total_steps": 38150, "loss": 0.1262, "lr": 1.257387533091889e-06, "epoch": 9.08781127129751, "percentage": 90.88, "elapsed_time": "0:57:29", "remaining_time": "0:05:46", "throughput": 2696.54, "total_tokens": 9302104}
6953
+ {"current_steps": 34675, "total_steps": 38150, "loss": 0.1103, "lr": 1.2538084614029527e-06, "epoch": 9.089121887287025, "percentage": 90.89, "elapsed_time": "0:57:30", "remaining_time": "0:05:45", "throughput": 2696.54, "total_tokens": 9303256}
6954
+ {"current_steps": 34680, "total_steps": 38150, "loss": 0.1129, "lr": 1.2502343597508792e-06, "epoch": 9.09043250327654, "percentage": 90.9, "elapsed_time": "0:57:30", "remaining_time": "0:05:45", "throughput": 2696.62, "total_tokens": 9304776}
6955
+ {"current_steps": 34685, "total_steps": 38150, "loss": 0.3951, "lr": 1.2466652288837227e-06, "epoch": 9.091743119266056, "percentage": 90.92, "elapsed_time": "0:57:31", "remaining_time": "0:05:44", "throughput": 2696.68, "total_tokens": 9306296}
6956
+ {"current_steps": 34690, "total_steps": 38150, "loss": 0.1972, "lr": 1.2431010695484858e-06, "epoch": 9.09305373525557, "percentage": 90.93, "elapsed_time": "0:57:31", "remaining_time": "0:05:44", "throughput": 2696.77, "total_tokens": 9307800}
6957
+ {"current_steps": 34695, "total_steps": 38150, "loss": 0.1351, "lr": 1.2395418824911464e-06, "epoch": 9.094364351245085, "percentage": 90.94, "elapsed_time": "0:57:31", "remaining_time": "0:05:43", "throughput": 2696.81, "total_tokens": 9309128}
6958
+ {"current_steps": 34700, "total_steps": 38150, "loss": 0.1244, "lr": 1.2359876684566368e-06, "epoch": 9.0956749672346, "percentage": 90.96, "elapsed_time": "0:57:32", "remaining_time": "0:05:43", "throughput": 2696.87, "total_tokens": 9310520}
6959
+ {"current_steps": 34705, "total_steps": 38150, "loss": 0.1674, "lr": 1.232438428188848e-06, "epoch": 9.096985583224116, "percentage": 90.97, "elapsed_time": "0:57:32", "remaining_time": "0:05:42", "throughput": 2696.86, "total_tokens": 9311640}
6960
+ {"current_steps": 34710, "total_steps": 38150, "loss": 0.1018, "lr": 1.228894162430627e-06, "epoch": 9.09829619921363, "percentage": 90.98, "elapsed_time": "0:57:33", "remaining_time": "0:05:42", "throughput": 2696.82, "total_tokens": 9312568}
6961
+ {"current_steps": 34715, "total_steps": 38150, "loss": 0.1521, "lr": 1.225354871923784e-06, "epoch": 9.099606815203146, "percentage": 91.0, "elapsed_time": "0:57:33", "remaining_time": "0:05:41", "throughput": 2696.92, "total_tokens": 9314424}
6962
+ {"current_steps": 34720, "total_steps": 38150, "loss": 0.1366, "lr": 1.2218205574090896e-06, "epoch": 9.100917431192661, "percentage": 91.01, "elapsed_time": "0:57:34", "remaining_time": "0:05:41", "throughput": 2697.03, "total_tokens": 9316008}
6963
+ {"current_steps": 34725, "total_steps": 38150, "loss": 0.096, "lr": 1.2182912196262664e-06, "epoch": 9.102228047182175, "percentage": 91.02, "elapsed_time": "0:57:34", "remaining_time": "0:05:40", "throughput": 2697.03, "total_tokens": 9317128}
6964
+ {"current_steps": 34730, "total_steps": 38150, "loss": 0.1461, "lr": 1.2147668593139982e-06, "epoch": 9.10353866317169, "percentage": 91.04, "elapsed_time": "0:57:35", "remaining_time": "0:05:40", "throughput": 2697.02, "total_tokens": 9318216}
6965
+ {"current_steps": 34735, "total_steps": 38150, "loss": 0.1207, "lr": 1.2112474772099285e-06, "epoch": 9.104849279161206, "percentage": 91.05, "elapsed_time": "0:57:35", "remaining_time": "0:05:39", "throughput": 2696.97, "total_tokens": 9319208}
6966
+ {"current_steps": 34740, "total_steps": 38150, "loss": 0.1102, "lr": 1.2077330740506598e-06, "epoch": 9.106159895150721, "percentage": 91.06, "elapsed_time": "0:57:35", "remaining_time": "0:05:39", "throughput": 2696.95, "total_tokens": 9320248}
6967
+ {"current_steps": 34745, "total_steps": 38150, "loss": 0.1088, "lr": 1.2042236505717452e-06, "epoch": 9.107470511140235, "percentage": 91.07, "elapsed_time": "0:57:36", "remaining_time": "0:05:38", "throughput": 2697.05, "total_tokens": 9321912}
6968
+ {"current_steps": 34750, "total_steps": 38150, "loss": 0.0969, "lr": 1.2007192075077085e-06, "epoch": 9.10878112712975, "percentage": 91.09, "elapsed_time": "0:57:36", "remaining_time": "0:05:38", "throughput": 2697.05, "total_tokens": 9323080}
6969
+ {"current_steps": 34755, "total_steps": 38150, "loss": 0.1627, "lr": 1.1972197455920181e-06, "epoch": 9.110091743119266, "percentage": 91.1, "elapsed_time": "0:57:37", "remaining_time": "0:05:37", "throughput": 2697.09, "total_tokens": 9324328}
6970
+ {"current_steps": 34760, "total_steps": 38150, "loss": 0.0799, "lr": 1.193725265557108e-06, "epoch": 9.111402359108782, "percentage": 91.11, "elapsed_time": "0:57:37", "remaining_time": "0:05:37", "throughput": 2697.05, "total_tokens": 9325320}
6971
+ {"current_steps": 34765, "total_steps": 38150, "loss": 0.1456, "lr": 1.1902357681343623e-06, "epoch": 9.112712975098296, "percentage": 91.13, "elapsed_time": "0:57:38", "remaining_time": "0:05:36", "throughput": 2697.09, "total_tokens": 9326632}
6972
+ {"current_steps": 34770, "total_steps": 38150, "loss": 0.1347, "lr": 1.1867512540541388e-06, "epoch": 9.114023591087811, "percentage": 91.14, "elapsed_time": "0:57:38", "remaining_time": "0:05:36", "throughput": 2697.09, "total_tokens": 9327768}
6973
+ {"current_steps": 34775, "total_steps": 38150, "loss": 0.15, "lr": 1.1832717240457231e-06, "epoch": 9.115334207077327, "percentage": 91.15, "elapsed_time": "0:57:38", "remaining_time": "0:05:35", "throughput": 2697.16, "total_tokens": 9329176}
6974
+ {"current_steps": 34780, "total_steps": 38150, "loss": 0.0989, "lr": 1.1797971788373857e-06, "epoch": 9.116644823066842, "percentage": 91.17, "elapsed_time": "0:57:39", "remaining_time": "0:05:35", "throughput": 2697.19, "total_tokens": 9330504}
6975
+ {"current_steps": 34785, "total_steps": 38150, "loss": 0.1233, "lr": 1.1763276191563422e-06, "epoch": 9.117955439056356, "percentage": 91.18, "elapsed_time": "0:57:39", "remaining_time": "0:05:34", "throughput": 2697.22, "total_tokens": 9331768}
6976
+ {"current_steps": 34790, "total_steps": 38150, "loss": 0.1091, "lr": 1.1728630457287587e-06, "epoch": 9.119266055045872, "percentage": 91.19, "elapsed_time": "0:57:40", "remaining_time": "0:05:34", "throughput": 2697.21, "total_tokens": 9332824}
6977
+ {"current_steps": 34795, "total_steps": 38150, "loss": 0.1607, "lr": 1.1694034592797747e-06, "epoch": 9.120576671035387, "percentage": 91.21, "elapsed_time": "0:57:40", "remaining_time": "0:05:33", "throughput": 2697.33, "total_tokens": 9334616}
6978
+ {"current_steps": 34800, "total_steps": 38150, "loss": 0.1656, "lr": 1.1659488605334695e-06, "epoch": 9.1218872870249, "percentage": 91.22, "elapsed_time": "0:57:41", "remaining_time": "0:05:33", "throughput": 2697.36, "total_tokens": 9335864}
6979
+ {"current_steps": 34805, "total_steps": 38150, "loss": 0.2345, "lr": 1.1624992502128868e-06, "epoch": 9.123197903014416, "percentage": 91.23, "elapsed_time": "0:57:41", "remaining_time": "0:05:32", "throughput": 2697.42, "total_tokens": 9337336}
6980
+ {"current_steps": 34810, "total_steps": 38150, "loss": 0.0748, "lr": 1.159054629040024e-06, "epoch": 9.124508519003932, "percentage": 91.25, "elapsed_time": "0:57:42", "remaining_time": "0:05:32", "throughput": 2697.45, "total_tokens": 9338600}
6981
+ {"current_steps": 34815, "total_steps": 38150, "loss": 0.0783, "lr": 1.1556149977358295e-06, "epoch": 9.125819134993447, "percentage": 91.26, "elapsed_time": "0:57:42", "remaining_time": "0:05:31", "throughput": 2697.44, "total_tokens": 9339704}
6982
+ {"current_steps": 34820, "total_steps": 38150, "loss": 0.0724, "lr": 1.1521803570202188e-06, "epoch": 9.127129750982961, "percentage": 91.27, "elapsed_time": "0:57:42", "remaining_time": "0:05:31", "throughput": 2697.41, "total_tokens": 9340712}
6983
+ {"current_steps": 34825, "total_steps": 38150, "loss": 0.1074, "lr": 1.1487507076120507e-06, "epoch": 9.128440366972477, "percentage": 91.28, "elapsed_time": "0:57:43", "remaining_time": "0:05:30", "throughput": 2697.35, "total_tokens": 9341640}
6984
+ {"current_steps": 34830, "total_steps": 38150, "loss": 0.2548, "lr": 1.1453260502291507e-06, "epoch": 9.129750982961992, "percentage": 91.3, "elapsed_time": "0:57:43", "remaining_time": "0:05:30", "throughput": 2697.41, "total_tokens": 9343096}
6985
+ {"current_steps": 34835, "total_steps": 38150, "loss": 0.136, "lr": 1.14190638558829e-06, "epoch": 9.131061598951508, "percentage": 91.31, "elapsed_time": "0:57:44", "remaining_time": "0:05:29", "throughput": 2697.4, "total_tokens": 9344184}
6986
+ {"current_steps": 34840, "total_steps": 38150, "loss": 0.1355, "lr": 1.1384917144051987e-06, "epoch": 9.132372214941022, "percentage": 91.32, "elapsed_time": "0:57:44", "remaining_time": "0:05:29", "throughput": 2697.45, "total_tokens": 9345512}
6987
+ {"current_steps": 34845, "total_steps": 38150, "loss": 0.1417, "lr": 1.135082037394561e-06, "epoch": 9.133682830930537, "percentage": 91.34, "elapsed_time": "0:57:45", "remaining_time": "0:05:28", "throughput": 2697.51, "total_tokens": 9346888}
6988
+ {"current_steps": 34850, "total_steps": 38150, "loss": 0.1402, "lr": 1.1316773552700193e-06, "epoch": 9.134993446920053, "percentage": 91.35, "elapsed_time": "0:57:45", "remaining_time": "0:05:28", "throughput": 2697.52, "total_tokens": 9348104}
6989
+ {"current_steps": 34855, "total_steps": 38150, "loss": 0.2211, "lr": 1.1282776687441626e-06, "epoch": 9.136304062909568, "percentage": 91.36, "elapsed_time": "0:57:45", "remaining_time": "0:05:27", "throughput": 2697.59, "total_tokens": 9349640}
6990
+ {"current_steps": 34860, "total_steps": 38150, "loss": 0.1368, "lr": 1.1248829785285463e-06, "epoch": 9.137614678899082, "percentage": 91.38, "elapsed_time": "0:57:46", "remaining_time": "0:05:27", "throughput": 2697.63, "total_tokens": 9350984}
6991
+ {"current_steps": 34865, "total_steps": 38150, "loss": 0.1729, "lr": 1.121493285333669e-06, "epoch": 9.138925294888598, "percentage": 91.39, "elapsed_time": "0:57:46", "remaining_time": "0:05:26", "throughput": 2697.65, "total_tokens": 9352200}
6992
+ {"current_steps": 34870, "total_steps": 38150, "loss": 0.1574, "lr": 1.1181085898689881e-06, "epoch": 9.140235910878113, "percentage": 91.4, "elapsed_time": "0:57:47", "remaining_time": "0:05:26", "throughput": 2697.69, "total_tokens": 9353656}
6993
+ {"current_steps": 34875, "total_steps": 38150, "loss": 0.0941, "lr": 1.1147288928429118e-06, "epoch": 9.141546526867629, "percentage": 91.42, "elapsed_time": "0:57:47", "remaining_time": "0:05:25", "throughput": 2697.74, "total_tokens": 9355032}
6994
+ {"current_steps": 34880, "total_steps": 38150, "loss": 0.1459, "lr": 1.1113541949628104e-06, "epoch": 9.142857142857142, "percentage": 91.43, "elapsed_time": "0:57:48", "remaining_time": "0:05:25", "throughput": 2697.75, "total_tokens": 9356248}
6995
+ {"current_steps": 34885, "total_steps": 38150, "loss": 0.0994, "lr": 1.1079844969350023e-06, "epoch": 9.144167758846658, "percentage": 91.44, "elapsed_time": "0:57:48", "remaining_time": "0:05:24", "throughput": 2697.75, "total_tokens": 9357480}
6996
+ {"current_steps": 34890, "total_steps": 38150, "loss": 0.11, "lr": 1.104619799464754e-06, "epoch": 9.145478374836173, "percentage": 91.45, "elapsed_time": "0:57:49", "remaining_time": "0:05:24", "throughput": 2697.71, "total_tokens": 9358472}
6997
+ {"current_steps": 34895, "total_steps": 38150, "loss": 0.1664, "lr": 1.1012601032562935e-06, "epoch": 9.146788990825687, "percentage": 91.47, "elapsed_time": "0:57:49", "remaining_time": "0:05:23", "throughput": 2697.79, "total_tokens": 9360072}
6998
+ {"current_steps": 34900, "total_steps": 38150, "loss": 0.1578, "lr": 1.0979054090127978e-06, "epoch": 9.148099606815203, "percentage": 91.48, "elapsed_time": "0:57:49", "remaining_time": "0:05:23", "throughput": 2697.82, "total_tokens": 9361432}
6999
+ {"current_steps": 34905, "total_steps": 38150, "loss": 0.1322, "lr": 1.094555717436399e-06, "epoch": 9.149410222804718, "percentage": 91.49, "elapsed_time": "0:57:50", "remaining_time": "0:05:22", "throughput": 2697.83, "total_tokens": 9362600}
7000
+ {"current_steps": 34910, "total_steps": 38150, "loss": 0.125, "lr": 1.0912110292281868e-06, "epoch": 9.150720838794234, "percentage": 91.51, "elapsed_time": "0:57:50", "remaining_time": "0:05:22", "throughput": 2697.86, "total_tokens": 9363832}
7001
+ {"current_steps": 34915, "total_steps": 38150, "loss": 0.1194, "lr": 1.0878713450881928e-06, "epoch": 9.152031454783748, "percentage": 91.52, "elapsed_time": "0:57:51", "remaining_time": "0:05:21", "throughput": 2697.9, "total_tokens": 9365160}
7002
+ {"current_steps": 34920, "total_steps": 38150, "loss": 0.4938, "lr": 1.084536665715416e-06, "epoch": 9.153342070773263, "percentage": 91.53, "elapsed_time": "0:57:51", "remaining_time": "0:05:21", "throughput": 2697.94, "total_tokens": 9366504}
7003
+ {"current_steps": 34925, "total_steps": 38150, "loss": 0.1065, "lr": 1.0812069918077844e-06, "epoch": 9.154652686762779, "percentage": 91.55, "elapsed_time": "0:57:52", "remaining_time": "0:05:20", "throughput": 2697.96, "total_tokens": 9367768}
7004
+ {"current_steps": 34930, "total_steps": 38150, "loss": 0.0997, "lr": 1.0778823240622043e-06, "epoch": 9.155963302752294, "percentage": 91.56, "elapsed_time": "0:57:52", "remaining_time": "0:05:20", "throughput": 2697.95, "total_tokens": 9368872}
7005
+ {"current_steps": 34935, "total_steps": 38150, "loss": 0.2265, "lr": 1.0745626631745193e-06, "epoch": 9.157273918741808, "percentage": 91.57, "elapsed_time": "0:57:53", "remaining_time": "0:05:19", "throughput": 2698.19, "total_tokens": 9371912}
7006
+ {"current_steps": 34940, "total_steps": 38150, "loss": 0.1502, "lr": 1.0712480098395289e-06, "epoch": 9.158584534731324, "percentage": 91.59, "elapsed_time": "0:57:53", "remaining_time": "0:05:19", "throughput": 2698.27, "total_tokens": 9373624}
7007
+ {"current_steps": 34945, "total_steps": 38150, "loss": 0.0645, "lr": 1.0679383647509839e-06, "epoch": 9.159895150720839, "percentage": 91.6, "elapsed_time": "0:57:54", "remaining_time": "0:05:18", "throughput": 2698.31, "total_tokens": 9374984}
7008
+ {"current_steps": 34950, "total_steps": 38150, "loss": 0.1084, "lr": 1.0646337286015856e-06, "epoch": 9.161205766710355, "percentage": 91.61, "elapsed_time": "0:57:54", "remaining_time": "0:05:18", "throughput": 2698.25, "total_tokens": 9375880}
7009
+ {"current_steps": 34955, "total_steps": 38150, "loss": 0.1666, "lr": 1.0613341020829948e-06, "epoch": 9.162516382699868, "percentage": 91.63, "elapsed_time": "0:57:55", "remaining_time": "0:05:17", "throughput": 2698.27, "total_tokens": 9377112}
7010
+ {"current_steps": 34960, "total_steps": 38150, "loss": 0.1074, "lr": 1.0580394858858117e-06, "epoch": 9.163826998689384, "percentage": 91.64, "elapsed_time": "0:57:55", "remaining_time": "0:05:17", "throughput": 2698.25, "total_tokens": 9378120}
7011
+ {"current_steps": 34965, "total_steps": 38150, "loss": 0.0691, "lr": 1.054749880699593e-06, "epoch": 9.1651376146789, "percentage": 91.65, "elapsed_time": "0:57:56", "remaining_time": "0:05:16", "throughput": 2698.23, "total_tokens": 9379192}
7012
+ {"current_steps": 34970, "total_steps": 38150, "loss": 0.1478, "lr": 1.051465287212855e-06, "epoch": 9.166448230668415, "percentage": 91.66, "elapsed_time": "0:57:56", "remaining_time": "0:05:16", "throughput": 2698.24, "total_tokens": 9380312}
7013
+ {"current_steps": 34975, "total_steps": 38150, "loss": 0.1261, "lr": 1.04818570611305e-06, "epoch": 9.167758846657929, "percentage": 91.68, "elapsed_time": "0:57:56", "remaining_time": "0:05:15", "throughput": 2698.33, "total_tokens": 9381976}
7014
+ {"current_steps": 34980, "total_steps": 38150, "loss": 0.0601, "lr": 1.0449111380865906e-06, "epoch": 9.169069462647444, "percentage": 91.69, "elapsed_time": "0:57:57", "remaining_time": "0:05:15", "throughput": 2698.3, "total_tokens": 9383000}
7015
+ {"current_steps": 34985, "total_steps": 38150, "loss": 0.2446, "lr": 1.0416415838188393e-06, "epoch": 9.17038007863696, "percentage": 91.7, "elapsed_time": "0:57:57", "remaining_time": "0:05:14", "throughput": 2698.33, "total_tokens": 9384264}
7016
+ {"current_steps": 34990, "total_steps": 38150, "loss": 0.0356, "lr": 1.038377043994107e-06, "epoch": 9.171690694626474, "percentage": 91.72, "elapsed_time": "0:57:58", "remaining_time": "0:05:14", "throughput": 2698.37, "total_tokens": 9385640}
7017
+ {"current_steps": 34995, "total_steps": 38150, "loss": 0.167, "lr": 1.0351175192956585e-06, "epoch": 9.17300131061599, "percentage": 91.73, "elapsed_time": "0:57:58", "remaining_time": "0:05:13", "throughput": 2698.45, "total_tokens": 9387160}
7018
+ {"current_steps": 35000, "total_steps": 38150, "loss": 0.1946, "lr": 1.0318630104057037e-06, "epoch": 9.174311926605505, "percentage": 91.74, "elapsed_time": "0:57:59", "remaining_time": "0:05:13", "throughput": 2698.44, "total_tokens": 9388248}
7019
+ {"current_steps": 35005, "total_steps": 38150, "loss": 0.0751, "lr": 1.0286135180054114e-06, "epoch": 9.17562254259502, "percentage": 91.76, "elapsed_time": "0:57:59", "remaining_time": "0:05:12", "throughput": 2698.44, "total_tokens": 9389480}
7020
+ {"current_steps": 35010, "total_steps": 38150, "loss": 0.2073, "lr": 1.0253690427748874e-06, "epoch": 9.176933158584534, "percentage": 91.77, "elapsed_time": "0:58:00", "remaining_time": "0:05:12", "throughput": 2698.5, "total_tokens": 9390856}
7021
+ {"current_steps": 35015, "total_steps": 38150, "loss": 0.0787, "lr": 1.0221295853931973e-06, "epoch": 9.17824377457405, "percentage": 91.78, "elapsed_time": "0:58:00", "remaining_time": "0:05:11", "throughput": 2698.58, "total_tokens": 9392488}
7022
+ {"current_steps": 35020, "total_steps": 38150, "loss": 0.1462, "lr": 1.0188951465383511e-06, "epoch": 9.179554390563565, "percentage": 91.8, "elapsed_time": "0:58:01", "remaining_time": "0:05:11", "throughput": 2698.67, "total_tokens": 9394072}
7023
+ {"current_steps": 35025, "total_steps": 38150, "loss": 0.0629, "lr": 1.015665726887316e-06, "epoch": 9.18086500655308, "percentage": 91.81, "elapsed_time": "0:58:01", "remaining_time": "0:05:10", "throughput": 2698.69, "total_tokens": 9395288}
7024
+ {"current_steps": 35030, "total_steps": 38150, "loss": 0.1367, "lr": 1.0124413271160038e-06, "epoch": 9.182175622542594, "percentage": 91.82, "elapsed_time": "0:58:01", "remaining_time": "0:05:10", "throughput": 2698.7, "total_tokens": 9396472}
7025
+ {"current_steps": 35035, "total_steps": 38150, "loss": 0.1133, "lr": 1.0092219478992776e-06, "epoch": 9.18348623853211, "percentage": 91.83, "elapsed_time": "0:58:02", "remaining_time": "0:05:09", "throughput": 2698.74, "total_tokens": 9397832}
7026
+ {"current_steps": 35040, "total_steps": 38150, "loss": 0.1684, "lr": 1.0060075899109427e-06, "epoch": 9.184796854521625, "percentage": 91.85, "elapsed_time": "0:58:02", "remaining_time": "0:05:09", "throughput": 2698.81, "total_tokens": 9399256}
7027
+ {"current_steps": 35045, "total_steps": 38150, "loss": 0.1562, "lr": 1.0027982538237612e-06, "epoch": 9.186107470511141, "percentage": 91.86, "elapsed_time": "0:58:03", "remaining_time": "0:05:08", "throughput": 2698.95, "total_tokens": 9401176}
7028
+ {"current_steps": 35050, "total_steps": 38150, "loss": 0.1126, "lr": 9.995939403094402e-07, "epoch": 9.187418086500655, "percentage": 91.87, "elapsed_time": "0:58:03", "remaining_time": "0:05:08", "throughput": 2698.95, "total_tokens": 9402280}
7029
+ {"current_steps": 35055, "total_steps": 38150, "loss": 0.128, "lr": 9.963946500386374e-07, "epoch": 9.18872870249017, "percentage": 91.89, "elapsed_time": "0:58:04", "remaining_time": "0:05:07", "throughput": 2698.93, "total_tokens": 9403352}
7030
+ {"current_steps": 35060, "total_steps": 38150, "loss": 0.2393, "lr": 9.932003836809616e-07, "epoch": 9.190039318479686, "percentage": 91.9, "elapsed_time": "0:58:04", "remaining_time": "0:05:07", "throughput": 2698.95, "total_tokens": 9404568}
7031
+ {"current_steps": 35065, "total_steps": 38150, "loss": 0.0635, "lr": 9.900111419049619e-07, "epoch": 9.191349934469201, "percentage": 91.91, "elapsed_time": "0:58:04", "remaining_time": "0:05:06", "throughput": 2698.89, "total_tokens": 9405496}
7032
+ {"current_steps": 35070, "total_steps": 38150, "loss": 0.1395, "lr": 9.868269253781453e-07, "epoch": 9.192660550458715, "percentage": 91.93, "elapsed_time": "0:58:05", "remaining_time": "0:05:06", "throughput": 2698.96, "total_tokens": 9406904}
7033
+ {"current_steps": 35075, "total_steps": 38150, "loss": 0.1554, "lr": 9.836477347669626e-07, "epoch": 9.19397116644823, "percentage": 91.94, "elapsed_time": "0:58:05", "remaining_time": "0:05:05", "throughput": 2699.0, "total_tokens": 9408200}
7034
+ {"current_steps": 35080, "total_steps": 38150, "loss": 0.1369, "lr": 9.804735707368118e-07, "epoch": 9.195281782437746, "percentage": 91.95, "elapsed_time": "0:58:06", "remaining_time": "0:05:05", "throughput": 2699.01, "total_tokens": 9409448}
7035
+ {"current_steps": 35085, "total_steps": 38150, "loss": 0.1549, "lr": 9.773044339520392e-07, "epoch": 9.19659239842726, "percentage": 91.97, "elapsed_time": "0:58:06", "remaining_time": "0:05:04", "throughput": 2699.04, "total_tokens": 9410888}
7036
+ {"current_steps": 35090, "total_steps": 38150, "loss": 0.0932, "lr": 9.741403250759424e-07, "epoch": 9.197903014416775, "percentage": 91.98, "elapsed_time": "0:58:07", "remaining_time": "0:05:04", "throughput": 2699.02, "total_tokens": 9412008}
7037
+ {"current_steps": 35095, "total_steps": 38150, "loss": 0.1668, "lr": 9.709812447707606e-07, "epoch": 9.199213630406291, "percentage": 91.99, "elapsed_time": "0:58:07", "remaining_time": "0:05:03", "throughput": 2699.06, "total_tokens": 9413304}
7038
+ {"current_steps": 35100, "total_steps": 38150, "loss": 0.1223, "lr": 9.678271936976847e-07, "epoch": 9.200524246395807, "percentage": 92.01, "elapsed_time": "0:58:08", "remaining_time": "0:05:03", "throughput": 2699.06, "total_tokens": 9414440}
7039
+ {"current_steps": 35105, "total_steps": 38150, "loss": 0.1319, "lr": 9.64678172516853e-07, "epoch": 9.20183486238532, "percentage": 92.02, "elapsed_time": "0:58:08", "remaining_time": "0:05:02", "throughput": 2699.08, "total_tokens": 9415672}
7040
+ {"current_steps": 35110, "total_steps": 38150, "loss": 0.0595, "lr": 9.615341818873496e-07, "epoch": 9.203145478374836, "percentage": 92.03, "elapsed_time": "0:58:08", "remaining_time": "0:05:02", "throughput": 2699.1, "total_tokens": 9416872}
7041
+ {"current_steps": 35115, "total_steps": 38150, "loss": 0.1512, "lr": 9.583952224672094e-07, "epoch": 9.204456094364351, "percentage": 92.04, "elapsed_time": "0:58:09", "remaining_time": "0:05:01", "throughput": 2699.09, "total_tokens": 9417976}
7042
+ {"current_steps": 35120, "total_steps": 38150, "loss": 0.1381, "lr": 9.552612949134094e-07, "epoch": 9.205766710353867, "percentage": 92.06, "elapsed_time": "0:58:09", "remaining_time": "0:05:01", "throughput": 2699.17, "total_tokens": 9419496}
7043
+ {"current_steps": 35125, "total_steps": 38150, "loss": 0.1018, "lr": 9.521323998818694e-07, "epoch": 9.20707732634338, "percentage": 92.07, "elapsed_time": "0:58:10", "remaining_time": "0:05:00", "throughput": 2699.14, "total_tokens": 9420536}
7044
+ {"current_steps": 35130, "total_steps": 38150, "loss": 0.1439, "lr": 9.490085380274682e-07, "epoch": 9.208387942332896, "percentage": 92.08, "elapsed_time": "0:58:10", "remaining_time": "0:05:00", "throughput": 2699.26, "total_tokens": 9422248}
7045
+ {"current_steps": 35135, "total_steps": 38150, "loss": 0.0923, "lr": 9.45889710004022e-07, "epoch": 9.209698558322412, "percentage": 92.1, "elapsed_time": "0:58:11", "remaining_time": "0:04:59", "throughput": 2699.21, "total_tokens": 9423176}
7046
+ {"current_steps": 35140, "total_steps": 38150, "loss": 0.13, "lr": 9.427759164642974e-07, "epoch": 9.211009174311927, "percentage": 92.11, "elapsed_time": "0:58:11", "remaining_time": "0:04:59", "throughput": 2699.23, "total_tokens": 9424360}
7047
+ {"current_steps": 35145, "total_steps": 38150, "loss": 0.0571, "lr": 9.396671580600064e-07, "epoch": 9.212319790301441, "percentage": 92.12, "elapsed_time": "0:58:11", "remaining_time": "0:04:58", "throughput": 2699.21, "total_tokens": 9425384}
7048
+ {"current_steps": 35150, "total_steps": 38150, "loss": 0.087, "lr": 9.36563435441809e-07, "epoch": 9.213630406290957, "percentage": 92.14, "elapsed_time": "0:58:12", "remaining_time": "0:04:58", "throughput": 2699.33, "total_tokens": 9427304}
7049
+ {"current_steps": 35155, "total_steps": 38150, "loss": 0.1045, "lr": 9.334647492593051e-07, "epoch": 9.214941022280472, "percentage": 92.15, "elapsed_time": "0:58:12", "remaining_time": "0:04:57", "throughput": 2699.34, "total_tokens": 9428472}
7050
+ {"current_steps": 35160, "total_steps": 38150, "loss": 0.2124, "lr": 9.303711001610454e-07, "epoch": 9.216251638269988, "percentage": 92.16, "elapsed_time": "0:58:13", "remaining_time": "0:04:57", "throughput": 2699.34, "total_tokens": 9429576}
7051
+ {"current_steps": 35165, "total_steps": 38150, "loss": 0.0549, "lr": 9.27282488794523e-07, "epoch": 9.217562254259501, "percentage": 92.18, "elapsed_time": "0:58:13", "remaining_time": "0:04:56", "throughput": 2699.29, "total_tokens": 9430504}
7052
+ {"current_steps": 35170, "total_steps": 38150, "loss": 0.1368, "lr": 9.24198915806182e-07, "epoch": 9.218872870249017, "percentage": 92.19, "elapsed_time": "0:58:14", "remaining_time": "0:04:56", "throughput": 2699.42, "total_tokens": 9432408}
7053
+ {"current_steps": 35175, "total_steps": 38150, "loss": 0.103, "lr": 9.211203818414088e-07, "epoch": 9.220183486238533, "percentage": 92.2, "elapsed_time": "0:58:14", "remaining_time": "0:04:55", "throughput": 2699.43, "total_tokens": 9433592}
7054
+ {"current_steps": 35180, "total_steps": 38150, "loss": 0.1064, "lr": 9.180468875445352e-07, "epoch": 9.221494102228046, "percentage": 92.21, "elapsed_time": "0:58:15", "remaining_time": "0:04:55", "throughput": 2699.41, "total_tokens": 9434648}
7055
+ {"current_steps": 35185, "total_steps": 38150, "loss": 0.1038, "lr": 9.149784335588357e-07, "epoch": 9.222804718217562, "percentage": 92.23, "elapsed_time": "0:58:15", "remaining_time": "0:04:54", "throughput": 2699.47, "total_tokens": 9436152}
7056
+ {"current_steps": 35190, "total_steps": 38150, "loss": 0.097, "lr": 9.119150205265326e-07, "epoch": 9.224115334207077, "percentage": 92.24, "elapsed_time": "0:58:15", "remaining_time": "0:04:54", "throughput": 2699.48, "total_tokens": 9437336}
7057
+ {"current_steps": 35195, "total_steps": 38150, "loss": 0.1104, "lr": 9.088566490888017e-07, "epoch": 9.225425950196593, "percentage": 92.25, "elapsed_time": "0:58:16", "remaining_time": "0:04:53", "throughput": 2699.52, "total_tokens": 9438728}
7058
+ {"current_steps": 35200, "total_steps": 38150, "loss": 0.1539, "lr": 9.058033198857424e-07, "epoch": 9.226736566186107, "percentage": 92.27, "elapsed_time": "0:58:16", "remaining_time": "0:04:53", "throughput": 2699.58, "total_tokens": 9440216}
7059
+ {"current_steps": 35205, "total_steps": 38150, "loss": 0.1086, "lr": 9.027550335564184e-07, "epoch": 9.228047182175622, "percentage": 92.28, "elapsed_time": "0:58:17", "remaining_time": "0:04:52", "throughput": 2699.55, "total_tokens": 9441208}
7060
+ {"current_steps": 35210, "total_steps": 38150, "loss": 0.1238, "lr": 8.997117907388275e-07, "epoch": 9.229357798165138, "percentage": 92.29, "elapsed_time": "0:58:17", "remaining_time": "0:04:52", "throughput": 2699.51, "total_tokens": 9442152}
7061
+ {"current_steps": 35215, "total_steps": 38150, "loss": 0.1082, "lr": 8.966735920699187e-07, "epoch": 9.230668414154653, "percentage": 92.31, "elapsed_time": "0:58:18", "remaining_time": "0:04:51", "throughput": 2699.57, "total_tokens": 9443528}
7062
+ {"current_steps": 35220, "total_steps": 38150, "loss": 0.0832, "lr": 8.936404381855806e-07, "epoch": 9.231979030144167, "percentage": 92.32, "elapsed_time": "0:58:18", "remaining_time": "0:04:51", "throughput": 2699.53, "total_tokens": 9444488}
7063
+ {"current_steps": 35225, "total_steps": 38150, "loss": 0.1057, "lr": 8.906123297206442e-07, "epoch": 9.233289646133683, "percentage": 92.33, "elapsed_time": "0:58:18", "remaining_time": "0:04:50", "throughput": 2699.55, "total_tokens": 9445720}
7064
+ {"current_steps": 35230, "total_steps": 38150, "loss": 0.1465, "lr": 8.875892673088887e-07, "epoch": 9.234600262123198, "percentage": 92.35, "elapsed_time": "0:58:19", "remaining_time": "0:04:50", "throughput": 2699.58, "total_tokens": 9447064}
7065
+ {"current_steps": 35235, "total_steps": 38150, "loss": 0.1657, "lr": 8.845712515830385e-07, "epoch": 9.235910878112714, "percentage": 92.36, "elapsed_time": "0:58:19", "remaining_time": "0:04:49", "throughput": 2699.61, "total_tokens": 9448392}
7066
+ {"current_steps": 35240, "total_steps": 38150, "loss": 0.1745, "lr": 8.815582831747576e-07, "epoch": 9.237221494102227, "percentage": 92.37, "elapsed_time": "0:58:20", "remaining_time": "0:04:49", "throughput": 2699.69, "total_tokens": 9449912}
7067
+ {"current_steps": 35245, "total_steps": 38150, "loss": 0.2851, "lr": 8.785503627146501e-07, "epoch": 9.238532110091743, "percentage": 92.39, "elapsed_time": "0:58:20", "remaining_time": "0:04:48", "throughput": 2699.78, "total_tokens": 9451560}
7068
+ {"current_steps": 35250, "total_steps": 38150, "loss": 0.1834, "lr": 8.755474908322764e-07, "epoch": 9.239842726081259, "percentage": 92.4, "elapsed_time": "0:58:21", "remaining_time": "0:04:48", "throughput": 2699.83, "total_tokens": 9452984}
7069
+ {"current_steps": 35255, "total_steps": 38150, "loss": 0.218, "lr": 8.725496681561279e-07, "epoch": 9.241153342070774, "percentage": 92.41, "elapsed_time": "0:58:21", "remaining_time": "0:04:47", "throughput": 2699.92, "total_tokens": 9454568}
7070
+ {"current_steps": 35260, "total_steps": 38150, "loss": 0.1905, "lr": 8.695568953136446e-07, "epoch": 9.242463958060288, "percentage": 92.42, "elapsed_time": "0:58:22", "remaining_time": "0:04:47", "throughput": 2699.96, "total_tokens": 9455880}
7071
+ {"current_steps": 35265, "total_steps": 38150, "loss": 0.1849, "lr": 8.665691729312115e-07, "epoch": 9.243774574049803, "percentage": 92.44, "elapsed_time": "0:58:22", "remaining_time": "0:04:46", "throughput": 2700.07, "total_tokens": 9457624}
7072
+ {"current_steps": 35270, "total_steps": 38150, "loss": 0.0897, "lr": 8.635865016341477e-07, "epoch": 9.245085190039319, "percentage": 92.45, "elapsed_time": "0:58:23", "remaining_time": "0:04:46", "throughput": 2700.18, "total_tokens": 9459352}
7073
+ {"current_steps": 35275, "total_steps": 38150, "loss": 0.0769, "lr": 8.606088820467318e-07, "epoch": 9.246395806028833, "percentage": 92.46, "elapsed_time": "0:58:23", "remaining_time": "0:04:45", "throughput": 2700.14, "total_tokens": 9460328}
7074
+ {"current_steps": 35280, "total_steps": 38150, "loss": 0.2808, "lr": 8.576363147921596e-07, "epoch": 9.247706422018348, "percentage": 92.48, "elapsed_time": "0:58:24", "remaining_time": "0:04:45", "throughput": 2700.21, "total_tokens": 9461880}
7075
+ {"current_steps": 35285, "total_steps": 38150, "loss": 0.0622, "lr": 8.546688004925946e-07, "epoch": 9.249017038007864, "percentage": 92.49, "elapsed_time": "0:58:24", "remaining_time": "0:04:44", "throughput": 2700.27, "total_tokens": 9463336}
7076
+ {"current_steps": 35290, "total_steps": 38150, "loss": 0.1228, "lr": 8.517063397691288e-07, "epoch": 9.25032765399738, "percentage": 92.5, "elapsed_time": "0:58:25", "remaining_time": "0:04:44", "throughput": 2700.41, "total_tokens": 9465400}
7077
+ {"current_steps": 35295, "total_steps": 38150, "loss": 0.1586, "lr": 8.487489332418025e-07, "epoch": 9.251638269986893, "percentage": 92.52, "elapsed_time": "0:58:25", "remaining_time": "0:04:43", "throughput": 2700.51, "total_tokens": 9467048}
7078
+ {"current_steps": 35300, "total_steps": 38150, "loss": 0.1023, "lr": 8.457965815295926e-07, "epoch": 9.252948885976409, "percentage": 92.53, "elapsed_time": "0:58:26", "remaining_time": "0:04:43", "throughput": 2700.5, "total_tokens": 9468104}
7079
+ {"current_steps": 35305, "total_steps": 38150, "loss": 0.1474, "lr": 8.428492852504216e-07, "epoch": 9.254259501965924, "percentage": 92.54, "elapsed_time": "0:58:26", "remaining_time": "0:04:42", "throughput": 2700.47, "total_tokens": 9469128}
7080
+ {"current_steps": 35310, "total_steps": 38150, "loss": 0.0923, "lr": 8.399070450211599e-07, "epoch": 9.25557011795544, "percentage": 92.56, "elapsed_time": "0:58:26", "remaining_time": "0:04:42", "throughput": 2700.43, "total_tokens": 9470088}
7081
+ {"current_steps": 35315, "total_steps": 38150, "loss": 0.1324, "lr": 8.369698614576038e-07, "epoch": 9.256880733944953, "percentage": 92.57, "elapsed_time": "0:58:27", "remaining_time": "0:04:41", "throughput": 2700.43, "total_tokens": 9471208}
7082
+ {"current_steps": 35320, "total_steps": 38150, "loss": 0.3219, "lr": 8.340377351745088e-07, "epoch": 9.258191349934469, "percentage": 92.58, "elapsed_time": "0:58:27", "remaining_time": "0:04:41", "throughput": 2700.5, "total_tokens": 9472728}
7083
+ {"current_steps": 35325, "total_steps": 38150, "loss": 0.0927, "lr": 8.311106667855562e-07, "epoch": 9.259501965923985, "percentage": 92.6, "elapsed_time": "0:58:28", "remaining_time": "0:04:40", "throughput": 2700.52, "total_tokens": 9474008}
7084
+ {"current_steps": 35330, "total_steps": 38150, "loss": 0.1399, "lr": 8.281886569033836e-07, "epoch": 9.2608125819135, "percentage": 92.61, "elapsed_time": "0:58:28", "remaining_time": "0:04:40", "throughput": 2700.6, "total_tokens": 9475528}
7085
+ {"current_steps": 35335, "total_steps": 38150, "loss": 0.4019, "lr": 8.252717061395576e-07, "epoch": 9.262123197903014, "percentage": 92.62, "elapsed_time": "0:58:29", "remaining_time": "0:04:39", "throughput": 2700.63, "total_tokens": 9476856}
7086
+ {"current_steps": 35340, "total_steps": 38150, "loss": 0.0696, "lr": 8.223598151045952e-07, "epoch": 9.26343381389253, "percentage": 92.63, "elapsed_time": "0:58:29", "remaining_time": "0:04:39", "throughput": 2700.61, "total_tokens": 9477880}
7087
+ {"current_steps": 35345, "total_steps": 38150, "loss": 0.201, "lr": 8.194529844079451e-07, "epoch": 9.264744429882045, "percentage": 92.65, "elapsed_time": "0:58:29", "remaining_time": "0:04:38", "throughput": 2700.68, "total_tokens": 9479304}
7088
+ {"current_steps": 35350, "total_steps": 38150, "loss": 0.1267, "lr": 8.16551214658004e-07, "epoch": 9.26605504587156, "percentage": 92.66, "elapsed_time": "0:58:30", "remaining_time": "0:04:38", "throughput": 2700.64, "total_tokens": 9480280}
7089
+ {"current_steps": 35355, "total_steps": 38150, "loss": 0.1633, "lr": 8.136545064621137e-07, "epoch": 9.267365661861074, "percentage": 92.67, "elapsed_time": "0:58:30", "remaining_time": "0:04:37", "throughput": 2700.67, "total_tokens": 9481496}
7090
+ {"current_steps": 35360, "total_steps": 38150, "loss": 0.0994, "lr": 8.107628604265366e-07, "epoch": 9.26867627785059, "percentage": 92.69, "elapsed_time": "0:58:31", "remaining_time": "0:04:37", "throughput": 2700.77, "total_tokens": 9483144}
7091
+ {"current_steps": 35365, "total_steps": 38150, "loss": 0.1237, "lr": 8.078762771564996e-07, "epoch": 9.269986893840105, "percentage": 92.7, "elapsed_time": "0:58:31", "remaining_time": "0:04:36", "throughput": 2700.84, "total_tokens": 9484616}
7092
+ {"current_steps": 35370, "total_steps": 38150, "loss": 0.0909, "lr": 8.049947572561584e-07, "epoch": 9.271297509829619, "percentage": 92.71, "elapsed_time": "0:58:32", "remaining_time": "0:04:36", "throughput": 2700.85, "total_tokens": 9485800}
7093
+ {"current_steps": 35375, "total_steps": 38150, "loss": 0.0871, "lr": 8.021183013286082e-07, "epoch": 9.272608125819135, "percentage": 92.73, "elapsed_time": "0:58:32", "remaining_time": "0:04:35", "throughput": 2700.86, "total_tokens": 9486984}
7094
+ {"current_steps": 35380, "total_steps": 38150, "loss": 0.2082, "lr": 7.99246909975887e-07, "epoch": 9.27391874180865, "percentage": 92.74, "elapsed_time": "0:58:33", "remaining_time": "0:04:35", "throughput": 2700.95, "total_tokens": 9488632}
7095
+ {"current_steps": 35385, "total_steps": 38150, "loss": 0.2168, "lr": 7.963805837989724e-07, "epoch": 9.275229357798166, "percentage": 92.75, "elapsed_time": "0:58:33", "remaining_time": "0:04:34", "throughput": 2700.97, "total_tokens": 9489880}
7096
+ {"current_steps": 35390, "total_steps": 38150, "loss": 0.1017, "lr": 7.935193233977845e-07, "epoch": 9.27653997378768, "percentage": 92.77, "elapsed_time": "0:58:33", "remaining_time": "0:04:34", "throughput": 2700.98, "total_tokens": 9491032}
7097
+ {"current_steps": 35395, "total_steps": 38150, "loss": 0.1099, "lr": 7.906631293711719e-07, "epoch": 9.277850589777195, "percentage": 92.78, "elapsed_time": "0:58:34", "remaining_time": "0:04:33", "throughput": 2701.05, "total_tokens": 9492520}
7098
+ {"current_steps": 35400, "total_steps": 38150, "loss": 0.0886, "lr": 7.878120023169344e-07, "epoch": 9.27916120576671, "percentage": 92.79, "elapsed_time": "0:58:34", "remaining_time": "0:04:33", "throughput": 2701.12, "total_tokens": 9494040}
7099
+ {"current_steps": 35405, "total_steps": 38150, "loss": 0.129, "lr": 7.849659428318113e-07, "epoch": 9.280471821756226, "percentage": 92.8, "elapsed_time": "0:58:35", "remaining_time": "0:04:32", "throughput": 2701.09, "total_tokens": 9495048}
7100
+ {"current_steps": 35410, "total_steps": 38150, "loss": 0.1485, "lr": 7.821249515114787e-07, "epoch": 9.28178243774574, "percentage": 92.82, "elapsed_time": "0:58:35", "remaining_time": "0:04:32", "throughput": 2701.18, "total_tokens": 9496776}
7101
+ {"current_steps": 35415, "total_steps": 38150, "loss": 0.2719, "lr": 7.792890289505444e-07, "epoch": 9.283093053735255, "percentage": 92.83, "elapsed_time": "0:58:36", "remaining_time": "0:04:31", "throughput": 2701.21, "total_tokens": 9498040}
7102
+ {"current_steps": 35420, "total_steps": 38150, "loss": 0.0649, "lr": 7.764581757425642e-07, "epoch": 9.284403669724771, "percentage": 92.84, "elapsed_time": "0:58:36", "remaining_time": "0:04:31", "throughput": 2701.25, "total_tokens": 9499608}
7103
+ {"current_steps": 35425, "total_steps": 38150, "loss": 0.2314, "lr": 7.736323924800365e-07, "epoch": 9.285714285714286, "percentage": 92.86, "elapsed_time": "0:58:37", "remaining_time": "0:04:30", "throughput": 2701.31, "total_tokens": 9500984}
7104
+ {"current_steps": 35430, "total_steps": 38150, "loss": 0.0834, "lr": 7.708116797543907e-07, "epoch": 9.2870249017038, "percentage": 92.87, "elapsed_time": "0:58:37", "remaining_time": "0:04:30", "throughput": 2701.32, "total_tokens": 9502184}
7105
+ {"current_steps": 35435, "total_steps": 38150, "loss": 0.2034, "lr": 7.679960381559936e-07, "epoch": 9.288335517693316, "percentage": 92.88, "elapsed_time": "0:58:38", "remaining_time": "0:04:29", "throughput": 2701.4, "total_tokens": 9503624}
7106
+ {"current_steps": 35440, "total_steps": 38150, "loss": 0.1565, "lr": 7.651854682741572e-07, "epoch": 9.289646133682831, "percentage": 92.9, "elapsed_time": "0:58:38", "remaining_time": "0:04:29", "throughput": 2701.47, "total_tokens": 9505176}
7107
+ {"current_steps": 35445, "total_steps": 38150, "loss": 0.1834, "lr": 7.623799706971274e-07, "epoch": 9.290956749672347, "percentage": 92.91, "elapsed_time": "0:58:38", "remaining_time": "0:04:28", "throughput": 2701.51, "total_tokens": 9506504}
7108
+ {"current_steps": 35450, "total_steps": 38150, "loss": 0.0962, "lr": 7.595795460120903e-07, "epoch": 9.29226736566186, "percentage": 92.92, "elapsed_time": "0:58:39", "remaining_time": "0:04:28", "throughput": 2701.47, "total_tokens": 9507448}
7109
+ {"current_steps": 35455, "total_steps": 38150, "loss": 0.1529, "lr": 7.567841948051685e-07, "epoch": 9.293577981651376, "percentage": 92.94, "elapsed_time": "0:58:39", "remaining_time": "0:04:27", "throughput": 2701.48, "total_tokens": 9508760}
7110
+ {"current_steps": 35460, "total_steps": 38150, "loss": 0.1505, "lr": 7.539939176614247e-07, "epoch": 9.294888597640892, "percentage": 92.95, "elapsed_time": "0:58:40", "remaining_time": "0:04:27", "throughput": 2701.52, "total_tokens": 9510072}
7111
+ {"current_steps": 35465, "total_steps": 38150, "loss": 0.0983, "lr": 7.512087151648639e-07, "epoch": 9.296199213630405, "percentage": 92.96, "elapsed_time": "0:58:40", "remaining_time": "0:04:26", "throughput": 2701.53, "total_tokens": 9511288}
7112
+ {"current_steps": 35470, "total_steps": 38150, "loss": 0.1978, "lr": 7.484285878984171e-07, "epoch": 9.297509829619921, "percentage": 92.98, "elapsed_time": "0:58:41", "remaining_time": "0:04:26", "throughput": 2701.52, "total_tokens": 9512408}
7113
+ {"current_steps": 35475, "total_steps": 38150, "loss": 0.1101, "lr": 7.45653536443966e-07, "epoch": 9.298820445609437, "percentage": 92.99, "elapsed_time": "0:58:41", "remaining_time": "0:04:25", "throughput": 2701.58, "total_tokens": 9513752}
7114
+ {"current_steps": 35480, "total_steps": 38150, "loss": 0.2371, "lr": 7.428835613823182e-07, "epoch": 9.300131061598952, "percentage": 93.0, "elapsed_time": "0:58:42", "remaining_time": "0:04:25", "throughput": 2701.59, "total_tokens": 9515000}
7115
+ {"current_steps": 35485, "total_steps": 38150, "loss": 0.1024, "lr": 7.401186632932294e-07, "epoch": 9.301441677588466, "percentage": 93.01, "elapsed_time": "0:58:42", "remaining_time": "0:04:24", "throughput": 2701.58, "total_tokens": 9516136}
7116
+ {"current_steps": 35490, "total_steps": 38150, "loss": 0.1386, "lr": 7.373588427553869e-07, "epoch": 9.302752293577981, "percentage": 93.03, "elapsed_time": "0:58:42", "remaining_time": "0:04:24", "throughput": 2701.59, "total_tokens": 9517288}
7117
+ {"current_steps": 35495, "total_steps": 38150, "loss": 0.1331, "lr": 7.346041003464172e-07, "epoch": 9.304062909567497, "percentage": 93.04, "elapsed_time": "0:58:43", "remaining_time": "0:04:23", "throughput": 2701.62, "total_tokens": 9518536}
7118
+ {"current_steps": 35500, "total_steps": 38150, "loss": 0.1108, "lr": 7.318544366428814e-07, "epoch": 9.305373525557012, "percentage": 93.05, "elapsed_time": "0:58:43", "remaining_time": "0:04:23", "throughput": 2701.6, "total_tokens": 9519608}
7119
+ {"current_steps": 35505, "total_steps": 38150, "loss": 0.1212, "lr": 7.291098522202777e-07, "epoch": 9.306684141546526, "percentage": 93.07, "elapsed_time": "0:58:44", "remaining_time": "0:04:22", "throughput": 2701.61, "total_tokens": 9520808}
7120
+ {"current_steps": 35510, "total_steps": 38150, "loss": 0.133, "lr": 7.263703476530492e-07, "epoch": 9.307994757536042, "percentage": 93.08, "elapsed_time": "0:58:44", "remaining_time": "0:04:22", "throughput": 2701.63, "total_tokens": 9522008}
7121
+ {"current_steps": 35515, "total_steps": 38150, "loss": 0.1994, "lr": 7.236359235145624e-07, "epoch": 9.309305373525557, "percentage": 93.09, "elapsed_time": "0:58:44", "remaining_time": "0:04:21", "throughput": 2701.65, "total_tokens": 9523288}
7122
+ {"current_steps": 35520, "total_steps": 38150, "loss": 0.1438, "lr": 7.209065803771315e-07, "epoch": 9.310615989515073, "percentage": 93.11, "elapsed_time": "0:58:45", "remaining_time": "0:04:21", "throughput": 2701.76, "total_tokens": 9525064}
7123
+ {"current_steps": 35525, "total_steps": 38150, "loss": 0.1555, "lr": 7.181823188120024e-07, "epoch": 9.311926605504587, "percentage": 93.12, "elapsed_time": "0:58:45", "remaining_time": "0:04:20", "throughput": 2701.74, "total_tokens": 9526120}
7124
+ {"current_steps": 35530, "total_steps": 38150, "loss": 0.1646, "lr": 7.154631393893552e-07, "epoch": 9.313237221494102, "percentage": 93.13, "elapsed_time": "0:58:46", "remaining_time": "0:04:20", "throughput": 2701.79, "total_tokens": 9527512}
7125
+ {"current_steps": 35535, "total_steps": 38150, "loss": 0.2143, "lr": 7.127490426783123e-07, "epoch": 9.314547837483618, "percentage": 93.15, "elapsed_time": "0:58:46", "remaining_time": "0:04:19", "throughput": 2701.78, "total_tokens": 9528600}
7126
+ {"current_steps": 35540, "total_steps": 38150, "loss": 0.2043, "lr": 7.100400292469333e-07, "epoch": 9.315858453473133, "percentage": 93.16, "elapsed_time": "0:58:47", "remaining_time": "0:04:19", "throughput": 2701.8, "total_tokens": 9529800}
7127
+ {"current_steps": 35545, "total_steps": 38150, "loss": 0.1104, "lr": 7.073360996622064e-07, "epoch": 9.317169069462647, "percentage": 93.17, "elapsed_time": "0:58:47", "remaining_time": "0:04:18", "throughput": 2701.81, "total_tokens": 9530968}
7128
+ {"current_steps": 35550, "total_steps": 38150, "loss": 0.0827, "lr": 7.046372544900592e-07, "epoch": 9.318479685452163, "percentage": 93.18, "elapsed_time": "0:58:48", "remaining_time": "0:04:18", "throughput": 2701.83, "total_tokens": 9532168}
7129
+ {"current_steps": 35555, "total_steps": 38150, "loss": 0.155, "lr": 7.01943494295354e-07, "epoch": 9.319790301441678, "percentage": 93.2, "elapsed_time": "0:58:48", "remaining_time": "0:04:17", "throughput": 2701.89, "total_tokens": 9533640}
7130
+ {"current_steps": 35560, "total_steps": 38150, "loss": 0.0823, "lr": 6.992548196418924e-07, "epoch": 9.321100917431192, "percentage": 93.21, "elapsed_time": "0:58:48", "remaining_time": "0:04:17", "throughput": 2701.86, "total_tokens": 9534632}
7131
+ {"current_steps": 35565, "total_steps": 38150, "loss": 0.1137, "lr": 6.965712310924078e-07, "epoch": 9.322411533420707, "percentage": 93.22, "elapsed_time": "0:58:49", "remaining_time": "0:04:16", "throughput": 2701.92, "total_tokens": 9536040}
7132
+ {"current_steps": 35570, "total_steps": 38150, "loss": 0.0873, "lr": 6.9389272920857e-07, "epoch": 9.323722149410223, "percentage": 93.24, "elapsed_time": "0:58:49", "remaining_time": "0:04:16", "throughput": 2701.93, "total_tokens": 9537192}
7133
+ {"current_steps": 35575, "total_steps": 38150, "loss": 0.0737, "lr": 6.912193145509893e-07, "epoch": 9.325032765399738, "percentage": 93.25, "elapsed_time": "0:58:50", "remaining_time": "0:04:15", "throughput": 2701.99, "total_tokens": 9538680}
7134
+ {"current_steps": 35580, "total_steps": 38150, "loss": 0.0689, "lr": 6.885509876792012e-07, "epoch": 9.326343381389252, "percentage": 93.26, "elapsed_time": "0:58:50", "remaining_time": "0:04:15", "throughput": 2701.95, "total_tokens": 9539656}
7135
+ {"current_steps": 35585, "total_steps": 38150, "loss": 0.1105, "lr": 6.858877491516869e-07, "epoch": 9.327653997378768, "percentage": 93.28, "elapsed_time": "0:58:51", "remaining_time": "0:04:14", "throughput": 2701.99, "total_tokens": 9540968}
7136
+ {"current_steps": 35590, "total_steps": 38150, "loss": 0.2372, "lr": 6.832295995258531e-07, "epoch": 9.328964613368283, "percentage": 93.29, "elapsed_time": "0:58:51", "remaining_time": "0:04:14", "throughput": 2702.01, "total_tokens": 9542248}
7137
+ {"current_steps": 35595, "total_steps": 38150, "loss": 0.1463, "lr": 6.805765393580493e-07, "epoch": 9.330275229357799, "percentage": 93.3, "elapsed_time": "0:58:51", "remaining_time": "0:04:13", "throughput": 2702.02, "total_tokens": 9543416}
7138
+ {"current_steps": 35600, "total_steps": 38150, "loss": 0.165, "lr": 6.779285692035536e-07, "epoch": 9.331585845347313, "percentage": 93.32, "elapsed_time": "0:58:52", "remaining_time": "0:04:13", "throughput": 2702.15, "total_tokens": 9545272}
7139
+ {"current_steps": 35605, "total_steps": 38150, "loss": 0.1597, "lr": 6.752856896165866e-07, "epoch": 9.332896461336828, "percentage": 93.33, "elapsed_time": "0:58:52", "remaining_time": "0:04:12", "throughput": 2702.16, "total_tokens": 9546488}
7140
+ {"current_steps": 35610, "total_steps": 38150, "loss": 0.0795, "lr": 6.726479011502917e-07, "epoch": 9.334207077326344, "percentage": 93.34, "elapsed_time": "0:58:53", "remaining_time": "0:04:12", "throughput": 2702.12, "total_tokens": 9547496}
7141
+ {"current_steps": 35615, "total_steps": 38150, "loss": 0.114, "lr": 6.700152043567554e-07, "epoch": 9.33551769331586, "percentage": 93.36, "elapsed_time": "0:58:53", "remaining_time": "0:04:11", "throughput": 2702.16, "total_tokens": 9548856}
7142
+ {"current_steps": 35620, "total_steps": 38150, "loss": 0.1159, "lr": 6.673875997870005e-07, "epoch": 9.336828309305373, "percentage": 93.37, "elapsed_time": "0:58:54", "remaining_time": "0:04:11", "throughput": 2702.19, "total_tokens": 9550280}
7143
+ {"current_steps": 35625, "total_steps": 38150, "loss": 0.1715, "lr": 6.64765087990979e-07, "epoch": 9.338138925294889, "percentage": 93.38, "elapsed_time": "0:58:54", "remaining_time": "0:04:10", "throughput": 2702.29, "total_tokens": 9551864}
7144
+ {"current_steps": 35630, "total_steps": 38150, "loss": 0.1171, "lr": 6.621476695175738e-07, "epoch": 9.339449541284404, "percentage": 93.39, "elapsed_time": "0:58:55", "remaining_time": "0:04:10", "throughput": 2702.3, "total_tokens": 9553032}
7145
+ {"current_steps": 35635, "total_steps": 38150, "loss": 0.2346, "lr": 6.595353449146108e-07, "epoch": 9.34076015727392, "percentage": 93.41, "elapsed_time": "0:58:55", "remaining_time": "0:04:09", "throughput": 2702.38, "total_tokens": 9554520}
7146
+ {"current_steps": 35640, "total_steps": 38150, "loss": 0.2527, "lr": 6.569281147288414e-07, "epoch": 9.342070773263433, "percentage": 93.42, "elapsed_time": "0:58:56", "remaining_time": "0:04:09", "throughput": 2702.52, "total_tokens": 9556424}
7147
+ {"current_steps": 35645, "total_steps": 38150, "loss": 0.1813, "lr": 6.54325979505957e-07, "epoch": 9.343381389252949, "percentage": 93.43, "elapsed_time": "0:58:56", "remaining_time": "0:04:08", "throughput": 2702.68, "total_tokens": 9558392}
7148
+ {"current_steps": 35650, "total_steps": 38150, "loss": 0.1414, "lr": 6.517289397905746e-07, "epoch": 9.344692005242464, "percentage": 93.45, "elapsed_time": "0:58:57", "remaining_time": "0:04:08", "throughput": 2702.71, "total_tokens": 9559704}
7149
+ {"current_steps": 35655, "total_steps": 38150, "loss": 0.2222, "lr": 6.491369961262566e-07, "epoch": 9.346002621231978, "percentage": 93.46, "elapsed_time": "0:58:57", "remaining_time": "0:04:07", "throughput": 2702.8, "total_tokens": 9561288}
7150
+ {"current_steps": 35660, "total_steps": 38150, "loss": 0.1929, "lr": 6.465501490554914e-07, "epoch": 9.347313237221494, "percentage": 93.47, "elapsed_time": "0:58:57", "remaining_time": "0:04:07", "throughput": 2702.8, "total_tokens": 9562440}
7151
+ {"current_steps": 35665, "total_steps": 38150, "loss": 0.1088, "lr": 6.439683991197043e-07, "epoch": 9.34862385321101, "percentage": 93.49, "elapsed_time": "0:58:58", "remaining_time": "0:04:06", "throughput": 2702.86, "total_tokens": 9563880}
7152
+ {"current_steps": 35670, "total_steps": 38150, "loss": 0.1752, "lr": 6.413917468592434e-07, "epoch": 9.349934469200525, "percentage": 93.5, "elapsed_time": "0:58:58", "remaining_time": "0:04:06", "throughput": 2702.86, "total_tokens": 9565032}
7153
+ {"current_steps": 35675, "total_steps": 38150, "loss": 0.0719, "lr": 6.388201928134025e-07, "epoch": 9.351245085190039, "percentage": 93.51, "elapsed_time": "0:58:59", "remaining_time": "0:04:05", "throughput": 2702.82, "total_tokens": 9566008}
7154
+ {"current_steps": 35680, "total_steps": 38150, "loss": 0.2382, "lr": 6.362537375204009e-07, "epoch": 9.352555701179554, "percentage": 93.53, "elapsed_time": "0:58:59", "remaining_time": "0:04:05", "throughput": 2702.87, "total_tokens": 9567336}
7155
+ {"current_steps": 35685, "total_steps": 38150, "loss": 0.0771, "lr": 6.336923815173951e-07, "epoch": 9.35386631716907, "percentage": 93.54, "elapsed_time": "0:59:00", "remaining_time": "0:04:04", "throughput": 2702.84, "total_tokens": 9568312}
7156
+ {"current_steps": 35690, "total_steps": 38150, "loss": 0.1928, "lr": 6.31136125340473e-07, "epoch": 9.355176933158585, "percentage": 93.55, "elapsed_time": "0:59:00", "remaining_time": "0:04:04", "throughput": 2702.9, "total_tokens": 9569816}
7157
+ {"current_steps": 35695, "total_steps": 38150, "loss": 0.1062, "lr": 6.285849695246537e-07, "epoch": 9.356487549148099, "percentage": 93.56, "elapsed_time": "0:59:01", "remaining_time": "0:04:03", "throughput": 2702.95, "total_tokens": 9571224}
7158
+ {"current_steps": 35700, "total_steps": 38150, "loss": 0.1598, "lr": 6.260389146038881e-07, "epoch": 9.357798165137615, "percentage": 93.58, "elapsed_time": "0:59:01", "remaining_time": "0:04:03", "throughput": 2702.96, "total_tokens": 9572360}
7159
+ {"current_steps": 35705, "total_steps": 38150, "loss": 0.1306, "lr": 6.234979611110637e-07, "epoch": 9.35910878112713, "percentage": 93.59, "elapsed_time": "0:59:01", "remaining_time": "0:04:02", "throughput": 2702.98, "total_tokens": 9573656}
7160
+ {"current_steps": 35710, "total_steps": 38150, "loss": 0.0632, "lr": 6.209621095779993e-07, "epoch": 9.360419397116646, "percentage": 93.6, "elapsed_time": "0:59:02", "remaining_time": "0:04:02", "throughput": 2702.98, "total_tokens": 9574776}
7161
+ {"current_steps": 35715, "total_steps": 38150, "loss": 0.1472, "lr": 6.184313605354375e-07, "epoch": 9.36173001310616, "percentage": 93.62, "elapsed_time": "0:59:02", "remaining_time": "0:04:01", "throughput": 2703.04, "total_tokens": 9576248}
7162
+ {"current_steps": 35720, "total_steps": 38150, "loss": 0.1474, "lr": 6.159057145130653e-07, "epoch": 9.363040629095675, "percentage": 93.63, "elapsed_time": "0:59:03", "remaining_time": "0:04:01", "throughput": 2703.05, "total_tokens": 9577432}
7163
+ {"current_steps": 35725, "total_steps": 38150, "loss": 0.0471, "lr": 6.133851720394962e-07, "epoch": 9.36435124508519, "percentage": 93.64, "elapsed_time": "0:59:03", "remaining_time": "0:04:00", "throughput": 2703.05, "total_tokens": 9578552}
7164
+ {"current_steps": 35730, "total_steps": 38150, "loss": 0.2357, "lr": 6.10869733642272e-07, "epoch": 9.365661861074706, "percentage": 93.66, "elapsed_time": "0:59:04", "remaining_time": "0:04:00", "throughput": 2703.11, "total_tokens": 9580024}
7165
+ {"current_steps": 35735, "total_steps": 38150, "loss": 0.1009, "lr": 6.083593998478715e-07, "epoch": 9.36697247706422, "percentage": 93.67, "elapsed_time": "0:59:04", "remaining_time": "0:03:59", "throughput": 2703.17, "total_tokens": 9581496}
7166
+ {"current_steps": 35740, "total_steps": 38150, "loss": 0.1216, "lr": 6.058541711817023e-07, "epoch": 9.368283093053735, "percentage": 93.68, "elapsed_time": "0:59:04", "remaining_time": "0:03:59", "throughput": 2703.13, "total_tokens": 9582440}
7167
+ {"current_steps": 35745, "total_steps": 38150, "loss": 0.101, "lr": 6.033540481681088e-07, "epoch": 9.36959370904325, "percentage": 93.7, "elapsed_time": "0:59:05", "remaining_time": "0:03:58", "throughput": 2703.18, "total_tokens": 9583896}
7168
+ {"current_steps": 35750, "total_steps": 38150, "loss": 0.1489, "lr": 6.008590313303586e-07, "epoch": 9.370904325032765, "percentage": 93.71, "elapsed_time": "0:59:05", "remaining_time": "0:03:58", "throughput": 2703.22, "total_tokens": 9585304}
7169
+ {"current_steps": 35755, "total_steps": 38150, "loss": 0.1538, "lr": 5.983691211906506e-07, "epoch": 9.37221494102228, "percentage": 93.72, "elapsed_time": "0:59:06", "remaining_time": "0:03:57", "throughput": 2703.23, "total_tokens": 9586488}
7170
+ {"current_steps": 35760, "total_steps": 38150, "loss": 0.1763, "lr": 5.958843182701263e-07, "epoch": 9.373525557011796, "percentage": 93.74, "elapsed_time": "0:59:06", "remaining_time": "0:03:57", "throughput": 2703.27, "total_tokens": 9587880}
7171
+ {"current_steps": 35765, "total_steps": 38150, "loss": 0.1175, "lr": 5.934046230888418e-07, "epoch": 9.374836173001311, "percentage": 93.75, "elapsed_time": "0:59:07", "remaining_time": "0:03:56", "throughput": 2703.26, "total_tokens": 9588920}
7172
+ {"current_steps": 35770, "total_steps": 38150, "loss": 0.1124, "lr": 5.909300361658015e-07, "epoch": 9.376146788990825, "percentage": 93.76, "elapsed_time": "0:59:07", "remaining_time": "0:03:56", "throughput": 2703.24, "total_tokens": 9589976}
7173
+ {"current_steps": 35775, "total_steps": 38150, "loss": 0.0959, "lr": 5.8846055801893e-07, "epoch": 9.37745740498034, "percentage": 93.77, "elapsed_time": "0:59:08", "remaining_time": "0:03:55", "throughput": 2703.33, "total_tokens": 9591704}
7174
+ {"current_steps": 35780, "total_steps": 38150, "loss": 0.2196, "lr": 5.859961891650861e-07, "epoch": 9.378768020969856, "percentage": 93.79, "elapsed_time": "0:59:08", "remaining_time": "0:03:55", "throughput": 2703.35, "total_tokens": 9593000}
7175
+ {"current_steps": 35785, "total_steps": 38150, "loss": 0.1422, "lr": 5.835369301200516e-07, "epoch": 9.380078636959372, "percentage": 93.8, "elapsed_time": "0:59:09", "remaining_time": "0:03:54", "throughput": 2703.4, "total_tokens": 9594440}
7176
+ {"current_steps": 35790, "total_steps": 38150, "loss": 0.0781, "lr": 5.810827813985509e-07, "epoch": 9.381389252948885, "percentage": 93.81, "elapsed_time": "0:59:09", "remaining_time": "0:03:54", "throughput": 2703.4, "total_tokens": 9595672}
7177
+ {"current_steps": 35795, "total_steps": 38150, "loss": 0.1189, "lr": 5.786337435142314e-07, "epoch": 9.382699868938401, "percentage": 93.83, "elapsed_time": "0:59:09", "remaining_time": "0:03:53", "throughput": 2703.47, "total_tokens": 9597112}
7178
+ {"current_steps": 35800, "total_steps": 38150, "loss": 0.0633, "lr": 5.761898169796721e-07, "epoch": 9.384010484927916, "percentage": 93.84, "elapsed_time": "0:59:10", "remaining_time": "0:03:53", "throughput": 2703.51, "total_tokens": 9598456}
7179
+ {"current_steps": 35805, "total_steps": 38150, "loss": 0.2168, "lr": 5.73751002306383e-07, "epoch": 9.385321100917432, "percentage": 93.85, "elapsed_time": "0:59:10", "remaining_time": "0:03:52", "throughput": 2703.52, "total_tokens": 9599608}
7180
+ {"current_steps": 35810, "total_steps": 38150, "loss": 0.1245, "lr": 5.713173000048033e-07, "epoch": 9.386631716906946, "percentage": 93.87, "elapsed_time": "0:59:11", "remaining_time": "0:03:52", "throughput": 2703.56, "total_tokens": 9601000}
7181
+ {"current_steps": 35815, "total_steps": 38150, "loss": 0.1228, "lr": 5.688887105843032e-07, "epoch": 9.387942332896461, "percentage": 93.88, "elapsed_time": "0:59:11", "remaining_time": "0:03:51", "throughput": 2703.58, "total_tokens": 9602232}
7182
+ {"current_steps": 35820, "total_steps": 38150, "loss": 0.1257, "lr": 5.664652345531846e-07, "epoch": 9.389252948885977, "percentage": 93.89, "elapsed_time": "0:59:12", "remaining_time": "0:03:51", "throughput": 2703.59, "total_tokens": 9603400}
7183
+ {"current_steps": 35825, "total_steps": 38150, "loss": 0.1649, "lr": 5.640468724186721e-07, "epoch": 9.390563564875492, "percentage": 93.91, "elapsed_time": "0:59:12", "remaining_time": "0:03:50", "throughput": 2703.6, "total_tokens": 9604648}
7184
+ {"current_steps": 35830, "total_steps": 38150, "loss": 0.0858, "lr": 5.616336246869275e-07, "epoch": 9.391874180865006, "percentage": 93.92, "elapsed_time": "0:59:12", "remaining_time": "0:03:50", "throughput": 2703.65, "total_tokens": 9606008}
7185
+ {"current_steps": 35835, "total_steps": 38150, "loss": 0.1968, "lr": 5.592254918630385e-07, "epoch": 9.393184796854522, "percentage": 93.93, "elapsed_time": "0:59:13", "remaining_time": "0:03:49", "throughput": 2703.64, "total_tokens": 9607304}
7186
+ {"current_steps": 35840, "total_steps": 38150, "loss": 0.1554, "lr": 5.568224744510242e-07, "epoch": 9.394495412844037, "percentage": 93.94, "elapsed_time": "0:59:13", "remaining_time": "0:03:49", "throughput": 2703.63, "total_tokens": 9608360}
7187
+ {"current_steps": 35845, "total_steps": 38150, "loss": 0.1332, "lr": 5.544245729538322e-07, "epoch": 9.395806028833551, "percentage": 93.96, "elapsed_time": "0:59:14", "remaining_time": "0:03:48", "throughput": 2703.68, "total_tokens": 9609720}
7188
+ {"current_steps": 35850, "total_steps": 38150, "loss": 0.1039, "lr": 5.520317878733389e-07, "epoch": 9.397116644823067, "percentage": 93.97, "elapsed_time": "0:59:14", "remaining_time": "0:03:48", "throughput": 2703.6, "total_tokens": 9610536}
7189
+ {"current_steps": 35855, "total_steps": 38150, "loss": 0.1462, "lr": 5.496441197103463e-07, "epoch": 9.398427260812582, "percentage": 93.98, "elapsed_time": "0:59:15", "remaining_time": "0:03:47", "throughput": 2703.61, "total_tokens": 9611832}
7190
+ {"current_steps": 35860, "total_steps": 38150, "loss": 0.1491, "lr": 5.472615689645965e-07, "epoch": 9.399737876802098, "percentage": 94.0, "elapsed_time": "0:59:15", "remaining_time": "0:03:47", "throughput": 2703.7, "total_tokens": 9613416}
7191
+ {"current_steps": 35865, "total_steps": 38150, "loss": 0.1124, "lr": 5.44884136134749e-07, "epoch": 9.401048492791611, "percentage": 94.01, "elapsed_time": "0:59:16", "remaining_time": "0:03:46", "throughput": 2703.64, "total_tokens": 9614296}
7192
+ {"current_steps": 35870, "total_steps": 38150, "loss": 0.0312, "lr": 5.425118217183945e-07, "epoch": 9.402359108781127, "percentage": 94.02, "elapsed_time": "0:59:16", "remaining_time": "0:03:46", "throughput": 2703.58, "total_tokens": 9615176}
7193
+ {"current_steps": 35875, "total_steps": 38150, "loss": 0.1195, "lr": 5.40144626212058e-07, "epoch": 9.403669724770642, "percentage": 94.04, "elapsed_time": "0:59:16", "remaining_time": "0:03:45", "throughput": 2703.6, "total_tokens": 9616472}
7194
+ {"current_steps": 35880, "total_steps": 38150, "loss": 0.1108, "lr": 5.377825501111849e-07, "epoch": 9.404980340760158, "percentage": 94.05, "elapsed_time": "0:59:17", "remaining_time": "0:03:45", "throughput": 2703.64, "total_tokens": 9617784}
7195
+ {"current_steps": 35885, "total_steps": 38150, "loss": 0.1249, "lr": 5.354255939101576e-07, "epoch": 9.406290956749672, "percentage": 94.06, "elapsed_time": "0:59:17", "remaining_time": "0:03:44", "throughput": 2703.71, "total_tokens": 9619272}
7196
+ {"current_steps": 35890, "total_steps": 38150, "loss": 0.13, "lr": 5.330737581022843e-07, "epoch": 9.407601572739187, "percentage": 94.08, "elapsed_time": "0:59:18", "remaining_time": "0:03:44", "throughput": 2703.73, "total_tokens": 9620472}
7197
+ {"current_steps": 35895, "total_steps": 38150, "loss": 0.1398, "lr": 5.307270431797962e-07, "epoch": 9.408912188728703, "percentage": 94.09, "elapsed_time": "0:59:18", "remaining_time": "0:03:43", "throughput": 2703.79, "total_tokens": 9622040}
7198
+ {"current_steps": 35900, "total_steps": 38150, "loss": 0.1704, "lr": 5.283854496338586e-07, "epoch": 9.410222804718218, "percentage": 94.1, "elapsed_time": "0:59:19", "remaining_time": "0:03:43", "throughput": 2703.82, "total_tokens": 9623304}
7199
+ {"current_steps": 35905, "total_steps": 38150, "loss": 0.1038, "lr": 5.26048977954563e-07, "epoch": 9.411533420707732, "percentage": 94.12, "elapsed_time": "0:59:19", "remaining_time": "0:03:42", "throughput": 2703.9, "total_tokens": 9624936}
7200
+ {"current_steps": 35910, "total_steps": 38150, "loss": 0.1058, "lr": 5.237176286309292e-07, "epoch": 9.412844036697248, "percentage": 94.13, "elapsed_time": "0:59:20", "remaining_time": "0:03:42", "throughput": 2704.03, "total_tokens": 9626952}
7201
+ {"current_steps": 35915, "total_steps": 38150, "loss": 0.2019, "lr": 5.213914021509031e-07, "epoch": 9.414154652686763, "percentage": 94.14, "elapsed_time": "0:59:20", "remaining_time": "0:03:41", "throughput": 2704.05, "total_tokens": 9628200}
7202
+ {"current_steps": 35920, "total_steps": 38150, "loss": 0.0869, "lr": 5.190702990013618e-07, "epoch": 9.415465268676279, "percentage": 94.15, "elapsed_time": "0:59:21", "remaining_time": "0:03:41", "throughput": 2704.08, "total_tokens": 9629432}
7203
+ {"current_steps": 35925, "total_steps": 38150, "loss": 0.0715, "lr": 5.167543196681058e-07, "epoch": 9.416775884665793, "percentage": 94.17, "elapsed_time": "0:59:21", "remaining_time": "0:03:40", "throughput": 2704.14, "total_tokens": 9630824}
7204
+ {"current_steps": 35930, "total_steps": 38150, "loss": 0.1621, "lr": 5.144434646358665e-07, "epoch": 9.418086500655308, "percentage": 94.18, "elapsed_time": "0:59:21", "remaining_time": "0:03:40", "throughput": 2704.16, "total_tokens": 9632104}
7205
+ {"current_steps": 35935, "total_steps": 38150, "loss": 0.1022, "lr": 5.121377343883016e-07, "epoch": 9.419397116644824, "percentage": 94.19, "elapsed_time": "0:59:22", "remaining_time": "0:03:39", "throughput": 2704.23, "total_tokens": 9633704}
7206
+ {"current_steps": 35940, "total_steps": 38150, "loss": 0.0729, "lr": 5.098371294080001e-07, "epoch": 9.420707732634337, "percentage": 94.21, "elapsed_time": "0:59:22", "remaining_time": "0:03:39", "throughput": 2704.25, "total_tokens": 9634968}
7207
+ {"current_steps": 35945, "total_steps": 38150, "loss": 0.2971, "lr": 5.075416501764685e-07, "epoch": 9.422018348623853, "percentage": 94.22, "elapsed_time": "0:59:23", "remaining_time": "0:03:38", "throughput": 2704.29, "total_tokens": 9636392}
7208
+ {"current_steps": 35950, "total_steps": 38150, "loss": 0.1308, "lr": 5.052512971741529e-07, "epoch": 9.423328964613368, "percentage": 94.23, "elapsed_time": "0:59:23", "remaining_time": "0:03:38", "throughput": 2704.22, "total_tokens": 9637400}
7209
+ {"current_steps": 35955, "total_steps": 38150, "loss": 0.1734, "lr": 5.029660708804146e-07, "epoch": 9.424639580602884, "percentage": 94.25, "elapsed_time": "0:59:24", "remaining_time": "0:03:37", "throughput": 2704.3, "total_tokens": 9638952}
7210
+ {"current_steps": 35960, "total_steps": 38150, "loss": 0.1645, "lr": 5.006859717735512e-07, "epoch": 9.425950196592398, "percentage": 94.26, "elapsed_time": "0:59:24", "remaining_time": "0:03:37", "throughput": 2704.33, "total_tokens": 9640232}
7211
+ {"current_steps": 35965, "total_steps": 38150, "loss": 0.1239, "lr": 4.984110003307812e-07, "epoch": 9.427260812581913, "percentage": 94.27, "elapsed_time": "0:59:25", "remaining_time": "0:03:36", "throughput": 2704.34, "total_tokens": 9641384}
7212
+ {"current_steps": 35970, "total_steps": 38150, "loss": 0.0858, "lr": 4.961411570282543e-07, "epoch": 9.428571428571429, "percentage": 94.29, "elapsed_time": "0:59:25", "remaining_time": "0:03:36", "throughput": 2704.46, "total_tokens": 9643128}
7213
+ {"current_steps": 35975, "total_steps": 38150, "loss": 0.0895, "lr": 4.938764423410458e-07, "epoch": 9.429882044560944, "percentage": 94.3, "elapsed_time": "0:59:26", "remaining_time": "0:03:35", "throughput": 2704.47, "total_tokens": 9644312}
7214
+ {"current_steps": 35980, "total_steps": 38150, "loss": 0.1189, "lr": 4.916168567431544e-07, "epoch": 9.431192660550458, "percentage": 94.31, "elapsed_time": "0:59:26", "remaining_time": "0:03:35", "throughput": 2704.48, "total_tokens": 9645544}
7215
+ {"current_steps": 35985, "total_steps": 38150, "loss": 0.1423, "lr": 4.893624007075048e-07, "epoch": 9.432503276539974, "percentage": 94.33, "elapsed_time": "0:59:26", "remaining_time": "0:03:34", "throughput": 2704.53, "total_tokens": 9646888}
7216
+ {"current_steps": 35990, "total_steps": 38150, "loss": 0.1427, "lr": 4.871130747059554e-07, "epoch": 9.43381389252949, "percentage": 94.34, "elapsed_time": "0:59:27", "remaining_time": "0:03:34", "throughput": 2704.59, "total_tokens": 9648360}
7217
+ {"current_steps": 35995, "total_steps": 38150, "loss": 0.1499, "lr": 4.848688792092798e-07, "epoch": 9.435124508519005, "percentage": 94.35, "elapsed_time": "0:59:27", "remaining_time": "0:03:33", "throughput": 2704.63, "total_tokens": 9649608}
7218
+ {"current_steps": 36000, "total_steps": 38150, "loss": 0.2261, "lr": 4.826298146871938e-07, "epoch": 9.436435124508519, "percentage": 94.36, "elapsed_time": "0:59:28", "remaining_time": "0:03:33", "throughput": 2704.65, "total_tokens": 9650840}
7219
+ {"current_steps": 36005, "total_steps": 38150, "loss": 0.1197, "lr": 4.803958816083254e-07, "epoch": 9.437745740498034, "percentage": 94.38, "elapsed_time": "0:59:28", "remaining_time": "0:03:32", "throughput": 2704.63, "total_tokens": 9651928}
7220
+ {"current_steps": 36010, "total_steps": 38150, "loss": 0.1398, "lr": 4.781670804402283e-07, "epoch": 9.43905635648755, "percentage": 94.39, "elapsed_time": "0:59:29", "remaining_time": "0:03:32", "throughput": 2704.66, "total_tokens": 9653192}
7221
+ {"current_steps": 36015, "total_steps": 38150, "loss": 0.1074, "lr": 4.75943411649396e-07, "epoch": 9.440366972477065, "percentage": 94.4, "elapsed_time": "0:59:29", "remaining_time": "0:03:31", "throughput": 2704.62, "total_tokens": 9654152}
7222
+ {"current_steps": 36020, "total_steps": 38150, "loss": 0.1729, "lr": 4.7372487570123135e-07, "epoch": 9.441677588466579, "percentage": 94.42, "elapsed_time": "0:59:29", "remaining_time": "0:03:31", "throughput": 2704.68, "total_tokens": 9655656}
7223
+ {"current_steps": 36025, "total_steps": 38150, "loss": 0.1046, "lr": 4.7151147306007117e-07, "epoch": 9.442988204456094, "percentage": 94.43, "elapsed_time": "0:59:30", "remaining_time": "0:03:30", "throughput": 2704.66, "total_tokens": 9656792}
7224
+ {"current_steps": 36030, "total_steps": 38150, "loss": 0.1735, "lr": 4.6930320418917817e-07, "epoch": 9.44429882044561, "percentage": 94.44, "elapsed_time": "0:59:30", "remaining_time": "0:03:30", "throughput": 2704.67, "total_tokens": 9657928}
7225
+ {"current_steps": 36035, "total_steps": 38150, "loss": 0.1178, "lr": 4.6710006955073826e-07, "epoch": 9.445609436435124, "percentage": 94.46, "elapsed_time": "0:59:31", "remaining_time": "0:03:29", "throughput": 2704.66, "total_tokens": 9659032}
7226
+ {"current_steps": 36040, "total_steps": 38150, "loss": 0.1347, "lr": 4.649020696058659e-07, "epoch": 9.44692005242464, "percentage": 94.47, "elapsed_time": "0:59:31", "remaining_time": "0:03:29", "throughput": 2704.65, "total_tokens": 9660200}
7227
+ {"current_steps": 36045, "total_steps": 38150, "loss": 0.0538, "lr": 4.627092048145959e-07, "epoch": 9.448230668414155, "percentage": 94.48, "elapsed_time": "0:59:32", "remaining_time": "0:03:28", "throughput": 2704.67, "total_tokens": 9661432}
7228
+ {"current_steps": 36050, "total_steps": 38150, "loss": 0.2051, "lr": 4.6052147563589167e-07, "epoch": 9.44954128440367, "percentage": 94.5, "elapsed_time": "0:59:32", "remaining_time": "0:03:28", "throughput": 2704.7, "total_tokens": 9662776}
7229
+ {"current_steps": 36055, "total_steps": 38150, "loss": 0.1153, "lr": 4.583388825276397e-07, "epoch": 9.450851900393184, "percentage": 94.51, "elapsed_time": "0:59:33", "remaining_time": "0:03:27", "throughput": 2704.67, "total_tokens": 9663896}
7230
+ {"current_steps": 36060, "total_steps": 38150, "loss": 0.0696, "lr": 4.561614259466579e-07, "epoch": 9.4521625163827, "percentage": 94.52, "elapsed_time": "0:59:33", "remaining_time": "0:03:27", "throughput": 2704.7, "total_tokens": 9665144}
7231
+ {"current_steps": 36065, "total_steps": 38150, "loss": 0.3006, "lr": 4.53989106348679e-07, "epoch": 9.453473132372215, "percentage": 94.53, "elapsed_time": "0:59:33", "remaining_time": "0:03:26", "throughput": 2704.77, "total_tokens": 9666616}
7232
+ {"current_steps": 36070, "total_steps": 38150, "loss": 0.082, "lr": 4.518219241883698e-07, "epoch": 9.45478374836173, "percentage": 94.55, "elapsed_time": "0:59:34", "remaining_time": "0:03:26", "throughput": 2704.77, "total_tokens": 9667768}
7233
+ {"current_steps": 36075, "total_steps": 38150, "loss": 0.0883, "lr": 4.4965987991931466e-07, "epoch": 9.456094364351245, "percentage": 94.56, "elapsed_time": "0:59:34", "remaining_time": "0:03:25", "throughput": 2704.83, "total_tokens": 9669192}
7234
+ {"current_steps": 36080, "total_steps": 38150, "loss": 0.1166, "lr": 4.475029739940295e-07, "epoch": 9.45740498034076, "percentage": 94.57, "elapsed_time": "0:59:35", "remaining_time": "0:03:25", "throughput": 2704.84, "total_tokens": 9670392}
7235
+ {"current_steps": 36085, "total_steps": 38150, "loss": 0.1345, "lr": 4.4535120686394484e-07, "epoch": 9.458715596330276, "percentage": 94.59, "elapsed_time": "0:59:35", "remaining_time": "0:03:24", "throughput": 2704.86, "total_tokens": 9671656}
7236
+ {"current_steps": 36090, "total_steps": 38150, "loss": 0.0631, "lr": 4.4320457897942815e-07, "epoch": 9.460026212319791, "percentage": 94.6, "elapsed_time": "0:59:36", "remaining_time": "0:03:24", "throughput": 2704.84, "total_tokens": 9672696}
7237
+ {"current_steps": 36095, "total_steps": 38150, "loss": 0.1272, "lr": 4.410630907897617e-07, "epoch": 9.461336828309305, "percentage": 94.61, "elapsed_time": "0:59:36", "remaining_time": "0:03:23", "throughput": 2704.9, "total_tokens": 9674152}
7238
+ {"current_steps": 36100, "total_steps": 38150, "loss": 0.0821, "lr": 4.389267427431565e-07, "epoch": 9.46264744429882, "percentage": 94.63, "elapsed_time": "0:59:36", "remaining_time": "0:03:23", "throughput": 2704.89, "total_tokens": 9675272}
7239
+ {"current_steps": 36105, "total_steps": 38150, "loss": 0.1227, "lr": 4.367955352867437e-07, "epoch": 9.463958060288336, "percentage": 94.64, "elapsed_time": "0:59:37", "remaining_time": "0:03:22", "throughput": 2704.94, "total_tokens": 9676600}
7240
+ {"current_steps": 36110, "total_steps": 38150, "loss": 0.096, "lr": 4.346694688665831e-07, "epoch": 9.46526867627785, "percentage": 94.65, "elapsed_time": "0:59:37", "remaining_time": "0:03:22", "throughput": 2704.92, "total_tokens": 9677656}
7241
+ {"current_steps": 36115, "total_steps": 38150, "loss": 0.0905, "lr": 4.325485439276578e-07, "epoch": 9.466579292267365, "percentage": 94.67, "elapsed_time": "0:59:38", "remaining_time": "0:03:21", "throughput": 2704.9, "total_tokens": 9678696}
7242
+ {"current_steps": 36120, "total_steps": 38150, "loss": 0.1167, "lr": 4.304327609138736e-07, "epoch": 9.46788990825688, "percentage": 94.68, "elapsed_time": "0:59:38", "remaining_time": "0:03:21", "throughput": 2704.98, "total_tokens": 9680280}
7243
+ {"current_steps": 36125, "total_steps": 38150, "loss": 0.1346, "lr": 4.2832212026805986e-07, "epoch": 9.469200524246396, "percentage": 94.69, "elapsed_time": "0:59:39", "remaining_time": "0:03:20", "throughput": 2704.98, "total_tokens": 9681432}
7244
+ {"current_steps": 36130, "total_steps": 38150, "loss": 0.1547, "lr": 4.262166224319686e-07, "epoch": 9.47051114023591, "percentage": 94.71, "elapsed_time": "0:59:39", "remaining_time": "0:03:20", "throughput": 2705.0, "total_tokens": 9682744}
7245
+ {"current_steps": 36135, "total_steps": 38150, "loss": 0.2189, "lr": 4.2411626784628065e-07, "epoch": 9.471821756225426, "percentage": 94.72, "elapsed_time": "0:59:40", "remaining_time": "0:03:19", "throughput": 2705.28, "total_tokens": 9686088}
7246
+ {"current_steps": 36140, "total_steps": 38150, "loss": 0.1486, "lr": 4.220210569505917e-07, "epoch": 9.473132372214941, "percentage": 94.73, "elapsed_time": "0:59:40", "remaining_time": "0:03:19", "throughput": 2705.29, "total_tokens": 9687240}
7247
+ {"current_steps": 36145, "total_steps": 38150, "loss": 0.1304, "lr": 4.199309901834286e-07, "epoch": 9.474442988204457, "percentage": 94.74, "elapsed_time": "0:59:41", "remaining_time": "0:03:18", "throughput": 2705.3, "total_tokens": 9688440}
7248
+ {"current_steps": 36150, "total_steps": 38150, "loss": 0.1937, "lr": 4.178460679822388e-07, "epoch": 9.47575360419397, "percentage": 94.76, "elapsed_time": "0:59:41", "remaining_time": "0:03:18", "throughput": 2705.36, "total_tokens": 9690136}
7249
+ {"current_steps": 36155, "total_steps": 38150, "loss": 0.0815, "lr": 4.157662907833898e-07, "epoch": 9.477064220183486, "percentage": 94.77, "elapsed_time": "0:59:42", "remaining_time": "0:03:17", "throughput": 2705.38, "total_tokens": 9691352}
7250
+ {"current_steps": 36160, "total_steps": 38150, "loss": 0.1974, "lr": 4.1369165902218065e-07, "epoch": 9.478374836173002, "percentage": 94.78, "elapsed_time": "0:59:42", "remaining_time": "0:03:17", "throughput": 2705.44, "total_tokens": 9692760}
7251
+ {"current_steps": 36165, "total_steps": 38150, "loss": 0.1761, "lr": 4.1162217313282236e-07, "epoch": 9.479685452162517, "percentage": 94.8, "elapsed_time": "0:59:43", "remaining_time": "0:03:16", "throughput": 2705.47, "total_tokens": 9694024}
7252
+ {"current_steps": 36170, "total_steps": 38150, "loss": 0.1141, "lr": 4.0955783354846013e-07, "epoch": 9.48099606815203, "percentage": 94.81, "elapsed_time": "0:59:43", "remaining_time": "0:03:16", "throughput": 2705.48, "total_tokens": 9695192}
7253
+ {"current_steps": 36175, "total_steps": 38150, "loss": 0.161, "lr": 4.0749864070115387e-07, "epoch": 9.482306684141546, "percentage": 94.82, "elapsed_time": "0:59:43", "remaining_time": "0:03:15", "throughput": 2705.51, "total_tokens": 9696536}
7254
+ {"current_steps": 36180, "total_steps": 38150, "loss": 0.0644, "lr": 4.0544459502188934e-07, "epoch": 9.483617300131062, "percentage": 94.84, "elapsed_time": "0:59:44", "remaining_time": "0:03:15", "throughput": 2705.54, "total_tokens": 9697928}
7255
+ {"current_steps": 36185, "total_steps": 38150, "loss": 0.187, "lr": 4.0339569694057554e-07, "epoch": 9.484927916120578, "percentage": 94.85, "elapsed_time": "0:59:44", "remaining_time": "0:03:14", "throughput": 2705.59, "total_tokens": 9699272}
7256
+ {"current_steps": 36190, "total_steps": 38150, "loss": 0.223, "lr": 4.013519468860416e-07, "epoch": 9.486238532110091, "percentage": 94.86, "elapsed_time": "0:59:45", "remaining_time": "0:03:14", "throughput": 2705.68, "total_tokens": 9700968}
7257
+ {"current_steps": 36195, "total_steps": 38150, "loss": 0.1992, "lr": 3.993133452860426e-07, "epoch": 9.487549148099607, "percentage": 94.88, "elapsed_time": "0:59:45", "remaining_time": "0:03:13", "throughput": 2705.74, "total_tokens": 9702344}
7258
+ {"current_steps": 36200, "total_steps": 38150, "loss": 0.1482, "lr": 3.9727989256725384e-07, "epoch": 9.488859764089122, "percentage": 94.89, "elapsed_time": "0:59:46", "remaining_time": "0:03:13", "throughput": 2705.7, "total_tokens": 9703352}
7259
+ {"current_steps": 36205, "total_steps": 38150, "loss": 0.1234, "lr": 3.9525158915527383e-07, "epoch": 9.490170380078636, "percentage": 94.9, "elapsed_time": "0:59:46", "remaining_time": "0:03:12", "throughput": 2705.78, "total_tokens": 9704840}
7260
+ {"current_steps": 36210, "total_steps": 38150, "loss": 0.1153, "lr": 3.932284354746213e-07, "epoch": 9.491480996068152, "percentage": 94.91, "elapsed_time": "0:59:47", "remaining_time": "0:03:12", "throughput": 2705.82, "total_tokens": 9706248}
7261
+ {"current_steps": 36215, "total_steps": 38150, "loss": 0.0911, "lr": 3.9121043194874365e-07, "epoch": 9.492791612057667, "percentage": 94.93, "elapsed_time": "0:59:47", "remaining_time": "0:03:11", "throughput": 2705.82, "total_tokens": 9707384}
7262
+ {"current_steps": 36220, "total_steps": 38150, "loss": 0.0947, "lr": 3.8919757899999743e-07, "epoch": 9.494102228047183, "percentage": 94.94, "elapsed_time": "0:59:48", "remaining_time": "0:03:11", "throughput": 2705.92, "total_tokens": 9709208}
7263
+ {"current_steps": 36225, "total_steps": 38150, "loss": 0.1031, "lr": 3.8718987704967345e-07, "epoch": 9.495412844036696, "percentage": 94.95, "elapsed_time": "0:59:48", "remaining_time": "0:03:10", "throughput": 2705.92, "total_tokens": 9710392}
7264
+ {"current_steps": 36230, "total_steps": 38150, "loss": 0.1849, "lr": 3.8518732651798284e-07, "epoch": 9.496723460026212, "percentage": 94.97, "elapsed_time": "0:59:48", "remaining_time": "0:03:10", "throughput": 2705.95, "total_tokens": 9711624}
7265
+ {"current_steps": 36235, "total_steps": 38150, "loss": 0.108, "lr": 3.8318992782405414e-07, "epoch": 9.498034076015728, "percentage": 94.98, "elapsed_time": "0:59:49", "remaining_time": "0:03:09", "throughput": 2705.93, "total_tokens": 9712680}
7266
+ {"current_steps": 36240, "total_steps": 38150, "loss": 0.0862, "lr": 3.811976813859364e-07, "epoch": 9.499344692005243, "percentage": 94.99, "elapsed_time": "0:59:49", "remaining_time": "0:03:09", "throughput": 2705.97, "total_tokens": 9714008}
7267
+ {"current_steps": 36245, "total_steps": 38150, "loss": 0.1308, "lr": 3.792105876206098e-07, "epoch": 9.500655307994757, "percentage": 95.01, "elapsed_time": "0:59:50", "remaining_time": "0:03:08", "throughput": 2705.98, "total_tokens": 9715176}
7268
+ {"current_steps": 36250, "total_steps": 38150, "loss": 0.1008, "lr": 3.7722864694396397e-07, "epoch": 9.501965923984272, "percentage": 95.02, "elapsed_time": "0:59:50", "remaining_time": "0:03:08", "throughput": 2705.97, "total_tokens": 9716296}
7269
+ {"current_steps": 36252, "total_steps": 38150, "eval_loss": 0.9084105491638184, "epoch": 9.502490170380078, "percentage": 95.02, "elapsed_time": "1:00:07", "remaining_time": "0:03:08", "throughput": 2693.48, "total_tokens": 9716920}
7270
+ {"current_steps": 36255, "total_steps": 38150, "loss": 0.1074, "lr": 3.7525185977081713e-07, "epoch": 9.503276539973788, "percentage": 95.03, "elapsed_time": "1:00:10", "remaining_time": "0:03:08", "throughput": 2691.83, "total_tokens": 9719064}
7271
+ {"current_steps": 36260, "total_steps": 38150, "loss": 0.136, "lr": 3.7328022651490767e-07, "epoch": 9.504587155963304, "percentage": 95.05, "elapsed_time": "1:00:11", "remaining_time": "0:03:08", "throughput": 2691.89, "total_tokens": 9720472}
7272
+ {"current_steps": 36265, "total_steps": 38150, "loss": 0.1351, "lr": 3.7131374758889446e-07, "epoch": 9.505897771952817, "percentage": 95.06, "elapsed_time": "1:00:11", "remaining_time": "0:03:07", "throughput": 2691.94, "total_tokens": 9721896}
7273
+ {"current_steps": 36270, "total_steps": 38150, "loss": 0.152, "lr": 3.693524234043594e-07, "epoch": 9.507208387942333, "percentage": 95.07, "elapsed_time": "1:00:11", "remaining_time": "0:03:07", "throughput": 2692.02, "total_tokens": 9723480}
7274
+ {"current_steps": 36275, "total_steps": 38150, "loss": 0.1421, "lr": 3.6739625437180457e-07, "epoch": 9.508519003931848, "percentage": 95.09, "elapsed_time": "1:00:12", "remaining_time": "0:03:06", "throughput": 2692.13, "total_tokens": 9725224}
7275
+ {"current_steps": 36280, "total_steps": 38150, "loss": 0.3595, "lr": 3.654452409006498e-07, "epoch": 9.509829619921362, "percentage": 95.1, "elapsed_time": "1:00:12", "remaining_time": "0:03:06", "throughput": 2692.16, "total_tokens": 9726520}