rbelanec commited on
Commit
178f41a
·
verified ·
1 Parent(s): 622da26

Training in progress, step 34632

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +386 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:849d41327d28828dce8ca4960854b0eb391d9a9ff7b731788ac650e5ca2ac1f5
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c22fd255c6b57f6b4d6cf0e0558d759850872d60ac68b2c2b5dc991239e3ce0c
3
  size 798032
trainer_log.jsonl CHANGED
@@ -6559,3 +6559,389 @@
6559
  {"current_steps": 32710, "total_steps": 38480, "loss": 0.1168, "lr": 3.3482310747393e-06, "epoch": 17.0010395010395, "percentage": 85.01, "elapsed_time": "1:18:50", "remaining_time": "0:13:54", "throughput": 1318.21, "total_tokens": 6235336}
6560
  {"current_steps": 32715, "total_steps": 38480, "loss": 0.1974, "lr": 3.3425645942481126e-06, "epoch": 17.003638253638254, "percentage": 85.02, "elapsed_time": "1:18:50", "remaining_time": "0:13:53", "throughput": 1318.17, "total_tokens": 6236264}
6561
  {"current_steps": 32720, "total_steps": 38480, "loss": 0.0464, "lr": 3.336902569207484e-06, "epoch": 17.006237006237008, "percentage": 85.03, "elapsed_time": "1:18:51", "remaining_time": "0:13:52", "throughput": 1318.18, "total_tokens": 6237224}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6559
  {"current_steps": 32710, "total_steps": 38480, "loss": 0.1168, "lr": 3.3482310747393e-06, "epoch": 17.0010395010395, "percentage": 85.01, "elapsed_time": "1:18:50", "remaining_time": "0:13:54", "throughput": 1318.21, "total_tokens": 6235336}
6560
  {"current_steps": 32715, "total_steps": 38480, "loss": 0.1974, "lr": 3.3425645942481126e-06, "epoch": 17.003638253638254, "percentage": 85.02, "elapsed_time": "1:18:50", "remaining_time": "0:13:53", "throughput": 1318.17, "total_tokens": 6236264}
6561
  {"current_steps": 32720, "total_steps": 38480, "loss": 0.0464, "lr": 3.336902569207484e-06, "epoch": 17.006237006237008, "percentage": 85.03, "elapsed_time": "1:18:51", "remaining_time": "0:13:52", "throughput": 1318.18, "total_tokens": 6237224}
6562
+ {"current_steps": 32725, "total_steps": 38480, "loss": 0.1657, "lr": 3.331245000782221e-06, "epoch": 17.008835758835758, "percentage": 85.04, "elapsed_time": "1:18:52", "remaining_time": "0:13:52", "throughput": 1318.18, "total_tokens": 6238184}
6563
+ {"current_steps": 32730, "total_steps": 38480, "loss": 0.1454, "lr": 3.3255918901362342e-06, "epoch": 17.011434511434512, "percentage": 85.06, "elapsed_time": "1:18:53", "remaining_time": "0:13:51", "throughput": 1318.19, "total_tokens": 6239144}
6564
+ {"current_steps": 32735, "total_steps": 38480, "loss": 0.1595, "lr": 3.3199432384324964e-06, "epoch": 17.014033264033262, "percentage": 85.07, "elapsed_time": "1:18:53", "remaining_time": "0:13:50", "throughput": 1318.19, "total_tokens": 6240104}
6565
+ {"current_steps": 32740, "total_steps": 38480, "loss": 0.1991, "lr": 3.3142990468330636e-06, "epoch": 17.016632016632016, "percentage": 85.08, "elapsed_time": "1:18:54", "remaining_time": "0:13:50", "throughput": 1318.19, "total_tokens": 6241000}
6566
+ {"current_steps": 32745, "total_steps": 38480, "loss": 0.0993, "lr": 3.308659316499094e-06, "epoch": 17.01923076923077, "percentage": 85.1, "elapsed_time": "1:18:55", "remaining_time": "0:13:49", "throughput": 1318.19, "total_tokens": 6241928}
6567
+ {"current_steps": 32750, "total_steps": 38480, "loss": 0.1135, "lr": 3.3030240485908025e-06, "epoch": 17.02182952182952, "percentage": 85.11, "elapsed_time": "1:18:55", "remaining_time": "0:13:48", "throughput": 1318.21, "total_tokens": 6242952}
6568
+ {"current_steps": 32755, "total_steps": 38480, "loss": 0.1664, "lr": 3.297393244267505e-06, "epoch": 17.024428274428274, "percentage": 85.12, "elapsed_time": "1:18:56", "remaining_time": "0:13:47", "throughput": 1318.22, "total_tokens": 6243912}
6569
+ {"current_steps": 32760, "total_steps": 38480, "loss": 0.1327, "lr": 3.2917669046875847e-06, "epoch": 17.027027027027028, "percentage": 85.14, "elapsed_time": "1:18:57", "remaining_time": "0:13:47", "throughput": 1318.22, "total_tokens": 6244872}
6570
+ {"current_steps": 32765, "total_steps": 38480, "loss": 0.1171, "lr": 3.286145031008522e-06, "epoch": 17.02962577962578, "percentage": 85.15, "elapsed_time": "1:18:58", "remaining_time": "0:13:46", "throughput": 1318.22, "total_tokens": 6245800}
6571
+ {"current_steps": 32770, "total_steps": 38480, "loss": 0.1435, "lr": 3.28052762438687e-06, "epoch": 17.032224532224532, "percentage": 85.16, "elapsed_time": "1:18:58", "remaining_time": "0:13:45", "throughput": 1318.24, "total_tokens": 6246824}
6572
+ {"current_steps": 32775, "total_steps": 38480, "loss": 0.0954, "lr": 3.2749146859782587e-06, "epoch": 17.034823284823286, "percentage": 85.17, "elapsed_time": "1:18:59", "remaining_time": "0:13:44", "throughput": 1318.26, "total_tokens": 6247816}
6573
+ {"current_steps": 32780, "total_steps": 38480, "loss": 0.1392, "lr": 3.269306216937404e-06, "epoch": 17.037422037422036, "percentage": 85.19, "elapsed_time": "1:19:00", "remaining_time": "0:13:44", "throughput": 1318.27, "total_tokens": 6248776}
6574
+ {"current_steps": 32785, "total_steps": 38480, "loss": 0.1601, "lr": 3.2637022184181022e-06, "epoch": 17.04002079002079, "percentage": 85.2, "elapsed_time": "1:19:00", "remaining_time": "0:13:43", "throughput": 1318.26, "total_tokens": 6249704}
6575
+ {"current_steps": 32790, "total_steps": 38480, "loss": 0.1791, "lr": 3.258102691573231e-06, "epoch": 17.042619542619544, "percentage": 85.21, "elapsed_time": "1:19:01", "remaining_time": "0:13:42", "throughput": 1318.28, "total_tokens": 6250696}
6576
+ {"current_steps": 32795, "total_steps": 38480, "loss": 0.231, "lr": 3.2525076375547387e-06, "epoch": 17.045218295218294, "percentage": 85.23, "elapsed_time": "1:19:02", "remaining_time": "0:13:42", "throughput": 1318.28, "total_tokens": 6251624}
6577
+ {"current_steps": 32800, "total_steps": 38480, "loss": 0.0842, "lr": 3.246917057513671e-06, "epoch": 17.04781704781705, "percentage": 85.24, "elapsed_time": "1:19:02", "remaining_time": "0:13:41", "throughput": 1318.29, "total_tokens": 6252584}
6578
+ {"current_steps": 32805, "total_steps": 38480, "loss": 0.0403, "lr": 3.2413309526001413e-06, "epoch": 17.050415800415802, "percentage": 85.25, "elapsed_time": "1:19:03", "remaining_time": "0:13:40", "throughput": 1318.29, "total_tokens": 6253512}
6579
+ {"current_steps": 32810, "total_steps": 38480, "loss": 0.1917, "lr": 3.235749323963344e-06, "epoch": 17.053014553014552, "percentage": 85.27, "elapsed_time": "1:19:04", "remaining_time": "0:13:39", "throughput": 1318.3, "total_tokens": 6254504}
6580
+ {"current_steps": 32815, "total_steps": 38480, "loss": 0.0736, "lr": 3.2301721727515442e-06, "epoch": 17.055613305613306, "percentage": 85.28, "elapsed_time": "1:19:05", "remaining_time": "0:13:39", "throughput": 1318.3, "total_tokens": 6255432}
6581
+ {"current_steps": 32820, "total_steps": 38480, "loss": 0.0658, "lr": 3.2245995001121106e-06, "epoch": 17.058212058212057, "percentage": 85.29, "elapsed_time": "1:19:05", "remaining_time": "0:13:38", "throughput": 1318.3, "total_tokens": 6256328}
6582
+ {"current_steps": 32825, "total_steps": 38480, "loss": 0.1309, "lr": 3.219031307191467e-06, "epoch": 17.06081081081081, "percentage": 85.3, "elapsed_time": "1:19:06", "remaining_time": "0:13:37", "throughput": 1318.3, "total_tokens": 6257288}
6583
+ {"current_steps": 32830, "total_steps": 38480, "loss": 0.0707, "lr": 3.213467595135122e-06, "epoch": 17.063409563409564, "percentage": 85.32, "elapsed_time": "1:19:07", "remaining_time": "0:13:36", "throughput": 1318.31, "total_tokens": 6258248}
6584
+ {"current_steps": 32835, "total_steps": 38480, "loss": 0.0815, "lr": 3.207908365087661e-06, "epoch": 17.066008316008315, "percentage": 85.33, "elapsed_time": "1:19:07", "remaining_time": "0:13:36", "throughput": 1318.33, "total_tokens": 6259272}
6585
+ {"current_steps": 32840, "total_steps": 38480, "loss": 0.0663, "lr": 3.2023536181927632e-06, "epoch": 17.06860706860707, "percentage": 85.34, "elapsed_time": "1:19:08", "remaining_time": "0:13:35", "throughput": 1318.33, "total_tokens": 6260168}
6586
+ {"current_steps": 32845, "total_steps": 38480, "loss": 0.4234, "lr": 3.1968033555931586e-06, "epoch": 17.071205821205822, "percentage": 85.36, "elapsed_time": "1:19:09", "remaining_time": "0:13:34", "throughput": 1318.34, "total_tokens": 6261160}
6587
+ {"current_steps": 32850, "total_steps": 38480, "loss": 0.0953, "lr": 3.1912575784306704e-06, "epoch": 17.073804573804573, "percentage": 85.37, "elapsed_time": "1:19:09", "remaining_time": "0:13:34", "throughput": 1318.34, "total_tokens": 6262056}
6588
+ {"current_steps": 32855, "total_steps": 38480, "loss": 0.1524, "lr": 3.185716287846202e-06, "epoch": 17.076403326403327, "percentage": 85.38, "elapsed_time": "1:19:10", "remaining_time": "0:13:33", "throughput": 1318.35, "total_tokens": 6263048}
6589
+ {"current_steps": 32860, "total_steps": 38480, "loss": 0.0407, "lr": 3.1801794849797285e-06, "epoch": 17.07900207900208, "percentage": 85.4, "elapsed_time": "1:19:11", "remaining_time": "0:13:32", "throughput": 1318.35, "total_tokens": 6263944}
6590
+ {"current_steps": 32865, "total_steps": 38480, "loss": 0.1851, "lr": 3.1746471709702964e-06, "epoch": 17.08160083160083, "percentage": 85.41, "elapsed_time": "1:19:12", "remaining_time": "0:13:31", "throughput": 1318.36, "total_tokens": 6264936}
6591
+ {"current_steps": 32870, "total_steps": 38480, "loss": 0.1014, "lr": 3.1691193469560426e-06, "epoch": 17.084199584199585, "percentage": 85.42, "elapsed_time": "1:19:12", "remaining_time": "0:13:31", "throughput": 1318.37, "total_tokens": 6265896}
6592
+ {"current_steps": 32875, "total_steps": 38480, "loss": 0.1383, "lr": 3.163596014074169e-06, "epoch": 17.08679833679834, "percentage": 85.43, "elapsed_time": "1:19:13", "remaining_time": "0:13:30", "throughput": 1318.36, "total_tokens": 6266792}
6593
+ {"current_steps": 32880, "total_steps": 38480, "loss": 0.1869, "lr": 3.158077173460955e-06, "epoch": 17.08939708939709, "percentage": 85.45, "elapsed_time": "1:19:14", "remaining_time": "0:13:29", "throughput": 1318.36, "total_tokens": 6267720}
6594
+ {"current_steps": 32885, "total_steps": 38480, "loss": 0.0476, "lr": 3.1525628262517536e-06, "epoch": 17.091995841995843, "percentage": 85.46, "elapsed_time": "1:19:14", "remaining_time": "0:13:28", "throughput": 1318.37, "total_tokens": 6268648}
6595
+ {"current_steps": 32890, "total_steps": 38480, "loss": 0.1416, "lr": 3.1470529735810077e-06, "epoch": 17.094594594594593, "percentage": 85.47, "elapsed_time": "1:19:15", "remaining_time": "0:13:28", "throughput": 1318.38, "total_tokens": 6269640}
6596
+ {"current_steps": 32895, "total_steps": 38480, "loss": 0.0726, "lr": 3.1415476165822215e-06, "epoch": 17.097193347193347, "percentage": 85.49, "elapsed_time": "1:19:16", "remaining_time": "0:13:27", "throughput": 1318.39, "total_tokens": 6270600}
6597
+ {"current_steps": 32900, "total_steps": 38480, "loss": 0.1795, "lr": 3.1360467563879753e-06, "epoch": 17.0997920997921, "percentage": 85.5, "elapsed_time": "1:19:16", "remaining_time": "0:13:26", "throughput": 1318.4, "total_tokens": 6271560}
6598
+ {"current_steps": 32905, "total_steps": 38480, "loss": 0.0518, "lr": 3.13055039412993e-06, "epoch": 17.10239085239085, "percentage": 85.51, "elapsed_time": "1:19:17", "remaining_time": "0:13:26", "throughput": 1318.4, "total_tokens": 6272488}
6599
+ {"current_steps": 32910, "total_steps": 38480, "loss": 0.1865, "lr": 3.125058530938818e-06, "epoch": 17.104989604989605, "percentage": 85.52, "elapsed_time": "1:19:18", "remaining_time": "0:13:25", "throughput": 1318.39, "total_tokens": 6273384}
6600
+ {"current_steps": 32915, "total_steps": 38480, "loss": 0.116, "lr": 3.1195711679444433e-06, "epoch": 17.10758835758836, "percentage": 85.54, "elapsed_time": "1:19:19", "remaining_time": "0:13:24", "throughput": 1318.4, "total_tokens": 6274344}
6601
+ {"current_steps": 32920, "total_steps": 38480, "loss": 0.093, "lr": 3.1140883062756835e-06, "epoch": 17.11018711018711, "percentage": 85.55, "elapsed_time": "1:19:19", "remaining_time": "0:13:23", "throughput": 1318.41, "total_tokens": 6275304}
6602
+ {"current_steps": 32925, "total_steps": 38480, "loss": 0.1295, "lr": 3.108609947060506e-06, "epoch": 17.112785862785863, "percentage": 85.56, "elapsed_time": "1:19:20", "remaining_time": "0:13:23", "throughput": 1318.4, "total_tokens": 6276200}
6603
+ {"current_steps": 32930, "total_steps": 38480, "loss": 0.1506, "lr": 3.1031360914259345e-06, "epoch": 17.115384615384617, "percentage": 85.58, "elapsed_time": "1:19:21", "remaining_time": "0:13:22", "throughput": 1318.41, "total_tokens": 6277160}
6604
+ {"current_steps": 32935, "total_steps": 38480, "loss": 0.1139, "lr": 3.0976667404980703e-06, "epoch": 17.117983367983367, "percentage": 85.59, "elapsed_time": "1:19:21", "remaining_time": "0:13:21", "throughput": 1318.43, "total_tokens": 6278152}
6605
+ {"current_steps": 32940, "total_steps": 38480, "loss": 0.1256, "lr": 3.092201895402083e-06, "epoch": 17.12058212058212, "percentage": 85.6, "elapsed_time": "1:19:22", "remaining_time": "0:13:20", "throughput": 1318.44, "total_tokens": 6279112}
6606
+ {"current_steps": 32945, "total_steps": 38480, "loss": 0.1428, "lr": 3.086741557262235e-06, "epoch": 17.123180873180875, "percentage": 85.62, "elapsed_time": "1:19:23", "remaining_time": "0:13:20", "throughput": 1318.45, "total_tokens": 6280104}
6607
+ {"current_steps": 32950, "total_steps": 38480, "loss": 0.1229, "lr": 3.081285727201841e-06, "epoch": 17.125779625779625, "percentage": 85.63, "elapsed_time": "1:19:23", "remaining_time": "0:13:19", "throughput": 1318.45, "total_tokens": 6281000}
6608
+ {"current_steps": 32955, "total_steps": 38480, "loss": 0.22, "lr": 3.0758344063432922e-06, "epoch": 17.12837837837838, "percentage": 85.64, "elapsed_time": "1:19:24", "remaining_time": "0:13:18", "throughput": 1318.47, "total_tokens": 6282024}
6609
+ {"current_steps": 32960, "total_steps": 38480, "loss": 0.096, "lr": 3.0703875958080553e-06, "epoch": 17.13097713097713, "percentage": 85.65, "elapsed_time": "1:19:25", "remaining_time": "0:13:18", "throughput": 1318.47, "total_tokens": 6282920}
6610
+ {"current_steps": 32965, "total_steps": 38480, "loss": 0.1316, "lr": 3.0649452967166785e-06, "epoch": 17.133575883575883, "percentage": 85.67, "elapsed_time": "1:19:26", "remaining_time": "0:13:17", "throughput": 1318.48, "total_tokens": 6283912}
6611
+ {"current_steps": 32970, "total_steps": 38480, "loss": 0.1558, "lr": 3.0595075101887694e-06, "epoch": 17.136174636174637, "percentage": 85.68, "elapsed_time": "1:19:26", "remaining_time": "0:13:16", "throughput": 1318.49, "total_tokens": 6284872}
6612
+ {"current_steps": 32975, "total_steps": 38480, "loss": 0.1461, "lr": 3.0540742373429964e-06, "epoch": 17.138773388773387, "percentage": 85.69, "elapsed_time": "1:19:27", "remaining_time": "0:13:15", "throughput": 1318.5, "total_tokens": 6285832}
6613
+ {"current_steps": 32980, "total_steps": 38480, "loss": 0.1376, "lr": 3.048645479297127e-06, "epoch": 17.14137214137214, "percentage": 85.71, "elapsed_time": "1:19:28", "remaining_time": "0:13:15", "throughput": 1318.51, "total_tokens": 6286792}
6614
+ {"current_steps": 32985, "total_steps": 38480, "loss": 0.1536, "lr": 3.0432212371679867e-06, "epoch": 17.143970893970895, "percentage": 85.72, "elapsed_time": "1:19:28", "remaining_time": "0:13:14", "throughput": 1318.53, "total_tokens": 6287816}
6615
+ {"current_steps": 32990, "total_steps": 38480, "loss": 0.0974, "lr": 3.0378015120714586e-06, "epoch": 17.146569646569645, "percentage": 85.73, "elapsed_time": "1:19:29", "remaining_time": "0:13:13", "throughput": 1318.54, "total_tokens": 6288776}
6616
+ {"current_steps": 32995, "total_steps": 38480, "loss": 0.1714, "lr": 3.0323863051225256e-06, "epoch": 17.1491683991684, "percentage": 85.75, "elapsed_time": "1:19:30", "remaining_time": "0:13:12", "throughput": 1318.53, "total_tokens": 6289640}
6617
+ {"current_steps": 33000, "total_steps": 38480, "loss": 0.0774, "lr": 3.0269756174352152e-06, "epoch": 17.151767151767153, "percentage": 85.76, "elapsed_time": "1:19:30", "remaining_time": "0:13:12", "throughput": 1318.54, "total_tokens": 6290600}
6618
+ {"current_steps": 33005, "total_steps": 38480, "loss": 0.181, "lr": 3.0215694501226384e-06, "epoch": 17.154365904365903, "percentage": 85.77, "elapsed_time": "1:19:31", "remaining_time": "0:13:11", "throughput": 1318.55, "total_tokens": 6291592}
6619
+ {"current_steps": 33010, "total_steps": 38480, "loss": 0.1515, "lr": 3.0161678042969627e-06, "epoch": 17.156964656964657, "percentage": 85.78, "elapsed_time": "1:19:32", "remaining_time": "0:13:10", "throughput": 1318.56, "total_tokens": 6292552}
6620
+ {"current_steps": 33015, "total_steps": 38480, "loss": 0.1056, "lr": 3.0107706810694523e-06, "epoch": 17.15956340956341, "percentage": 85.8, "elapsed_time": "1:19:32", "remaining_time": "0:13:10", "throughput": 1318.57, "total_tokens": 6293512}
6621
+ {"current_steps": 33020, "total_steps": 38480, "loss": 0.1067, "lr": 3.005378081550414e-06, "epoch": 17.16216216216216, "percentage": 85.81, "elapsed_time": "1:19:33", "remaining_time": "0:13:09", "throughput": 1318.59, "total_tokens": 6294536}
6622
+ {"current_steps": 33025, "total_steps": 38480, "loss": 0.1216, "lr": 2.9999900068492374e-06, "epoch": 17.164760914760915, "percentage": 85.82, "elapsed_time": "1:19:34", "remaining_time": "0:13:08", "throughput": 1318.58, "total_tokens": 6295432}
6623
+ {"current_steps": 33030, "total_steps": 38480, "loss": 0.2168, "lr": 2.994606458074367e-06, "epoch": 17.16735966735967, "percentage": 85.84, "elapsed_time": "1:19:35", "remaining_time": "0:13:07", "throughput": 1318.59, "total_tokens": 6296392}
6624
+ {"current_steps": 33035, "total_steps": 38480, "loss": 0.1081, "lr": 2.98922743633335e-06, "epoch": 17.16995841995842, "percentage": 85.85, "elapsed_time": "1:19:35", "remaining_time": "0:13:07", "throughput": 1318.59, "total_tokens": 6297320}
6625
+ {"current_steps": 33040, "total_steps": 38480, "loss": 0.1069, "lr": 2.9838529427327594e-06, "epoch": 17.172557172557173, "percentage": 85.86, "elapsed_time": "1:19:36", "remaining_time": "0:13:06", "throughput": 1318.61, "total_tokens": 6298312}
6626
+ {"current_steps": 33045, "total_steps": 38480, "loss": 0.1861, "lr": 2.978482978378258e-06, "epoch": 17.175155925155924, "percentage": 85.88, "elapsed_time": "1:19:37", "remaining_time": "0:13:05", "throughput": 1318.62, "total_tokens": 6299272}
6627
+ {"current_steps": 33050, "total_steps": 38480, "loss": 0.0851, "lr": 2.9731175443745818e-06, "epoch": 17.177754677754677, "percentage": 85.89, "elapsed_time": "1:19:37", "remaining_time": "0:13:04", "throughput": 1318.62, "total_tokens": 6300232}
6628
+ {"current_steps": 33055, "total_steps": 38480, "loss": 0.1556, "lr": 2.967756641825531e-06, "epoch": 17.18035343035343, "percentage": 85.9, "elapsed_time": "1:19:38", "remaining_time": "0:13:04", "throughput": 1318.63, "total_tokens": 6301192}
6629
+ {"current_steps": 33060, "total_steps": 38480, "loss": 0.0775, "lr": 2.962400271833965e-06, "epoch": 17.18295218295218, "percentage": 85.91, "elapsed_time": "1:19:39", "remaining_time": "0:13:03", "throughput": 1318.65, "total_tokens": 6302216}
6630
+ {"current_steps": 33065, "total_steps": 38480, "loss": 0.1791, "lr": 2.957048435501811e-06, "epoch": 17.185550935550935, "percentage": 85.93, "elapsed_time": "1:19:39", "remaining_time": "0:13:02", "throughput": 1318.65, "total_tokens": 6303144}
6631
+ {"current_steps": 33070, "total_steps": 38480, "loss": 0.1199, "lr": 2.9517011339300848e-06, "epoch": 17.18814968814969, "percentage": 85.94, "elapsed_time": "1:19:40", "remaining_time": "0:13:02", "throughput": 1318.65, "total_tokens": 6304040}
6632
+ {"current_steps": 33075, "total_steps": 38480, "loss": 0.2419, "lr": 2.9463583682188433e-06, "epoch": 17.19074844074844, "percentage": 85.95, "elapsed_time": "1:19:41", "remaining_time": "0:13:01", "throughput": 1318.65, "total_tokens": 6305000}
6633
+ {"current_steps": 33080, "total_steps": 38480, "loss": 0.0931, "lr": 2.9410201394672265e-06, "epoch": 17.193347193347194, "percentage": 85.97, "elapsed_time": "1:19:42", "remaining_time": "0:13:00", "throughput": 1318.67, "total_tokens": 6305992}
6634
+ {"current_steps": 33085, "total_steps": 38480, "loss": 0.0903, "lr": 2.9356864487734254e-06, "epoch": 17.195945945945947, "percentage": 85.98, "elapsed_time": "1:19:42", "remaining_time": "0:12:59", "throughput": 1318.66, "total_tokens": 6306888}
6635
+ {"current_steps": 33090, "total_steps": 38480, "loss": 0.1108, "lr": 2.930357297234718e-06, "epoch": 17.198544698544698, "percentage": 85.99, "elapsed_time": "1:19:43", "remaining_time": "0:12:59", "throughput": 1318.68, "total_tokens": 6307880}
6636
+ {"current_steps": 33095, "total_steps": 38480, "loss": 0.1373, "lr": 2.925032685947432e-06, "epoch": 17.20114345114345, "percentage": 86.01, "elapsed_time": "1:19:44", "remaining_time": "0:12:58", "throughput": 1318.69, "total_tokens": 6308840}
6637
+ {"current_steps": 33100, "total_steps": 38480, "loss": 0.0943, "lr": 2.91971261600697e-06, "epoch": 17.203742203742205, "percentage": 86.02, "elapsed_time": "1:19:44", "remaining_time": "0:12:57", "throughput": 1318.71, "total_tokens": 6309864}
6638
+ {"current_steps": 33105, "total_steps": 38480, "loss": 0.151, "lr": 2.914397088507795e-06, "epoch": 17.206340956340956, "percentage": 86.03, "elapsed_time": "1:19:45", "remaining_time": "0:12:56", "throughput": 1318.71, "total_tokens": 6310824}
6639
+ {"current_steps": 33110, "total_steps": 38480, "loss": 0.1461, "lr": 2.9090861045434405e-06, "epoch": 17.20893970893971, "percentage": 86.04, "elapsed_time": "1:19:46", "remaining_time": "0:12:56", "throughput": 1318.72, "total_tokens": 6311784}
6640
+ {"current_steps": 33115, "total_steps": 38480, "loss": 0.1885, "lr": 2.903779665206491e-06, "epoch": 17.21153846153846, "percentage": 86.06, "elapsed_time": "1:19:46", "remaining_time": "0:12:55", "throughput": 1318.72, "total_tokens": 6312712}
6641
+ {"current_steps": 33120, "total_steps": 38480, "loss": 0.0528, "lr": 2.898477771588626e-06, "epoch": 17.214137214137214, "percentage": 86.07, "elapsed_time": "1:19:47", "remaining_time": "0:12:54", "throughput": 1318.73, "total_tokens": 6313672}
6642
+ {"current_steps": 33125, "total_steps": 38480, "loss": 0.0765, "lr": 2.893180424780559e-06, "epoch": 17.216735966735968, "percentage": 86.08, "elapsed_time": "1:19:48", "remaining_time": "0:12:54", "throughput": 1318.73, "total_tokens": 6314600}
6643
+ {"current_steps": 33130, "total_steps": 38480, "loss": 0.0573, "lr": 2.887887625872085e-06, "epoch": 17.219334719334718, "percentage": 86.1, "elapsed_time": "1:19:49", "remaining_time": "0:12:53", "throughput": 1318.73, "total_tokens": 6315496}
6644
+ {"current_steps": 33135, "total_steps": 38480, "loss": 0.1907, "lr": 2.88259937595205e-06, "epoch": 17.221933471933472, "percentage": 86.11, "elapsed_time": "1:19:49", "remaining_time": "0:12:52", "throughput": 1318.73, "total_tokens": 6316456}
6645
+ {"current_steps": 33140, "total_steps": 38480, "loss": 0.1397, "lr": 2.877315676108386e-06, "epoch": 17.224532224532226, "percentage": 86.12, "elapsed_time": "1:19:50", "remaining_time": "0:12:51", "throughput": 1318.73, "total_tokens": 6317352}
6646
+ {"current_steps": 33145, "total_steps": 38480, "loss": 0.215, "lr": 2.8720365274280738e-06, "epoch": 17.227130977130976, "percentage": 86.14, "elapsed_time": "1:19:51", "remaining_time": "0:12:51", "throughput": 1318.74, "total_tokens": 6318312}
6647
+ {"current_steps": 33150, "total_steps": 38480, "loss": 0.1588, "lr": 2.8667619309971526e-06, "epoch": 17.22972972972973, "percentage": 86.15, "elapsed_time": "1:19:51", "remaining_time": "0:12:50", "throughput": 1318.73, "total_tokens": 6319176}
6648
+ {"current_steps": 33155, "total_steps": 38480, "loss": 0.0841, "lr": 2.861491887900733e-06, "epoch": 17.232328482328484, "percentage": 86.16, "elapsed_time": "1:19:52", "remaining_time": "0:12:49", "throughput": 1318.73, "total_tokens": 6320104}
6649
+ {"current_steps": 33160, "total_steps": 38480, "loss": 0.0531, "lr": 2.856226399222997e-06, "epoch": 17.234927234927234, "percentage": 86.17, "elapsed_time": "1:19:53", "remaining_time": "0:12:49", "throughput": 1318.75, "total_tokens": 6321128}
6650
+ {"current_steps": 33165, "total_steps": 38480, "loss": 0.0998, "lr": 2.850965466047181e-06, "epoch": 17.237525987525988, "percentage": 86.19, "elapsed_time": "1:19:53", "remaining_time": "0:12:48", "throughput": 1318.76, "total_tokens": 6322088}
6651
+ {"current_steps": 33170, "total_steps": 38480, "loss": 0.0766, "lr": 2.84570908945557e-06, "epoch": 17.24012474012474, "percentage": 86.2, "elapsed_time": "1:19:54", "remaining_time": "0:12:47", "throughput": 1318.77, "total_tokens": 6323080}
6652
+ {"current_steps": 33175, "total_steps": 38480, "loss": 0.0813, "lr": 2.8404572705295395e-06, "epoch": 17.242723492723492, "percentage": 86.21, "elapsed_time": "1:19:55", "remaining_time": "0:12:46", "throughput": 1318.78, "total_tokens": 6324040}
6653
+ {"current_steps": 33180, "total_steps": 38480, "loss": 0.0966, "lr": 2.835210010349512e-06, "epoch": 17.245322245322246, "percentage": 86.23, "elapsed_time": "1:19:56", "remaining_time": "0:12:46", "throughput": 1318.8, "total_tokens": 6325064}
6654
+ {"current_steps": 33185, "total_steps": 38480, "loss": 0.1527, "lr": 2.8299673099949684e-06, "epoch": 17.247920997920996, "percentage": 86.24, "elapsed_time": "1:19:56", "remaining_time": "0:12:45", "throughput": 1318.81, "total_tokens": 6326024}
6655
+ {"current_steps": 33190, "total_steps": 38480, "loss": 0.1719, "lr": 2.8247291705444575e-06, "epoch": 17.25051975051975, "percentage": 86.25, "elapsed_time": "1:19:57", "remaining_time": "0:12:44", "throughput": 1318.81, "total_tokens": 6326952}
6656
+ {"current_steps": 33195, "total_steps": 38480, "loss": 0.1469, "lr": 2.8194955930755967e-06, "epoch": 17.253118503118504, "percentage": 86.27, "elapsed_time": "1:19:58", "remaining_time": "0:12:43", "throughput": 1318.82, "total_tokens": 6327912}
6657
+ {"current_steps": 33200, "total_steps": 38480, "loss": 0.1419, "lr": 2.8142665786650534e-06, "epoch": 17.255717255717254, "percentage": 86.28, "elapsed_time": "1:19:58", "remaining_time": "0:12:43", "throughput": 1318.83, "total_tokens": 6328904}
6658
+ {"current_steps": 33205, "total_steps": 38480, "loss": 0.1807, "lr": 2.8090421283885586e-06, "epoch": 17.258316008316008, "percentage": 86.29, "elapsed_time": "1:19:59", "remaining_time": "0:12:42", "throughput": 1318.83, "total_tokens": 6329832}
6659
+ {"current_steps": 33210, "total_steps": 38480, "loss": 0.0774, "lr": 2.8038222433209057e-06, "epoch": 17.260914760914762, "percentage": 86.3, "elapsed_time": "1:20:00", "remaining_time": "0:12:41", "throughput": 1318.83, "total_tokens": 6330728}
6660
+ {"current_steps": 33215, "total_steps": 38480, "loss": 0.0908, "lr": 2.798606924535954e-06, "epoch": 17.263513513513512, "percentage": 86.32, "elapsed_time": "1:20:00", "remaining_time": "0:12:41", "throughput": 1318.83, "total_tokens": 6331656}
6661
+ {"current_steps": 33220, "total_steps": 38480, "loss": 0.1604, "lr": 2.793396173106619e-06, "epoch": 17.266112266112266, "percentage": 86.33, "elapsed_time": "1:20:01", "remaining_time": "0:12:40", "throughput": 1318.84, "total_tokens": 6332616}
6662
+ {"current_steps": 33225, "total_steps": 38480, "loss": 0.1427, "lr": 2.788189990104867e-06, "epoch": 17.26871101871102, "percentage": 86.34, "elapsed_time": "1:20:02", "remaining_time": "0:12:39", "throughput": 1318.85, "total_tokens": 6333576}
6663
+ {"current_steps": 33230, "total_steps": 38480, "loss": 0.1382, "lr": 2.78298837660175e-06, "epoch": 17.27130977130977, "percentage": 86.36, "elapsed_time": "1:20:03", "remaining_time": "0:12:38", "throughput": 1318.87, "total_tokens": 6334600}
6664
+ {"current_steps": 33235, "total_steps": 38480, "loss": 0.0945, "lr": 2.777791333667351e-06, "epoch": 17.273908523908524, "percentage": 86.37, "elapsed_time": "1:20:03", "remaining_time": "0:12:38", "throughput": 1318.87, "total_tokens": 6335560}
6665
+ {"current_steps": 33240, "total_steps": 38480, "loss": 0.224, "lr": 2.772598862370826e-06, "epoch": 17.276507276507278, "percentage": 86.38, "elapsed_time": "1:20:04", "remaining_time": "0:12:37", "throughput": 1318.87, "total_tokens": 6336456}
6666
+ {"current_steps": 33245, "total_steps": 38480, "loss": 0.0881, "lr": 2.767410963780395e-06, "epoch": 17.27910602910603, "percentage": 86.4, "elapsed_time": "1:20:05", "remaining_time": "0:12:36", "throughput": 1318.88, "total_tokens": 6337448}
6667
+ {"current_steps": 33250, "total_steps": 38480, "loss": 0.1471, "lr": 2.7622276389633328e-06, "epoch": 17.281704781704782, "percentage": 86.41, "elapsed_time": "1:20:05", "remaining_time": "0:12:35", "throughput": 1318.91, "total_tokens": 6338472}
6668
+ {"current_steps": 33255, "total_steps": 38480, "loss": 0.2124, "lr": 2.757048888985969e-06, "epoch": 17.284303534303533, "percentage": 86.42, "elapsed_time": "1:20:06", "remaining_time": "0:12:35", "throughput": 1318.91, "total_tokens": 6339432}
6669
+ {"current_steps": 33260, "total_steps": 38480, "loss": 0.0359, "lr": 2.751874714913694e-06, "epoch": 17.286902286902286, "percentage": 86.43, "elapsed_time": "1:20:07", "remaining_time": "0:12:34", "throughput": 1318.92, "total_tokens": 6340392}
6670
+ {"current_steps": 33265, "total_steps": 38480, "loss": 0.0786, "lr": 2.7467051178109685e-06, "epoch": 17.28950103950104, "percentage": 86.45, "elapsed_time": "1:20:07", "remaining_time": "0:12:33", "throughput": 1318.94, "total_tokens": 6341384}
6671
+ {"current_steps": 33270, "total_steps": 38480, "loss": 0.168, "lr": 2.7415400987412935e-06, "epoch": 17.29209979209979, "percentage": 86.46, "elapsed_time": "1:20:08", "remaining_time": "0:12:33", "throughput": 1318.92, "total_tokens": 6342248}
6672
+ {"current_steps": 33275, "total_steps": 38480, "loss": 0.1015, "lr": 2.736379658767238e-06, "epoch": 17.294698544698544, "percentage": 86.47, "elapsed_time": "1:20:09", "remaining_time": "0:12:32", "throughput": 1318.93, "total_tokens": 6343208}
6673
+ {"current_steps": 33280, "total_steps": 38480, "loss": 0.1734, "lr": 2.7312237989504247e-06, "epoch": 17.2972972972973, "percentage": 86.49, "elapsed_time": "1:20:10", "remaining_time": "0:12:31", "throughput": 1318.93, "total_tokens": 6344136}
6674
+ {"current_steps": 33285, "total_steps": 38480, "loss": 0.0849, "lr": 2.726072520351544e-06, "epoch": 17.29989604989605, "percentage": 86.5, "elapsed_time": "1:20:10", "remaining_time": "0:12:30", "throughput": 1318.94, "total_tokens": 6345096}
6675
+ {"current_steps": 33290, "total_steps": 38480, "loss": 0.1857, "lr": 2.720925824030335e-06, "epoch": 17.302494802494802, "percentage": 86.51, "elapsed_time": "1:20:11", "remaining_time": "0:12:30", "throughput": 1318.96, "total_tokens": 6346120}
6676
+ {"current_steps": 33295, "total_steps": 38480, "loss": 0.2038, "lr": 2.7157837110455913e-06, "epoch": 17.305093555093556, "percentage": 86.53, "elapsed_time": "1:20:12", "remaining_time": "0:12:29", "throughput": 1318.97, "total_tokens": 6347080}
6677
+ {"current_steps": 33300, "total_steps": 38480, "loss": 0.0738, "lr": 2.710646182455173e-06, "epoch": 17.307692307692307, "percentage": 86.54, "elapsed_time": "1:20:12", "remaining_time": "0:12:28", "throughput": 1318.97, "total_tokens": 6348008}
6678
+ {"current_steps": 33305, "total_steps": 38480, "loss": 0.0776, "lr": 2.705513239315988e-06, "epoch": 17.31029106029106, "percentage": 86.55, "elapsed_time": "1:20:13", "remaining_time": "0:12:27", "throughput": 1318.99, "total_tokens": 6349032}
6679
+ {"current_steps": 33310, "total_steps": 38480, "loss": 0.1583, "lr": 2.7003848826840067e-06, "epoch": 17.312889812889814, "percentage": 86.56, "elapsed_time": "1:20:14", "remaining_time": "0:12:27", "throughput": 1319.0, "total_tokens": 6349960}
6680
+ {"current_steps": 33315, "total_steps": 38480, "loss": 0.1293, "lr": 2.6952611136142477e-06, "epoch": 17.315488565488565, "percentage": 86.58, "elapsed_time": "1:20:14", "remaining_time": "0:12:26", "throughput": 1318.99, "total_tokens": 6350856}
6681
+ {"current_steps": 33320, "total_steps": 38480, "loss": 0.1131, "lr": 2.6901419331608057e-06, "epoch": 17.31808731808732, "percentage": 86.59, "elapsed_time": "1:20:15", "remaining_time": "0:12:25", "throughput": 1319.01, "total_tokens": 6351912}
6682
+ {"current_steps": 33325, "total_steps": 38480, "loss": 0.1406, "lr": 2.6850273423768115e-06, "epoch": 17.320686070686072, "percentage": 86.6, "elapsed_time": "1:20:16", "remaining_time": "0:12:25", "throughput": 1319.03, "total_tokens": 6352904}
6683
+ {"current_steps": 33330, "total_steps": 38480, "loss": 0.1159, "lr": 2.6799173423144584e-06, "epoch": 17.323284823284823, "percentage": 86.62, "elapsed_time": "1:20:17", "remaining_time": "0:12:24", "throughput": 1319.04, "total_tokens": 6353864}
6684
+ {"current_steps": 33335, "total_steps": 38480, "loss": 0.1723, "lr": 2.67481193402499e-06, "epoch": 17.325883575883577, "percentage": 86.63, "elapsed_time": "1:20:17", "remaining_time": "0:12:23", "throughput": 1319.04, "total_tokens": 6354792}
6685
+ {"current_steps": 33340, "total_steps": 38480, "loss": 0.1511, "lr": 2.669711118558718e-06, "epoch": 17.328482328482327, "percentage": 86.64, "elapsed_time": "1:20:18", "remaining_time": "0:12:22", "throughput": 1319.06, "total_tokens": 6355816}
6686
+ {"current_steps": 33345, "total_steps": 38480, "loss": 0.2352, "lr": 2.664614896965001e-06, "epoch": 17.33108108108108, "percentage": 86.66, "elapsed_time": "1:20:19", "remaining_time": "0:12:22", "throughput": 1319.08, "total_tokens": 6356840}
6687
+ {"current_steps": 33350, "total_steps": 38480, "loss": 0.1009, "lr": 2.659523270292244e-06, "epoch": 17.333679833679835, "percentage": 86.67, "elapsed_time": "1:20:19", "remaining_time": "0:12:21", "throughput": 1319.09, "total_tokens": 6357800}
6688
+ {"current_steps": 33355, "total_steps": 38480, "loss": 0.1423, "lr": 2.6544362395879317e-06, "epoch": 17.336278586278585, "percentage": 86.68, "elapsed_time": "1:20:20", "remaining_time": "0:12:20", "throughput": 1319.09, "total_tokens": 6358728}
6689
+ {"current_steps": 33360, "total_steps": 38480, "loss": 0.0572, "lr": 2.6493538058985775e-06, "epoch": 17.33887733887734, "percentage": 86.69, "elapsed_time": "1:20:21", "remaining_time": "0:12:19", "throughput": 1319.1, "total_tokens": 6359720}
6690
+ {"current_steps": 33365, "total_steps": 38480, "loss": 0.1425, "lr": 2.6442759702697606e-06, "epoch": 17.341476091476093, "percentage": 86.71, "elapsed_time": "1:20:21", "remaining_time": "0:12:19", "throughput": 1319.09, "total_tokens": 6360584}
6691
+ {"current_steps": 33370, "total_steps": 38480, "loss": 0.0865, "lr": 2.639202733746113e-06, "epoch": 17.344074844074843, "percentage": 86.72, "elapsed_time": "1:20:22", "remaining_time": "0:12:18", "throughput": 1319.11, "total_tokens": 6361576}
6692
+ {"current_steps": 33375, "total_steps": 38480, "loss": 0.0978, "lr": 2.6341340973713187e-06, "epoch": 17.346673596673597, "percentage": 86.73, "elapsed_time": "1:20:23", "remaining_time": "0:12:17", "throughput": 1319.11, "total_tokens": 6362504}
6693
+ {"current_steps": 33380, "total_steps": 38480, "loss": 0.0318, "lr": 2.629070062188119e-06, "epoch": 17.34927234927235, "percentage": 86.75, "elapsed_time": "1:20:24", "remaining_time": "0:12:17", "throughput": 1319.1, "total_tokens": 6363400}
6694
+ {"current_steps": 33385, "total_steps": 38480, "loss": 0.0999, "lr": 2.6240106292383022e-06, "epoch": 17.3518711018711, "percentage": 86.76, "elapsed_time": "1:20:24", "remaining_time": "0:12:16", "throughput": 1319.11, "total_tokens": 6364328}
6695
+ {"current_steps": 33390, "total_steps": 38480, "loss": 0.13, "lr": 2.618955799562722e-06, "epoch": 17.354469854469855, "percentage": 86.77, "elapsed_time": "1:20:25", "remaining_time": "0:12:15", "throughput": 1319.11, "total_tokens": 6365288}
6696
+ {"current_steps": 33395, "total_steps": 38480, "loss": 0.1355, "lr": 2.613905574201275e-06, "epoch": 17.35706860706861, "percentage": 86.79, "elapsed_time": "1:20:26", "remaining_time": "0:12:14", "throughput": 1319.13, "total_tokens": 6366280}
6697
+ {"current_steps": 33400, "total_steps": 38480, "loss": 0.072, "lr": 2.6088599541929104e-06, "epoch": 17.35966735966736, "percentage": 86.8, "elapsed_time": "1:20:26", "remaining_time": "0:12:14", "throughput": 1319.14, "total_tokens": 6367240}
6698
+ {"current_steps": 33405, "total_steps": 38480, "loss": 0.2421, "lr": 2.603818940575628e-06, "epoch": 17.362266112266113, "percentage": 86.81, "elapsed_time": "1:20:27", "remaining_time": "0:12:13", "throughput": 1319.14, "total_tokens": 6368168}
6699
+ {"current_steps": 33410, "total_steps": 38480, "loss": 0.085, "lr": 2.5987825343864956e-06, "epoch": 17.364864864864863, "percentage": 86.82, "elapsed_time": "1:20:28", "remaining_time": "0:12:12", "throughput": 1319.15, "total_tokens": 6369128}
6700
+ {"current_steps": 33415, "total_steps": 38480, "loss": 0.1869, "lr": 2.5937507366616154e-06, "epoch": 17.367463617463617, "percentage": 86.84, "elapsed_time": "1:20:28", "remaining_time": "0:12:11", "throughput": 1319.15, "total_tokens": 6370056}
6701
+ {"current_steps": 33420, "total_steps": 38480, "loss": 0.0808, "lr": 2.5887235484361506e-06, "epoch": 17.37006237006237, "percentage": 86.85, "elapsed_time": "1:20:29", "remaining_time": "0:12:11", "throughput": 1319.17, "total_tokens": 6371080}
6702
+ {"current_steps": 33425, "total_steps": 38480, "loss": 0.1231, "lr": 2.5837009707443042e-06, "epoch": 17.37266112266112, "percentage": 86.86, "elapsed_time": "1:20:30", "remaining_time": "0:12:10", "throughput": 1319.18, "total_tokens": 6372072}
6703
+ {"current_steps": 33430, "total_steps": 38480, "loss": 0.1329, "lr": 2.5786830046193587e-06, "epoch": 17.375259875259875, "percentage": 86.88, "elapsed_time": "1:20:31", "remaining_time": "0:12:09", "throughput": 1319.2, "total_tokens": 6373064}
6704
+ {"current_steps": 33435, "total_steps": 38480, "loss": 0.1675, "lr": 2.573669651093616e-06, "epoch": 17.37785862785863, "percentage": 86.89, "elapsed_time": "1:20:31", "remaining_time": "0:12:09", "throughput": 1319.2, "total_tokens": 6373992}
6705
+ {"current_steps": 33440, "total_steps": 38480, "loss": 0.1652, "lr": 2.5686609111984368e-06, "epoch": 17.38045738045738, "percentage": 86.9, "elapsed_time": "1:20:32", "remaining_time": "0:12:08", "throughput": 1319.21, "total_tokens": 6374952}
6706
+ {"current_steps": 33445, "total_steps": 38480, "loss": 0.1347, "lr": 2.5636567859642525e-06, "epoch": 17.383056133056133, "percentage": 86.92, "elapsed_time": "1:20:33", "remaining_time": "0:12:07", "throughput": 1319.22, "total_tokens": 6375944}
6707
+ {"current_steps": 33450, "total_steps": 38480, "loss": 0.2083, "lr": 2.558657276420526e-06, "epoch": 17.385654885654887, "percentage": 86.93, "elapsed_time": "1:20:33", "remaining_time": "0:12:06", "throughput": 1319.22, "total_tokens": 6376872}
6708
+ {"current_steps": 33455, "total_steps": 38480, "loss": 0.09, "lr": 2.5536623835957727e-06, "epoch": 17.388253638253637, "percentage": 86.94, "elapsed_time": "1:20:34", "remaining_time": "0:12:06", "throughput": 1319.24, "total_tokens": 6377864}
6709
+ {"current_steps": 33460, "total_steps": 38480, "loss": 0.1319, "lr": 2.548672108517558e-06, "epoch": 17.39085239085239, "percentage": 86.95, "elapsed_time": "1:20:35", "remaining_time": "0:12:05", "throughput": 1319.23, "total_tokens": 6378760}
6710
+ {"current_steps": 33465, "total_steps": 38480, "loss": 0.1061, "lr": 2.54368645221251e-06, "epoch": 17.393451143451145, "percentage": 86.97, "elapsed_time": "1:20:35", "remaining_time": "0:12:04", "throughput": 1319.23, "total_tokens": 6379656}
6711
+ {"current_steps": 33470, "total_steps": 38480, "loss": 0.1478, "lr": 2.538705415706291e-06, "epoch": 17.396049896049895, "percentage": 86.98, "elapsed_time": "1:20:36", "remaining_time": "0:12:03", "throughput": 1319.23, "total_tokens": 6380584}
6712
+ {"current_steps": 33475, "total_steps": 38480, "loss": 0.2079, "lr": 2.533729000023616e-06, "epoch": 17.39864864864865, "percentage": 86.99, "elapsed_time": "1:20:37", "remaining_time": "0:12:03", "throughput": 1319.23, "total_tokens": 6381512}
6713
+ {"current_steps": 33480, "total_steps": 38480, "loss": 0.1044, "lr": 2.5287572061882618e-06, "epoch": 17.401247401247403, "percentage": 87.01, "elapsed_time": "1:20:38", "remaining_time": "0:12:02", "throughput": 1319.24, "total_tokens": 6382504}
6714
+ {"current_steps": 33485, "total_steps": 38480, "loss": 0.2152, "lr": 2.5237900352230393e-06, "epoch": 17.403846153846153, "percentage": 87.02, "elapsed_time": "1:20:38", "remaining_time": "0:12:01", "throughput": 1319.26, "total_tokens": 6383496}
6715
+ {"current_steps": 33490, "total_steps": 38480, "loss": 0.1351, "lr": 2.5188274881498136e-06, "epoch": 17.406444906444907, "percentage": 87.03, "elapsed_time": "1:20:39", "remaining_time": "0:12:01", "throughput": 1319.26, "total_tokens": 6384424}
6716
+ {"current_steps": 33495, "total_steps": 38480, "loss": 0.1929, "lr": 2.513869565989499e-06, "epoch": 17.409043659043657, "percentage": 87.05, "elapsed_time": "1:20:40", "remaining_time": "0:12:00", "throughput": 1319.27, "total_tokens": 6385384}
6717
+ {"current_steps": 33500, "total_steps": 38480, "loss": 0.1064, "lr": 2.508916269762057e-06, "epoch": 17.41164241164241, "percentage": 87.06, "elapsed_time": "1:20:40", "remaining_time": "0:11:59", "throughput": 1319.27, "total_tokens": 6386312}
6718
+ {"current_steps": 33505, "total_steps": 38480, "loss": 0.0937, "lr": 2.503967600486504e-06, "epoch": 17.414241164241165, "percentage": 87.07, "elapsed_time": "1:20:41", "remaining_time": "0:11:58", "throughput": 1319.27, "total_tokens": 6387240}
6719
+ {"current_steps": 33510, "total_steps": 38480, "loss": 0.2095, "lr": 2.499023559180888e-06, "epoch": 17.416839916839916, "percentage": 87.08, "elapsed_time": "1:20:42", "remaining_time": "0:11:58", "throughput": 1319.26, "total_tokens": 6388136}
6720
+ {"current_steps": 33515, "total_steps": 38480, "loss": 0.1336, "lr": 2.4940841468623272e-06, "epoch": 17.41943866943867, "percentage": 87.1, "elapsed_time": "1:20:42", "remaining_time": "0:11:57", "throughput": 1319.25, "total_tokens": 6389000}
6721
+ {"current_steps": 33520, "total_steps": 38480, "loss": 0.0941, "lr": 2.489149364546975e-06, "epoch": 17.422037422037423, "percentage": 87.11, "elapsed_time": "1:20:43", "remaining_time": "0:11:56", "throughput": 1319.26, "total_tokens": 6389960}
6722
+ {"current_steps": 33525, "total_steps": 38480, "loss": 0.0919, "lr": 2.48421921325003e-06, "epoch": 17.424636174636174, "percentage": 87.12, "elapsed_time": "1:20:44", "remaining_time": "0:11:55", "throughput": 1319.28, "total_tokens": 6390952}
6723
+ {"current_steps": 33530, "total_steps": 38480, "loss": 0.1158, "lr": 2.47929369398574e-06, "epoch": 17.427234927234927, "percentage": 87.14, "elapsed_time": "1:20:44", "remaining_time": "0:11:55", "throughput": 1319.29, "total_tokens": 6391912}
6724
+ {"current_steps": 33535, "total_steps": 38480, "loss": 0.0853, "lr": 2.474372807767408e-06, "epoch": 17.42983367983368, "percentage": 87.15, "elapsed_time": "1:20:45", "remaining_time": "0:11:54", "throughput": 1319.28, "total_tokens": 6392808}
6725
+ {"current_steps": 33540, "total_steps": 38480, "loss": 0.0404, "lr": 2.4694565556073746e-06, "epoch": 17.43243243243243, "percentage": 87.16, "elapsed_time": "1:20:46", "remaining_time": "0:11:53", "throughput": 1319.27, "total_tokens": 6393704}
6726
+ {"current_steps": 33545, "total_steps": 38480, "loss": 0.1636, "lr": 2.46454493851703e-06, "epoch": 17.435031185031185, "percentage": 87.18, "elapsed_time": "1:20:47", "remaining_time": "0:11:53", "throughput": 1319.29, "total_tokens": 6394696}
6727
+ {"current_steps": 33550, "total_steps": 38480, "loss": 0.0661, "lr": 2.459637957506808e-06, "epoch": 17.43762993762994, "percentage": 87.19, "elapsed_time": "1:20:47", "remaining_time": "0:11:52", "throughput": 1319.29, "total_tokens": 6395592}
6728
+ {"current_steps": 33555, "total_steps": 38480, "loss": 0.0871, "lr": 2.4547356135861954e-06, "epoch": 17.44022869022869, "percentage": 87.2, "elapsed_time": "1:20:48", "remaining_time": "0:11:51", "throughput": 1319.29, "total_tokens": 6396520}
6729
+ {"current_steps": 33560, "total_steps": 38480, "loss": 0.103, "lr": 2.4498379077637275e-06, "epoch": 17.442827442827443, "percentage": 87.21, "elapsed_time": "1:20:49", "remaining_time": "0:11:50", "throughput": 1319.31, "total_tokens": 6397544}
6730
+ {"current_steps": 33565, "total_steps": 38480, "loss": 0.2517, "lr": 2.4449448410469616e-06, "epoch": 17.445426195426194, "percentage": 87.23, "elapsed_time": "1:20:49", "remaining_time": "0:11:50", "throughput": 1319.32, "total_tokens": 6398504}
6731
+ {"current_steps": 33570, "total_steps": 38480, "loss": 0.1719, "lr": 2.440056414442532e-06, "epoch": 17.448024948024948, "percentage": 87.24, "elapsed_time": "1:20:50", "remaining_time": "0:11:49", "throughput": 1319.33, "total_tokens": 6399464}
6732
+ {"current_steps": 33575, "total_steps": 38480, "loss": 0.0506, "lr": 2.4351726289561035e-06, "epoch": 17.4506237006237, "percentage": 87.25, "elapsed_time": "1:20:51", "remaining_time": "0:11:48", "throughput": 1319.33, "total_tokens": 6400392}
6733
+ {"current_steps": 33580, "total_steps": 38480, "loss": 0.0402, "lr": 2.4302934855923804e-06, "epoch": 17.453222453222452, "percentage": 87.27, "elapsed_time": "1:20:51", "remaining_time": "0:11:47", "throughput": 1319.34, "total_tokens": 6401384}
6734
+ {"current_steps": 33585, "total_steps": 38480, "loss": 0.0905, "lr": 2.4254189853551214e-06, "epoch": 17.455821205821206, "percentage": 87.28, "elapsed_time": "1:20:52", "remaining_time": "0:11:47", "throughput": 1319.35, "total_tokens": 6402344}
6735
+ {"current_steps": 33590, "total_steps": 38480, "loss": 0.0804, "lr": 2.420549129247132e-06, "epoch": 17.45841995841996, "percentage": 87.29, "elapsed_time": "1:20:53", "remaining_time": "0:11:46", "throughput": 1319.34, "total_tokens": 6403208}
6736
+ {"current_steps": 33595, "total_steps": 38480, "loss": 0.09, "lr": 2.4156839182702534e-06, "epoch": 17.46101871101871, "percentage": 87.31, "elapsed_time": "1:20:54", "remaining_time": "0:11:45", "throughput": 1319.35, "total_tokens": 6404168}
6737
+ {"current_steps": 33600, "total_steps": 38480, "loss": 0.1519, "lr": 2.4108233534253734e-06, "epoch": 17.463617463617464, "percentage": 87.32, "elapsed_time": "1:20:54", "remaining_time": "0:11:45", "throughput": 1319.39, "total_tokens": 6405288}
6738
+ {"current_steps": 33605, "total_steps": 38480, "loss": 0.216, "lr": 2.4059674357124335e-06, "epoch": 17.466216216216218, "percentage": 87.33, "elapsed_time": "1:20:55", "remaining_time": "0:11:44", "throughput": 1319.39, "total_tokens": 6406216}
6739
+ {"current_steps": 33610, "total_steps": 38480, "loss": 0.1062, "lr": 2.4011161661304072e-06, "epoch": 17.468814968814968, "percentage": 87.34, "elapsed_time": "1:20:56", "remaining_time": "0:11:43", "throughput": 1319.39, "total_tokens": 6407176}
6740
+ {"current_steps": 33615, "total_steps": 38480, "loss": 0.096, "lr": 2.3962695456773153e-06, "epoch": 17.47141372141372, "percentage": 87.36, "elapsed_time": "1:20:56", "remaining_time": "0:11:42", "throughput": 1319.4, "total_tokens": 6408136}
6741
+ {"current_steps": 33620, "total_steps": 38480, "loss": 0.0812, "lr": 2.391427575350219e-06, "epoch": 17.474012474012476, "percentage": 87.37, "elapsed_time": "1:20:57", "remaining_time": "0:11:42", "throughput": 1319.41, "total_tokens": 6409096}
6742
+ {"current_steps": 33625, "total_steps": 38480, "loss": 0.2186, "lr": 2.3865902561452437e-06, "epoch": 17.476611226611226, "percentage": 87.38, "elapsed_time": "1:20:58", "remaining_time": "0:11:41", "throughput": 1319.42, "total_tokens": 6410056}
6743
+ {"current_steps": 33630, "total_steps": 38480, "loss": 0.1674, "lr": 2.3817575890575233e-06, "epoch": 17.47920997920998, "percentage": 87.4, "elapsed_time": "1:20:58", "remaining_time": "0:11:40", "throughput": 1319.43, "total_tokens": 6411048}
6744
+ {"current_steps": 33635, "total_steps": 38480, "loss": 0.1668, "lr": 2.376929575081252e-06, "epoch": 17.48180873180873, "percentage": 87.41, "elapsed_time": "1:20:59", "remaining_time": "0:11:40", "throughput": 1319.44, "total_tokens": 6412008}
6745
+ {"current_steps": 33640, "total_steps": 38480, "loss": 0.0569, "lr": 2.3721062152096825e-06, "epoch": 17.484407484407484, "percentage": 87.42, "elapsed_time": "1:21:00", "remaining_time": "0:11:39", "throughput": 1319.44, "total_tokens": 6412936}
6746
+ {"current_steps": 33645, "total_steps": 38480, "loss": 0.1625, "lr": 2.3672875104350844e-06, "epoch": 17.487006237006238, "percentage": 87.44, "elapsed_time": "1:21:01", "remaining_time": "0:11:38", "throughput": 1319.46, "total_tokens": 6413928}
6747
+ {"current_steps": 33650, "total_steps": 38480, "loss": 0.0588, "lr": 2.3624734617487848e-06, "epoch": 17.489604989604988, "percentage": 87.45, "elapsed_time": "1:21:01", "remaining_time": "0:11:37", "throughput": 1319.46, "total_tokens": 6414856}
6748
+ {"current_steps": 33655, "total_steps": 38480, "loss": 0.1275, "lr": 2.3576640701411383e-06, "epoch": 17.492203742203742, "percentage": 87.46, "elapsed_time": "1:21:02", "remaining_time": "0:11:37", "throughput": 1319.48, "total_tokens": 6415880}
6749
+ {"current_steps": 33660, "total_steps": 38480, "loss": 0.1164, "lr": 2.352859336601565e-06, "epoch": 17.494802494802496, "percentage": 87.47, "elapsed_time": "1:21:03", "remaining_time": "0:11:36", "throughput": 1319.49, "total_tokens": 6416840}
6750
+ {"current_steps": 33665, "total_steps": 38480, "loss": 0.0898, "lr": 2.348059262118507e-06, "epoch": 17.497401247401246, "percentage": 87.49, "elapsed_time": "1:21:03", "remaining_time": "0:11:35", "throughput": 1319.5, "total_tokens": 6417800}
6751
+ {"current_steps": 33670, "total_steps": 38480, "loss": 0.0488, "lr": 2.343263847679453e-06, "epoch": 17.5, "percentage": 87.5, "elapsed_time": "1:21:04", "remaining_time": "0:11:34", "throughput": 1319.5, "total_tokens": 6418696}
6752
+ {"current_steps": 33675, "total_steps": 38480, "loss": 0.0396, "lr": 2.338473094270929e-06, "epoch": 17.502598752598754, "percentage": 87.51, "elapsed_time": "1:21:05", "remaining_time": "0:11:34", "throughput": 1319.49, "total_tokens": 6419592}
6753
+ {"current_steps": 33680, "total_steps": 38480, "loss": 0.0831, "lr": 2.333687002878521e-06, "epoch": 17.505197505197504, "percentage": 87.53, "elapsed_time": "1:21:05", "remaining_time": "0:11:33", "throughput": 1319.5, "total_tokens": 6420584}
6754
+ {"current_steps": 33685, "total_steps": 38480, "loss": 0.1663, "lr": 2.328905574486831e-06, "epoch": 17.507796257796258, "percentage": 87.54, "elapsed_time": "1:21:06", "remaining_time": "0:11:32", "throughput": 1319.52, "total_tokens": 6421576}
6755
+ {"current_steps": 33690, "total_steps": 38480, "loss": 0.2633, "lr": 2.3241288100795178e-06, "epoch": 17.510395010395012, "percentage": 87.55, "elapsed_time": "1:21:07", "remaining_time": "0:11:32", "throughput": 1319.52, "total_tokens": 6422504}
6756
+ {"current_steps": 33695, "total_steps": 38480, "loss": 0.1577, "lr": 2.3193567106392728e-06, "epoch": 17.512993762993762, "percentage": 87.56, "elapsed_time": "1:21:07", "remaining_time": "0:11:31", "throughput": 1319.53, "total_tokens": 6423464}
6757
+ {"current_steps": 33700, "total_steps": 38480, "loss": 0.1435, "lr": 2.31458927714783e-06, "epoch": 17.515592515592516, "percentage": 87.58, "elapsed_time": "1:21:08", "remaining_time": "0:11:30", "throughput": 1319.53, "total_tokens": 6424360}
6758
+ {"current_steps": 33705, "total_steps": 38480, "loss": 0.0858, "lr": 2.309826510585966e-06, "epoch": 17.518191268191266, "percentage": 87.59, "elapsed_time": "1:21:09", "remaining_time": "0:11:29", "throughput": 1319.53, "total_tokens": 6425288}
6759
+ {"current_steps": 33710, "total_steps": 38480, "loss": 0.1473, "lr": 2.305068411933492e-06, "epoch": 17.52079002079002, "percentage": 87.6, "elapsed_time": "1:21:10", "remaining_time": "0:11:29", "throughput": 1319.54, "total_tokens": 6426280}
6760
+ {"current_steps": 33715, "total_steps": 38480, "loss": 0.101, "lr": 2.3003149821692687e-06, "epoch": 17.523388773388774, "percentage": 87.62, "elapsed_time": "1:21:10", "remaining_time": "0:11:28", "throughput": 1319.56, "total_tokens": 6427272}
6761
+ {"current_steps": 33720, "total_steps": 38480, "loss": 0.1795, "lr": 2.295566222271189e-06, "epoch": 17.525987525987524, "percentage": 87.63, "elapsed_time": "1:21:11", "remaining_time": "0:11:27", "throughput": 1319.57, "total_tokens": 6428232}
6762
+ {"current_steps": 33725, "total_steps": 38480, "loss": 0.0402, "lr": 2.290822133216175e-06, "epoch": 17.52858627858628, "percentage": 87.64, "elapsed_time": "1:21:12", "remaining_time": "0:11:26", "throughput": 1319.57, "total_tokens": 6429192}
6763
+ {"current_steps": 33730, "total_steps": 38480, "loss": 0.0848, "lr": 2.2860827159802146e-06, "epoch": 17.531185031185032, "percentage": 87.66, "elapsed_time": "1:21:12", "remaining_time": "0:11:26", "throughput": 1319.59, "total_tokens": 6430184}
6764
+ {"current_steps": 33735, "total_steps": 38480, "loss": 0.1429, "lr": 2.281347971538311e-06, "epoch": 17.533783783783782, "percentage": 87.67, "elapsed_time": "1:21:13", "remaining_time": "0:11:25", "throughput": 1319.59, "total_tokens": 6431112}
6765
+ {"current_steps": 33740, "total_steps": 38480, "loss": 0.0966, "lr": 2.276617900864514e-06, "epoch": 17.536382536382536, "percentage": 87.68, "elapsed_time": "1:21:14", "remaining_time": "0:11:24", "throughput": 1319.58, "total_tokens": 6432008}
6766
+ {"current_steps": 33745, "total_steps": 38480, "loss": 0.0386, "lr": 2.271892504931905e-06, "epoch": 17.53898128898129, "percentage": 87.69, "elapsed_time": "1:21:14", "remaining_time": "0:11:24", "throughput": 1319.59, "total_tokens": 6432968}
6767
+ {"current_steps": 33750, "total_steps": 38480, "loss": 0.1087, "lr": 2.267171784712627e-06, "epoch": 17.54158004158004, "percentage": 87.71, "elapsed_time": "1:21:15", "remaining_time": "0:11:23", "throughput": 1319.6, "total_tokens": 6433928}
6768
+ {"current_steps": 33755, "total_steps": 38480, "loss": 0.264, "lr": 2.262455741177838e-06, "epoch": 17.544178794178794, "percentage": 87.72, "elapsed_time": "1:21:16", "remaining_time": "0:11:22", "throughput": 1319.62, "total_tokens": 6434952}
6769
+ {"current_steps": 33760, "total_steps": 38480, "loss": 0.1126, "lr": 2.2577443752977297e-06, "epoch": 17.546777546777548, "percentage": 87.73, "elapsed_time": "1:21:17", "remaining_time": "0:11:21", "throughput": 1319.64, "total_tokens": 6435944}
6770
+ {"current_steps": 33765, "total_steps": 38480, "loss": 0.153, "lr": 2.253037688041554e-06, "epoch": 17.5493762993763, "percentage": 87.75, "elapsed_time": "1:21:17", "remaining_time": "0:11:21", "throughput": 1319.65, "total_tokens": 6436936}
6771
+ {"current_steps": 33770, "total_steps": 38480, "loss": 0.1818, "lr": 2.248335680377586e-06, "epoch": 17.551975051975052, "percentage": 87.76, "elapsed_time": "1:21:18", "remaining_time": "0:11:20", "throughput": 1319.66, "total_tokens": 6437896}
6772
+ {"current_steps": 33775, "total_steps": 38480, "loss": 0.1119, "lr": 2.243638353273139e-06, "epoch": 17.554573804573806, "percentage": 87.77, "elapsed_time": "1:21:19", "remaining_time": "0:11:19", "throughput": 1319.69, "total_tokens": 6438984}
6773
+ {"current_steps": 33780, "total_steps": 38480, "loss": 0.1278, "lr": 2.238945707694562e-06, "epoch": 17.557172557172557, "percentage": 87.79, "elapsed_time": "1:21:19", "remaining_time": "0:11:18", "throughput": 1319.7, "total_tokens": 6439944}
6774
+ {"current_steps": 33785, "total_steps": 38480, "loss": 0.1278, "lr": 2.2342577446072544e-06, "epoch": 17.55977130977131, "percentage": 87.8, "elapsed_time": "1:21:20", "remaining_time": "0:11:18", "throughput": 1319.72, "total_tokens": 6440968}
6775
+ {"current_steps": 33790, "total_steps": 38480, "loss": 0.111, "lr": 2.229574464975634e-06, "epoch": 17.56237006237006, "percentage": 87.81, "elapsed_time": "1:21:21", "remaining_time": "0:11:17", "throughput": 1319.73, "total_tokens": 6441960}
6776
+ {"current_steps": 33795, "total_steps": 38480, "loss": 0.1066, "lr": 2.2248958697631634e-06, "epoch": 17.564968814968815, "percentage": 87.82, "elapsed_time": "1:21:21", "remaining_time": "0:11:16", "throughput": 1319.71, "total_tokens": 6442792}
6777
+ {"current_steps": 33800, "total_steps": 38480, "loss": 0.2212, "lr": 2.2202219599323394e-06, "epoch": 17.56756756756757, "percentage": 87.84, "elapsed_time": "1:21:22", "remaining_time": "0:11:16", "throughput": 1319.71, "total_tokens": 6443688}
6778
+ {"current_steps": 33805, "total_steps": 38480, "loss": 0.105, "lr": 2.2155527364447064e-06, "epoch": 17.57016632016632, "percentage": 87.85, "elapsed_time": "1:21:23", "remaining_time": "0:11:15", "throughput": 1319.73, "total_tokens": 6444712}
6779
+ {"current_steps": 33810, "total_steps": 38480, "loss": 0.1033, "lr": 2.2108882002608246e-06, "epoch": 17.572765072765073, "percentage": 87.86, "elapsed_time": "1:21:24", "remaining_time": "0:11:14", "throughput": 1319.74, "total_tokens": 6445704}
6780
+ {"current_steps": 33815, "total_steps": 38480, "loss": 0.1036, "lr": 2.206228352340306e-06, "epoch": 17.575363825363826, "percentage": 87.88, "elapsed_time": "1:21:24", "remaining_time": "0:11:13", "throughput": 1319.75, "total_tokens": 6446664}
6781
+ {"current_steps": 33820, "total_steps": 38480, "loss": 0.0883, "lr": 2.20157319364179e-06, "epoch": 17.577962577962577, "percentage": 87.89, "elapsed_time": "1:21:25", "remaining_time": "0:11:13", "throughput": 1319.75, "total_tokens": 6447592}
6782
+ {"current_steps": 33825, "total_steps": 38480, "loss": 0.2043, "lr": 2.1969227251229547e-06, "epoch": 17.58056133056133, "percentage": 87.9, "elapsed_time": "1:21:26", "remaining_time": "0:11:12", "throughput": 1319.75, "total_tokens": 6448520}
6783
+ {"current_steps": 33830, "total_steps": 38480, "loss": 0.1, "lr": 2.1922769477405097e-06, "epoch": 17.583160083160084, "percentage": 87.92, "elapsed_time": "1:21:26", "remaining_time": "0:11:11", "throughput": 1319.75, "total_tokens": 6449448}
6784
+ {"current_steps": 33835, "total_steps": 38480, "loss": 0.0768, "lr": 2.187635862450199e-06, "epoch": 17.585758835758835, "percentage": 87.93, "elapsed_time": "1:21:27", "remaining_time": "0:11:10", "throughput": 1319.76, "total_tokens": 6450408}
6785
+ {"current_steps": 33840, "total_steps": 38480, "loss": 0.1102, "lr": 2.1829994702068146e-06, "epoch": 17.58835758835759, "percentage": 87.94, "elapsed_time": "1:21:28", "remaining_time": "0:11:10", "throughput": 1319.75, "total_tokens": 6451304}
6786
+ {"current_steps": 33845, "total_steps": 38480, "loss": 0.1633, "lr": 2.178367771964168e-06, "epoch": 17.590956340956343, "percentage": 87.95, "elapsed_time": "1:21:28", "remaining_time": "0:11:09", "throughput": 1319.76, "total_tokens": 6452264}
6787
+ {"current_steps": 33850, "total_steps": 38480, "loss": 0.1786, "lr": 2.173740768675106e-06, "epoch": 17.593555093555093, "percentage": 87.97, "elapsed_time": "1:21:29", "remaining_time": "0:11:08", "throughput": 1319.78, "total_tokens": 6453256}
6788
+ {"current_steps": 33855, "total_steps": 38480, "loss": 0.1945, "lr": 2.1691184612915196e-06, "epoch": 17.596153846153847, "percentage": 87.98, "elapsed_time": "1:21:30", "remaining_time": "0:11:08", "throughput": 1319.77, "total_tokens": 6454152}
6789
+ {"current_steps": 33860, "total_steps": 38480, "loss": 0.2106, "lr": 2.1645008507643254e-06, "epoch": 17.598752598752597, "percentage": 87.99, "elapsed_time": "1:21:31", "remaining_time": "0:11:07", "throughput": 1319.77, "total_tokens": 6455048}
6790
+ {"current_steps": 33865, "total_steps": 38480, "loss": 0.1387, "lr": 2.1598879380434756e-06, "epoch": 17.60135135135135, "percentage": 88.01, "elapsed_time": "1:21:31", "remaining_time": "0:11:06", "throughput": 1319.77, "total_tokens": 6456008}
6791
+ {"current_steps": 33870, "total_steps": 38480, "loss": 0.1421, "lr": 2.155279724077952e-06, "epoch": 17.603950103950105, "percentage": 88.02, "elapsed_time": "1:21:32", "remaining_time": "0:11:05", "throughput": 1319.77, "total_tokens": 6456904}
6792
+ {"current_steps": 33875, "total_steps": 38480, "loss": 0.1249, "lr": 2.150676209815783e-06, "epoch": 17.606548856548855, "percentage": 88.03, "elapsed_time": "1:21:33", "remaining_time": "0:11:05", "throughput": 1319.77, "total_tokens": 6457832}
6793
+ {"current_steps": 33880, "total_steps": 38480, "loss": 0.0816, "lr": 2.1460773962040158e-06, "epoch": 17.60914760914761, "percentage": 88.05, "elapsed_time": "1:21:33", "remaining_time": "0:11:04", "throughput": 1319.77, "total_tokens": 6458728}
6794
+ {"current_steps": 33885, "total_steps": 38480, "loss": 0.0863, "lr": 2.141483284188736e-06, "epoch": 17.611746361746363, "percentage": 88.06, "elapsed_time": "1:21:34", "remaining_time": "0:11:03", "throughput": 1319.77, "total_tokens": 6459656}
6795
+ {"current_steps": 33890, "total_steps": 38480, "loss": 0.1894, "lr": 2.136893874715065e-06, "epoch": 17.614345114345113, "percentage": 88.07, "elapsed_time": "1:21:35", "remaining_time": "0:11:03", "throughput": 1319.78, "total_tokens": 6460648}
6796
+ {"current_steps": 33895, "total_steps": 38480, "loss": 0.1695, "lr": 2.132309168727148e-06, "epoch": 17.616943866943867, "percentage": 88.08, "elapsed_time": "1:21:35", "remaining_time": "0:11:02", "throughput": 1319.8, "total_tokens": 6461640}
6797
+ {"current_steps": 33900, "total_steps": 38480, "loss": 0.0929, "lr": 2.1277291671681716e-06, "epoch": 17.61954261954262, "percentage": 88.1, "elapsed_time": "1:21:36", "remaining_time": "0:11:01", "throughput": 1319.81, "total_tokens": 6462600}
6798
+ {"current_steps": 33905, "total_steps": 38480, "loss": 0.1298, "lr": 2.1231538709803487e-06, "epoch": 17.62214137214137, "percentage": 88.11, "elapsed_time": "1:21:37", "remaining_time": "0:11:00", "throughput": 1319.81, "total_tokens": 6463560}
6799
+ {"current_steps": 33910, "total_steps": 38480, "loss": 0.1643, "lr": 2.1185832811049327e-06, "epoch": 17.624740124740125, "percentage": 88.12, "elapsed_time": "1:21:38", "remaining_time": "0:11:00", "throughput": 1319.81, "total_tokens": 6464456}
6800
+ {"current_steps": 33915, "total_steps": 38480, "loss": 0.0829, "lr": 2.1140173984821987e-06, "epoch": 17.62733887733888, "percentage": 88.14, "elapsed_time": "1:21:38", "remaining_time": "0:10:59", "throughput": 1319.83, "total_tokens": 6465448}
6801
+ {"current_steps": 33920, "total_steps": 38480, "loss": 0.1057, "lr": 2.1094562240514563e-06, "epoch": 17.62993762993763, "percentage": 88.15, "elapsed_time": "1:21:39", "remaining_time": "0:10:58", "throughput": 1319.85, "total_tokens": 6466504}
6802
+ {"current_steps": 33925, "total_steps": 38480, "loss": 0.1211, "lr": 2.1048997587510473e-06, "epoch": 17.632536382536383, "percentage": 88.16, "elapsed_time": "1:21:40", "remaining_time": "0:10:57", "throughput": 1319.86, "total_tokens": 6467432}
6803
+ {"current_steps": 33930, "total_steps": 38480, "loss": 0.2286, "lr": 2.1003480035183513e-06, "epoch": 17.635135135135137, "percentage": 88.18, "elapsed_time": "1:21:40", "remaining_time": "0:10:57", "throughput": 1319.88, "total_tokens": 6468456}
6804
+ {"current_steps": 33935, "total_steps": 38480, "loss": 0.1041, "lr": 2.0958009592897705e-06, "epoch": 17.637733887733887, "percentage": 88.19, "elapsed_time": "1:21:41", "remaining_time": "0:10:56", "throughput": 1319.89, "total_tokens": 6469448}
6805
+ {"current_steps": 33940, "total_steps": 38480, "loss": 0.1874, "lr": 2.0912586270007388e-06, "epoch": 17.64033264033264, "percentage": 88.2, "elapsed_time": "1:21:42", "remaining_time": "0:10:55", "throughput": 1319.89, "total_tokens": 6470376}
6806
+ {"current_steps": 33945, "total_steps": 38480, "loss": 0.1768, "lr": 2.086721007585715e-06, "epoch": 17.64293139293139, "percentage": 88.21, "elapsed_time": "1:21:42", "remaining_time": "0:10:55", "throughput": 1319.91, "total_tokens": 6471368}
6807
+ {"current_steps": 33950, "total_steps": 38480, "loss": 0.1092, "lr": 2.0821881019782134e-06, "epoch": 17.645530145530145, "percentage": 88.23, "elapsed_time": "1:21:43", "remaining_time": "0:10:54", "throughput": 1319.91, "total_tokens": 6472296}
6808
+ {"current_steps": 33955, "total_steps": 38480, "loss": 0.1234, "lr": 2.0776599111107466e-06, "epoch": 17.6481288981289, "percentage": 88.24, "elapsed_time": "1:21:44", "remaining_time": "0:10:53", "throughput": 1319.93, "total_tokens": 6473320}
6809
+ {"current_steps": 33960, "total_steps": 38480, "loss": 0.068, "lr": 2.0731364359148687e-06, "epoch": 17.65072765072765, "percentage": 88.25, "elapsed_time": "1:21:45", "remaining_time": "0:10:52", "throughput": 1319.94, "total_tokens": 6474312}
6810
+ {"current_steps": 33965, "total_steps": 38480, "loss": 0.0687, "lr": 2.0686176773211776e-06, "epoch": 17.653326403326403, "percentage": 88.27, "elapsed_time": "1:21:45", "remaining_time": "0:10:52", "throughput": 1319.94, "total_tokens": 6475240}
6811
+ {"current_steps": 33970, "total_steps": 38480, "loss": 0.1341, "lr": 2.0641036362592818e-06, "epoch": 17.655925155925157, "percentage": 88.28, "elapsed_time": "1:21:46", "remaining_time": "0:10:51", "throughput": 1319.96, "total_tokens": 6476232}
6812
+ {"current_steps": 33975, "total_steps": 38480, "loss": 0.046, "lr": 2.0595943136578284e-06, "epoch": 17.658523908523907, "percentage": 88.29, "elapsed_time": "1:21:47", "remaining_time": "0:10:50", "throughput": 1319.95, "total_tokens": 6477128}
6813
+ {"current_steps": 33980, "total_steps": 38480, "loss": 0.0937, "lr": 2.055089710444497e-06, "epoch": 17.66112266112266, "percentage": 88.31, "elapsed_time": "1:21:47", "remaining_time": "0:10:49", "throughput": 1319.96, "total_tokens": 6478120}
6814
+ {"current_steps": 33985, "total_steps": 38480, "loss": 0.068, "lr": 2.05058982754599e-06, "epoch": 17.663721413721415, "percentage": 88.32, "elapsed_time": "1:21:48", "remaining_time": "0:10:49", "throughput": 1319.96, "total_tokens": 6479016}
6815
+ {"current_steps": 33990, "total_steps": 38480, "loss": 0.156, "lr": 2.0460946658880376e-06, "epoch": 17.666320166320165, "percentage": 88.33, "elapsed_time": "1:21:49", "remaining_time": "0:10:48", "throughput": 1319.98, "total_tokens": 6480040}
6816
+ {"current_steps": 33995, "total_steps": 38480, "loss": 0.0885, "lr": 2.0416042263953996e-06, "epoch": 17.66891891891892, "percentage": 88.34, "elapsed_time": "1:21:49", "remaining_time": "0:10:47", "throughput": 1319.99, "total_tokens": 6481000}
6817
+ {"current_steps": 34000, "total_steps": 38480, "loss": 0.3068, "lr": 2.0371185099918756e-06, "epoch": 17.671517671517673, "percentage": 88.36, "elapsed_time": "1:21:50", "remaining_time": "0:10:47", "throughput": 1320.0, "total_tokens": 6481992}
6818
+ {"current_steps": 34005, "total_steps": 38480, "loss": 0.0763, "lr": 2.0326375176002777e-06, "epoch": 17.674116424116423, "percentage": 88.37, "elapsed_time": "1:21:51", "remaining_time": "0:10:46", "throughput": 1320.02, "total_tokens": 6482984}
6819
+ {"current_steps": 34010, "total_steps": 38480, "loss": 0.2626, "lr": 2.0281612501424546e-06, "epoch": 17.676715176715177, "percentage": 88.38, "elapsed_time": "1:21:51", "remaining_time": "0:10:45", "throughput": 1320.02, "total_tokens": 6483912}
6820
+ {"current_steps": 34015, "total_steps": 38480, "loss": 0.0888, "lr": 2.023689708539278e-06, "epoch": 17.679313929313928, "percentage": 88.4, "elapsed_time": "1:21:52", "remaining_time": "0:10:44", "throughput": 1320.01, "total_tokens": 6484776}
6821
+ {"current_steps": 34020, "total_steps": 38480, "loss": 0.1527, "lr": 2.019222893710657e-06, "epoch": 17.68191268191268, "percentage": 88.41, "elapsed_time": "1:21:53", "remaining_time": "0:10:44", "throughput": 1320.01, "total_tokens": 6485704}
6822
+ {"current_steps": 34025, "total_steps": 38480, "loss": 0.2162, "lr": 2.014760806575516e-06, "epoch": 17.684511434511435, "percentage": 88.42, "elapsed_time": "1:21:54", "remaining_time": "0:10:43", "throughput": 1320.02, "total_tokens": 6486664}
6823
+ {"current_steps": 34030, "total_steps": 38480, "loss": 0.1425, "lr": 2.010303448051809e-06, "epoch": 17.687110187110186, "percentage": 88.44, "elapsed_time": "1:21:54", "remaining_time": "0:10:42", "throughput": 1320.03, "total_tokens": 6487624}
6824
+ {"current_steps": 34035, "total_steps": 38480, "loss": 0.2093, "lr": 2.0058508190565315e-06, "epoch": 17.68970893970894, "percentage": 88.45, "elapsed_time": "1:21:55", "remaining_time": "0:10:41", "throughput": 1320.03, "total_tokens": 6488584}
6825
+ {"current_steps": 34040, "total_steps": 38480, "loss": 0.0616, "lr": 2.0014029205056935e-06, "epoch": 17.692307692307693, "percentage": 88.46, "elapsed_time": "1:21:56", "remaining_time": "0:10:41", "throughput": 1320.04, "total_tokens": 6489544}
6826
+ {"current_steps": 34045, "total_steps": 38480, "loss": 0.0407, "lr": 1.9969597533143285e-06, "epoch": 17.694906444906444, "percentage": 88.47, "elapsed_time": "1:21:56", "remaining_time": "0:10:40", "throughput": 1320.05, "total_tokens": 6490504}
6827
+ {"current_steps": 34050, "total_steps": 38480, "loss": 0.1402, "lr": 1.992521318396498e-06, "epoch": 17.697505197505198, "percentage": 88.49, "elapsed_time": "1:21:57", "remaining_time": "0:10:39", "throughput": 1320.06, "total_tokens": 6491464}
6828
+ {"current_steps": 34055, "total_steps": 38480, "loss": 0.0331, "lr": 1.988087616665307e-06, "epoch": 17.70010395010395, "percentage": 88.5, "elapsed_time": "1:21:58", "remaining_time": "0:10:39", "throughput": 1320.06, "total_tokens": 6492392}
6829
+ {"current_steps": 34060, "total_steps": 38480, "loss": 0.0281, "lr": 1.9836586490328627e-06, "epoch": 17.7027027027027, "percentage": 88.51, "elapsed_time": "1:21:58", "remaining_time": "0:10:38", "throughput": 1320.05, "total_tokens": 6493256}
6830
+ {"current_steps": 34065, "total_steps": 38480, "loss": 0.2953, "lr": 1.9792344164103167e-06, "epoch": 17.705301455301456, "percentage": 88.53, "elapsed_time": "1:21:59", "remaining_time": "0:10:37", "throughput": 1320.06, "total_tokens": 6494248}
6831
+ {"current_steps": 34070, "total_steps": 38480, "loss": 0.1263, "lr": 1.974814919707829e-06, "epoch": 17.70790020790021, "percentage": 88.54, "elapsed_time": "1:22:00", "remaining_time": "0:10:36", "throughput": 1320.05, "total_tokens": 6495112}
6832
+ {"current_steps": 34075, "total_steps": 38480, "loss": 0.2709, "lr": 1.9704001598346066e-06, "epoch": 17.71049896049896, "percentage": 88.55, "elapsed_time": "1:22:01", "remaining_time": "0:10:36", "throughput": 1320.05, "total_tokens": 6496040}
6833
+ {"current_steps": 34080, "total_steps": 38480, "loss": 0.1396, "lr": 1.965990137698867e-06, "epoch": 17.713097713097714, "percentage": 88.57, "elapsed_time": "1:22:01", "remaining_time": "0:10:35", "throughput": 1320.05, "total_tokens": 6496936}
6834
+ {"current_steps": 34085, "total_steps": 38480, "loss": 0.1008, "lr": 1.9615848542078567e-06, "epoch": 17.715696465696467, "percentage": 88.58, "elapsed_time": "1:22:02", "remaining_time": "0:10:34", "throughput": 1320.04, "total_tokens": 6497832}
6835
+ {"current_steps": 34090, "total_steps": 38480, "loss": 0.2372, "lr": 1.9571843102678455e-06, "epoch": 17.718295218295218, "percentage": 88.59, "elapsed_time": "1:22:03", "remaining_time": "0:10:33", "throughput": 1320.08, "total_tokens": 6498920}
6836
+ {"current_steps": 34095, "total_steps": 38480, "loss": 0.0805, "lr": 1.9527885067841282e-06, "epoch": 17.72089397089397, "percentage": 88.6, "elapsed_time": "1:22:03", "remaining_time": "0:10:33", "throughput": 1320.06, "total_tokens": 6499752}
6837
+ {"current_steps": 34100, "total_steps": 38480, "loss": 0.0893, "lr": 1.948397444661029e-06, "epoch": 17.723492723492722, "percentage": 88.62, "elapsed_time": "1:22:04", "remaining_time": "0:10:32", "throughput": 1320.07, "total_tokens": 6500712}
6838
+ {"current_steps": 34105, "total_steps": 38480, "loss": 0.1765, "lr": 1.9440111248018976e-06, "epoch": 17.726091476091476, "percentage": 88.63, "elapsed_time": "1:22:05", "remaining_time": "0:10:31", "throughput": 1320.07, "total_tokens": 6501672}
6839
+ {"current_steps": 34110, "total_steps": 38480, "loss": 0.2215, "lr": 1.939629548109101e-06, "epoch": 17.72869022869023, "percentage": 88.64, "elapsed_time": "1:22:05", "remaining_time": "0:10:31", "throughput": 1320.09, "total_tokens": 6502664}
6840
+ {"current_steps": 34115, "total_steps": 38480, "loss": 0.1151, "lr": 1.9352527154840345e-06, "epoch": 17.73128898128898, "percentage": 88.66, "elapsed_time": "1:22:06", "remaining_time": "0:10:30", "throughput": 1320.1, "total_tokens": 6503624}
6841
+ {"current_steps": 34120, "total_steps": 38480, "loss": 0.0929, "lr": 1.9308806278271143e-06, "epoch": 17.733887733887734, "percentage": 88.67, "elapsed_time": "1:22:07", "remaining_time": "0:10:29", "throughput": 1320.09, "total_tokens": 6504520}
6842
+ {"current_steps": 34125, "total_steps": 38480, "loss": 0.0731, "lr": 1.9265132860377872e-06, "epoch": 17.736486486486488, "percentage": 88.68, "elapsed_time": "1:22:08", "remaining_time": "0:10:28", "throughput": 1320.1, "total_tokens": 6505512}
6843
+ {"current_steps": 34130, "total_steps": 38480, "loss": 0.1284, "lr": 1.922150691014518e-06, "epoch": 17.739085239085238, "percentage": 88.7, "elapsed_time": "1:22:08", "remaining_time": "0:10:28", "throughput": 1320.11, "total_tokens": 6506472}
6844
+ {"current_steps": 34135, "total_steps": 38480, "loss": 0.1393, "lr": 1.9177928436547946e-06, "epoch": 17.741683991683992, "percentage": 88.71, "elapsed_time": "1:22:09", "remaining_time": "0:10:27", "throughput": 1320.11, "total_tokens": 6507400}
6845
+ {"current_steps": 34140, "total_steps": 38480, "loss": 0.1167, "lr": 1.9134397448551305e-06, "epoch": 17.744282744282746, "percentage": 88.72, "elapsed_time": "1:22:10", "remaining_time": "0:10:26", "throughput": 1320.09, "total_tokens": 6508232}
6846
+ {"current_steps": 34145, "total_steps": 38480, "loss": 0.0848, "lr": 1.9090913955110644e-06, "epoch": 17.746881496881496, "percentage": 88.73, "elapsed_time": "1:22:10", "remaining_time": "0:10:26", "throughput": 1320.1, "total_tokens": 6509192}
6847
+ {"current_steps": 34150, "total_steps": 38480, "loss": 0.1169, "lr": 1.9047477965171618e-06, "epoch": 17.74948024948025, "percentage": 88.75, "elapsed_time": "1:22:11", "remaining_time": "0:10:25", "throughput": 1320.1, "total_tokens": 6510120}
6848
+ {"current_steps": 34155, "total_steps": 38480, "loss": 0.1444, "lr": 1.9004089487669853e-06, "epoch": 17.752079002079, "percentage": 88.76, "elapsed_time": "1:22:12", "remaining_time": "0:10:24", "throughput": 1320.1, "total_tokens": 6511048}
6849
+ {"current_steps": 34160, "total_steps": 38480, "loss": 0.0922, "lr": 1.8960748531531542e-06, "epoch": 17.754677754677754, "percentage": 88.77, "elapsed_time": "1:22:12", "remaining_time": "0:10:23", "throughput": 1320.12, "total_tokens": 6512040}
6850
+ {"current_steps": 34165, "total_steps": 38480, "loss": 0.2737, "lr": 1.8917455105672943e-06, "epoch": 17.757276507276508, "percentage": 88.79, "elapsed_time": "1:22:13", "remaining_time": "0:10:23", "throughput": 1320.13, "total_tokens": 6513032}
6851
+ {"current_steps": 34170, "total_steps": 38480, "loss": 0.1289, "lr": 1.8874209219000488e-06, "epoch": 17.75987525987526, "percentage": 88.8, "elapsed_time": "1:22:14", "remaining_time": "0:10:22", "throughput": 1320.13, "total_tokens": 6513928}
6852
+ {"current_steps": 34175, "total_steps": 38480, "loss": 0.1116, "lr": 1.8831010880410893e-06, "epoch": 17.762474012474012, "percentage": 88.81, "elapsed_time": "1:22:15", "remaining_time": "0:10:21", "throughput": 1320.14, "total_tokens": 6514920}
6853
+ {"current_steps": 34180, "total_steps": 38480, "loss": 0.0794, "lr": 1.878786009879116e-06, "epoch": 17.765072765072766, "percentage": 88.83, "elapsed_time": "1:22:15", "remaining_time": "0:10:20", "throughput": 1320.14, "total_tokens": 6515816}
6854
+ {"current_steps": 34185, "total_steps": 38480, "loss": 0.1542, "lr": 1.8744756883018354e-06, "epoch": 17.767671517671516, "percentage": 88.84, "elapsed_time": "1:22:16", "remaining_time": "0:10:20", "throughput": 1320.16, "total_tokens": 6516840}
6855
+ {"current_steps": 34190, "total_steps": 38480, "loss": 0.1549, "lr": 1.8701701241959885e-06, "epoch": 17.77027027027027, "percentage": 88.85, "elapsed_time": "1:22:17", "remaining_time": "0:10:19", "throughput": 1320.16, "total_tokens": 6517800}
6856
+ {"current_steps": 34195, "total_steps": 38480, "loss": 0.1156, "lr": 1.8658693184473275e-06, "epoch": 17.772869022869024, "percentage": 88.86, "elapsed_time": "1:22:17", "remaining_time": "0:10:18", "throughput": 1320.17, "total_tokens": 6518728}
6857
+ {"current_steps": 34200, "total_steps": 38480, "loss": 0.1117, "lr": 1.8615732719406337e-06, "epoch": 17.775467775467774, "percentage": 88.88, "elapsed_time": "1:22:18", "remaining_time": "0:10:18", "throughput": 1320.17, "total_tokens": 6519688}
6858
+ {"current_steps": 34205, "total_steps": 38480, "loss": 0.1157, "lr": 1.8572819855597085e-06, "epoch": 17.778066528066528, "percentage": 88.89, "elapsed_time": "1:22:19", "remaining_time": "0:10:17", "throughput": 1320.17, "total_tokens": 6520616}
6859
+ {"current_steps": 34210, "total_steps": 38480, "loss": 0.1451, "lr": 1.852995460187365e-06, "epoch": 17.780665280665282, "percentage": 88.9, "elapsed_time": "1:22:19", "remaining_time": "0:10:16", "throughput": 1320.19, "total_tokens": 6521640}
6860
+ {"current_steps": 34215, "total_steps": 38480, "loss": 0.1224, "lr": 1.8487136967054564e-06, "epoch": 17.783264033264032, "percentage": 88.92, "elapsed_time": "1:22:20", "remaining_time": "0:10:15", "throughput": 1320.19, "total_tokens": 6522536}
6861
+ {"current_steps": 34220, "total_steps": 38480, "loss": 0.0898, "lr": 1.8444366959948278e-06, "epoch": 17.785862785862786, "percentage": 88.93, "elapsed_time": "1:22:21", "remaining_time": "0:10:15", "throughput": 1320.19, "total_tokens": 6523464}
6862
+ {"current_steps": 34225, "total_steps": 38480, "loss": 0.109, "lr": 1.8401644589353673e-06, "epoch": 17.78846153846154, "percentage": 88.94, "elapsed_time": "1:22:22", "remaining_time": "0:10:14", "throughput": 1320.19, "total_tokens": 6524392}
6863
+ {"current_steps": 34230, "total_steps": 38480, "loss": 0.1149, "lr": 1.8358969864059771e-06, "epoch": 17.79106029106029, "percentage": 88.96, "elapsed_time": "1:22:22", "remaining_time": "0:10:13", "throughput": 1320.2, "total_tokens": 6525352}
6864
+ {"current_steps": 34235, "total_steps": 38480, "loss": 0.1256, "lr": 1.8316342792845776e-06, "epoch": 17.793659043659044, "percentage": 88.97, "elapsed_time": "1:22:23", "remaining_time": "0:10:12", "throughput": 1320.18, "total_tokens": 6526216}
6865
+ {"current_steps": 34240, "total_steps": 38480, "loss": 0.1345, "lr": 1.8273763384481086e-06, "epoch": 17.796257796257795, "percentage": 88.98, "elapsed_time": "1:22:24", "remaining_time": "0:10:12", "throughput": 1320.19, "total_tokens": 6527176}
6866
+ {"current_steps": 34245, "total_steps": 38480, "loss": 0.1008, "lr": 1.8231231647725221e-06, "epoch": 17.79885654885655, "percentage": 88.99, "elapsed_time": "1:22:24", "remaining_time": "0:10:11", "throughput": 1320.2, "total_tokens": 6528136}
6867
+ {"current_steps": 34250, "total_steps": 38480, "loss": 0.1185, "lr": 1.8188747591328132e-06, "epoch": 17.801455301455302, "percentage": 89.01, "elapsed_time": "1:22:25", "remaining_time": "0:10:10", "throughput": 1320.21, "total_tokens": 6529128}
6868
+ {"current_steps": 34255, "total_steps": 38480, "loss": 0.0573, "lr": 1.814631122402971e-06, "epoch": 17.804054054054053, "percentage": 89.02, "elapsed_time": "1:22:26", "remaining_time": "0:10:10", "throughput": 1320.21, "total_tokens": 6530056}
6869
+ {"current_steps": 34260, "total_steps": 38480, "loss": 0.0951, "lr": 1.8103922554560114e-06, "epoch": 17.806652806652806, "percentage": 89.03, "elapsed_time": "1:22:26", "remaining_time": "0:10:09", "throughput": 1320.21, "total_tokens": 6530984}
6870
+ {"current_steps": 34265, "total_steps": 38480, "loss": 0.0446, "lr": 1.8061581591639705e-06, "epoch": 17.80925155925156, "percentage": 89.05, "elapsed_time": "1:22:27", "remaining_time": "0:10:08", "throughput": 1320.23, "total_tokens": 6531976}
6871
+ {"current_steps": 34270, "total_steps": 38480, "loss": 0.0821, "lr": 1.801928834397909e-06, "epoch": 17.81185031185031, "percentage": 89.06, "elapsed_time": "1:22:28", "remaining_time": "0:10:07", "throughput": 1320.24, "total_tokens": 6532968}
6872
+ {"current_steps": 34275, "total_steps": 38480, "loss": 0.1564, "lr": 1.7977042820278956e-06, "epoch": 17.814449064449065, "percentage": 89.07, "elapsed_time": "1:22:29", "remaining_time": "0:10:07", "throughput": 1320.25, "total_tokens": 6533928}
6873
+ {"current_steps": 34280, "total_steps": 38480, "loss": 0.1296, "lr": 1.7934845029230207e-06, "epoch": 17.81704781704782, "percentage": 89.09, "elapsed_time": "1:22:29", "remaining_time": "0:10:06", "throughput": 1320.26, "total_tokens": 6534920}
6874
+ {"current_steps": 34285, "total_steps": 38480, "loss": 0.2616, "lr": 1.789269497951393e-06, "epoch": 17.81964656964657, "percentage": 89.1, "elapsed_time": "1:22:30", "remaining_time": "0:10:05", "throughput": 1320.27, "total_tokens": 6535912}
6875
+ {"current_steps": 34290, "total_steps": 38480, "loss": 0.2627, "lr": 1.7850592679801438e-06, "epoch": 17.822245322245323, "percentage": 89.11, "elapsed_time": "1:22:31", "remaining_time": "0:10:04", "throughput": 1320.29, "total_tokens": 6536904}
6876
+ {"current_steps": 34295, "total_steps": 38480, "loss": 0.0938, "lr": 1.7808538138754132e-06, "epoch": 17.824844074844076, "percentage": 89.12, "elapsed_time": "1:22:31", "remaining_time": "0:10:04", "throughput": 1320.29, "total_tokens": 6537864}
6877
+ {"current_steps": 34300, "total_steps": 38480, "loss": 0.0564, "lr": 1.7766531365023597e-06, "epoch": 17.827442827442827, "percentage": 89.14, "elapsed_time": "1:22:32", "remaining_time": "0:10:03", "throughput": 1320.31, "total_tokens": 6538888}
6878
+ {"current_steps": 34305, "total_steps": 38480, "loss": 0.0919, "lr": 1.7724572367251723e-06, "epoch": 17.83004158004158, "percentage": 89.15, "elapsed_time": "1:22:33", "remaining_time": "0:10:02", "throughput": 1320.3, "total_tokens": 6539752}
6879
+ {"current_steps": 34310, "total_steps": 38480, "loss": 0.0942, "lr": 1.7682661154070411e-06, "epoch": 17.83264033264033, "percentage": 89.16, "elapsed_time": "1:22:33", "remaining_time": "0:10:02", "throughput": 1320.32, "total_tokens": 6540744}
6880
+ {"current_steps": 34315, "total_steps": 38480, "loss": 0.1835, "lr": 1.764079773410185e-06, "epoch": 17.835239085239085, "percentage": 89.18, "elapsed_time": "1:22:34", "remaining_time": "0:10:01", "throughput": 1320.31, "total_tokens": 6541640}
6881
+ {"current_steps": 34320, "total_steps": 38480, "loss": 0.19, "lr": 1.759898211595823e-06, "epoch": 17.83783783783784, "percentage": 89.19, "elapsed_time": "1:22:35", "remaining_time": "0:10:00", "throughput": 1320.3, "total_tokens": 6542536}
6882
+ {"current_steps": 34325, "total_steps": 38480, "loss": 0.2351, "lr": 1.755721430824217e-06, "epoch": 17.84043659043659, "percentage": 89.2, "elapsed_time": "1:22:36", "remaining_time": "0:09:59", "throughput": 1320.29, "total_tokens": 6543400}
6883
+ {"current_steps": 34330, "total_steps": 38480, "loss": 0.2396, "lr": 1.7515494319546215e-06, "epoch": 17.843035343035343, "percentage": 89.22, "elapsed_time": "1:22:36", "remaining_time": "0:09:59", "throughput": 1320.3, "total_tokens": 6544360}
6884
+ {"current_steps": 34335, "total_steps": 38480, "loss": 0.0691, "lr": 1.747382215845314e-06, "epoch": 17.845634095634097, "percentage": 89.23, "elapsed_time": "1:22:37", "remaining_time": "0:09:58", "throughput": 1320.31, "total_tokens": 6545352}
6885
+ {"current_steps": 34340, "total_steps": 38480, "loss": 0.2042, "lr": 1.7432197833535975e-06, "epoch": 17.848232848232847, "percentage": 89.24, "elapsed_time": "1:22:38", "remaining_time": "0:09:57", "throughput": 1320.31, "total_tokens": 6546280}
6886
+ {"current_steps": 34345, "total_steps": 38480, "loss": 0.1465, "lr": 1.739062135335784e-06, "epoch": 17.8508316008316, "percentage": 89.25, "elapsed_time": "1:22:38", "remaining_time": "0:09:57", "throughput": 1320.32, "total_tokens": 6547208}
6887
+ {"current_steps": 34350, "total_steps": 38480, "loss": 0.1039, "lr": 1.7349092726471894e-06, "epoch": 17.853430353430355, "percentage": 89.27, "elapsed_time": "1:22:39", "remaining_time": "0:09:56", "throughput": 1320.31, "total_tokens": 6548104}
6888
+ {"current_steps": 34355, "total_steps": 38480, "loss": 0.1177, "lr": 1.7307611961421665e-06, "epoch": 17.856029106029105, "percentage": 89.28, "elapsed_time": "1:22:40", "remaining_time": "0:09:55", "throughput": 1320.32, "total_tokens": 6549064}
6889
+ {"current_steps": 34360, "total_steps": 38480, "loss": 0.0634, "lr": 1.7266179066740718e-06, "epoch": 17.85862785862786, "percentage": 89.29, "elapsed_time": "1:22:40", "remaining_time": "0:09:54", "throughput": 1320.32, "total_tokens": 6549992}
6890
+ {"current_steps": 34365, "total_steps": 38480, "loss": 0.1812, "lr": 1.7224794050952787e-06, "epoch": 17.861226611226613, "percentage": 89.31, "elapsed_time": "1:22:41", "remaining_time": "0:09:54", "throughput": 1320.32, "total_tokens": 6550920}
6891
+ {"current_steps": 34370, "total_steps": 38480, "loss": 0.0905, "lr": 1.71834569225717e-06, "epoch": 17.863825363825363, "percentage": 89.32, "elapsed_time": "1:22:42", "remaining_time": "0:09:53", "throughput": 1320.32, "total_tokens": 6551848}
6892
+ {"current_steps": 34375, "total_steps": 38480, "loss": 0.1308, "lr": 1.7142167690101574e-06, "epoch": 17.866424116424117, "percentage": 89.33, "elapsed_time": "1:22:43", "remaining_time": "0:09:52", "throughput": 1320.34, "total_tokens": 6552840}
6893
+ {"current_steps": 34380, "total_steps": 38480, "loss": 0.1477, "lr": 1.7100926362036529e-06, "epoch": 17.86902286902287, "percentage": 89.35, "elapsed_time": "1:22:43", "remaining_time": "0:09:51", "throughput": 1320.35, "total_tokens": 6553800}
6894
+ {"current_steps": 34385, "total_steps": 38480, "loss": 0.1926, "lr": 1.7059732946860945e-06, "epoch": 17.87162162162162, "percentage": 89.36, "elapsed_time": "1:22:44", "remaining_time": "0:09:51", "throughput": 1320.37, "total_tokens": 6554824}
6895
+ {"current_steps": 34390, "total_steps": 38480, "loss": 0.1881, "lr": 1.7018587453049184e-06, "epoch": 17.874220374220375, "percentage": 89.37, "elapsed_time": "1:22:45", "remaining_time": "0:09:50", "throughput": 1320.37, "total_tokens": 6555752}
6896
+ {"current_steps": 34395, "total_steps": 38480, "loss": 0.1135, "lr": 1.6977489889065973e-06, "epoch": 17.876819126819125, "percentage": 89.38, "elapsed_time": "1:22:45", "remaining_time": "0:09:49", "throughput": 1320.37, "total_tokens": 6556712}
6897
+ {"current_steps": 34400, "total_steps": 38480, "loss": 0.1031, "lr": 1.6936440263366022e-06, "epoch": 17.87941787941788, "percentage": 89.4, "elapsed_time": "1:22:46", "remaining_time": "0:09:49", "throughput": 1320.37, "total_tokens": 6557640}
6898
+ {"current_steps": 34405, "total_steps": 38480, "loss": 0.1593, "lr": 1.6895438584394186e-06, "epoch": 17.882016632016633, "percentage": 89.41, "elapsed_time": "1:22:47", "remaining_time": "0:09:48", "throughput": 1320.39, "total_tokens": 6558632}
6899
+ {"current_steps": 34410, "total_steps": 38480, "loss": 0.1056, "lr": 1.6854484860585523e-06, "epoch": 17.884615384615383, "percentage": 89.42, "elapsed_time": "1:22:47", "remaining_time": "0:09:47", "throughput": 1320.42, "total_tokens": 6559720}
6900
+ {"current_steps": 34415, "total_steps": 38480, "loss": 0.2466, "lr": 1.6813579100365185e-06, "epoch": 17.887214137214137, "percentage": 89.44, "elapsed_time": "1:22:48", "remaining_time": "0:09:46", "throughput": 1320.44, "total_tokens": 6560712}
6901
+ {"current_steps": 34420, "total_steps": 38480, "loss": 0.146, "lr": 1.6772721312148438e-06, "epoch": 17.88981288981289, "percentage": 89.45, "elapsed_time": "1:22:49", "remaining_time": "0:09:46", "throughput": 1320.45, "total_tokens": 6561672}
6902
+ {"current_steps": 34425, "total_steps": 38480, "loss": 0.1839, "lr": 1.673191150434067e-06, "epoch": 17.89241164241164, "percentage": 89.46, "elapsed_time": "1:22:49", "remaining_time": "0:09:45", "throughput": 1320.45, "total_tokens": 6562600}
6903
+ {"current_steps": 34430, "total_steps": 38480, "loss": 0.1684, "lr": 1.6691149685337526e-06, "epoch": 17.895010395010395, "percentage": 89.48, "elapsed_time": "1:22:50", "remaining_time": "0:09:44", "throughput": 1320.44, "total_tokens": 6563464}
6904
+ {"current_steps": 34435, "total_steps": 38480, "loss": 0.1044, "lr": 1.6650435863524632e-06, "epoch": 17.89760914760915, "percentage": 89.49, "elapsed_time": "1:22:51", "remaining_time": "0:09:43", "throughput": 1320.44, "total_tokens": 6564392}
6905
+ {"current_steps": 34440, "total_steps": 38480, "loss": 0.0766, "lr": 1.660977004727779e-06, "epoch": 17.9002079002079, "percentage": 89.5, "elapsed_time": "1:22:52", "remaining_time": "0:09:43", "throughput": 1320.44, "total_tokens": 6565320}
6906
+ {"current_steps": 34445, "total_steps": 38480, "loss": 0.1422, "lr": 1.6569152244962888e-06, "epoch": 17.902806652806653, "percentage": 89.51, "elapsed_time": "1:22:52", "remaining_time": "0:09:42", "throughput": 1320.45, "total_tokens": 6566312}
6907
+ {"current_steps": 34450, "total_steps": 38480, "loss": 0.0345, "lr": 1.6528582464936076e-06, "epoch": 17.905405405405407, "percentage": 89.53, "elapsed_time": "1:22:53", "remaining_time": "0:09:41", "throughput": 1320.45, "total_tokens": 6567208}
6908
+ {"current_steps": 34455, "total_steps": 38480, "loss": 0.1358, "lr": 1.6488060715543457e-06, "epoch": 17.908004158004157, "percentage": 89.54, "elapsed_time": "1:22:54", "remaining_time": "0:09:41", "throughput": 1320.46, "total_tokens": 6568168}
6909
+ {"current_steps": 34460, "total_steps": 38480, "loss": 0.0801, "lr": 1.6447587005121307e-06, "epoch": 17.91060291060291, "percentage": 89.55, "elapsed_time": "1:22:54", "remaining_time": "0:09:40", "throughput": 1320.45, "total_tokens": 6569064}
6910
+ {"current_steps": 34465, "total_steps": 38480, "loss": 0.0701, "lr": 1.6407161341996074e-06, "epoch": 17.91320166320166, "percentage": 89.57, "elapsed_time": "1:22:55", "remaining_time": "0:09:39", "throughput": 1320.47, "total_tokens": 6570088}
6911
+ {"current_steps": 34470, "total_steps": 38480, "loss": 0.0948, "lr": 1.6366783734484276e-06, "epoch": 17.915800415800415, "percentage": 89.58, "elapsed_time": "1:22:56", "remaining_time": "0:09:38", "throughput": 1320.46, "total_tokens": 6570952}
6912
+ {"current_steps": 34475, "total_steps": 38480, "loss": 0.0987, "lr": 1.6326454190892516e-06, "epoch": 17.91839916839917, "percentage": 89.59, "elapsed_time": "1:22:56", "remaining_time": "0:09:38", "throughput": 1320.46, "total_tokens": 6571848}
6913
+ {"current_steps": 34480, "total_steps": 38480, "loss": 0.2215, "lr": 1.6286172719517601e-06, "epoch": 17.92099792099792, "percentage": 89.6, "elapsed_time": "1:22:57", "remaining_time": "0:09:37", "throughput": 1320.47, "total_tokens": 6572808}
6914
+ {"current_steps": 34485, "total_steps": 38480, "loss": 0.0714, "lr": 1.624593932864632e-06, "epoch": 17.923596673596673, "percentage": 89.62, "elapsed_time": "1:22:58", "remaining_time": "0:09:36", "throughput": 1320.47, "total_tokens": 6573736}
6915
+ {"current_steps": 34490, "total_steps": 38480, "loss": 0.155, "lr": 1.6205754026555664e-06, "epoch": 17.926195426195427, "percentage": 89.63, "elapsed_time": "1:22:59", "remaining_time": "0:09:36", "throughput": 1320.46, "total_tokens": 6574600}
6916
+ {"current_steps": 34495, "total_steps": 38480, "loss": 0.0505, "lr": 1.6165616821512714e-06, "epoch": 17.928794178794178, "percentage": 89.64, "elapsed_time": "1:22:59", "remaining_time": "0:09:35", "throughput": 1320.47, "total_tokens": 6575560}
6917
+ {"current_steps": 34500, "total_steps": 38480, "loss": 0.128, "lr": 1.612552772177467e-06, "epoch": 17.93139293139293, "percentage": 89.66, "elapsed_time": "1:23:00", "remaining_time": "0:09:34", "throughput": 1320.48, "total_tokens": 6576552}
6918
+ {"current_steps": 34505, "total_steps": 38480, "loss": 0.0544, "lr": 1.6085486735588768e-06, "epoch": 17.933991683991685, "percentage": 89.67, "elapsed_time": "1:23:01", "remaining_time": "0:09:33", "throughput": 1320.49, "total_tokens": 6577512}
6919
+ {"current_steps": 34510, "total_steps": 38480, "loss": 0.1165, "lr": 1.6045493871192446e-06, "epoch": 17.936590436590436, "percentage": 89.68, "elapsed_time": "1:23:01", "remaining_time": "0:09:33", "throughput": 1320.5, "total_tokens": 6578504}
6920
+ {"current_steps": 34515, "total_steps": 38480, "loss": 0.1762, "lr": 1.6005549136813124e-06, "epoch": 17.93918918918919, "percentage": 89.7, "elapsed_time": "1:23:02", "remaining_time": "0:09:32", "throughput": 1320.51, "total_tokens": 6579464}
6921
+ {"current_steps": 34520, "total_steps": 38480, "loss": 0.0637, "lr": 1.5965652540668479e-06, "epoch": 17.941787941787943, "percentage": 89.71, "elapsed_time": "1:23:03", "remaining_time": "0:09:31", "throughput": 1320.51, "total_tokens": 6580424}
6922
+ {"current_steps": 34525, "total_steps": 38480, "loss": 0.2097, "lr": 1.5925804090966113e-06, "epoch": 17.944386694386694, "percentage": 89.72, "elapsed_time": "1:23:03", "remaining_time": "0:09:30", "throughput": 1320.53, "total_tokens": 6581416}
6923
+ {"current_steps": 34530, "total_steps": 38480, "loss": 0.107, "lr": 1.5886003795903853e-06, "epoch": 17.946985446985448, "percentage": 89.73, "elapsed_time": "1:23:04", "remaining_time": "0:09:30", "throughput": 1320.55, "total_tokens": 6582440}
6924
+ {"current_steps": 34535, "total_steps": 38480, "loss": 0.1273, "lr": 1.5846251663669487e-06, "epoch": 17.9495841995842, "percentage": 89.75, "elapsed_time": "1:23:05", "remaining_time": "0:09:29", "throughput": 1320.57, "total_tokens": 6583464}
6925
+ {"current_steps": 34540, "total_steps": 38480, "loss": 0.0943, "lr": 1.5806547702441083e-06, "epoch": 17.95218295218295, "percentage": 89.76, "elapsed_time": "1:23:06", "remaining_time": "0:09:28", "throughput": 1320.56, "total_tokens": 6584328}
6926
+ {"current_steps": 34545, "total_steps": 38480, "loss": 0.0972, "lr": 1.5766891920386633e-06, "epoch": 17.954781704781706, "percentage": 89.77, "elapsed_time": "1:23:06", "remaining_time": "0:09:28", "throughput": 1320.56, "total_tokens": 6585256}
6927
+ {"current_steps": 34550, "total_steps": 38480, "loss": 0.1851, "lr": 1.5727284325664253e-06, "epoch": 17.957380457380456, "percentage": 89.79, "elapsed_time": "1:23:07", "remaining_time": "0:09:27", "throughput": 1320.58, "total_tokens": 6586280}
6928
+ {"current_steps": 34555, "total_steps": 38480, "loss": 0.0812, "lr": 1.5687724926422205e-06, "epoch": 17.95997920997921, "percentage": 89.8, "elapsed_time": "1:23:08", "remaining_time": "0:09:26", "throughput": 1320.59, "total_tokens": 6587272}
6929
+ {"current_steps": 34560, "total_steps": 38480, "loss": 0.0707, "lr": 1.5648213730798805e-06, "epoch": 17.962577962577964, "percentage": 89.81, "elapsed_time": "1:23:08", "remaining_time": "0:09:25", "throughput": 1320.6, "total_tokens": 6588232}
6930
+ {"current_steps": 34565, "total_steps": 38480, "loss": 0.0815, "lr": 1.5608750746922446e-06, "epoch": 17.965176715176714, "percentage": 89.83, "elapsed_time": "1:23:09", "remaining_time": "0:09:25", "throughput": 1320.62, "total_tokens": 6589224}
6931
+ {"current_steps": 34570, "total_steps": 38480, "loss": 0.095, "lr": 1.5569335982911549e-06, "epoch": 17.967775467775468, "percentage": 89.84, "elapsed_time": "1:23:10", "remaining_time": "0:09:24", "throughput": 1320.63, "total_tokens": 6590216}
6932
+ {"current_steps": 34575, "total_steps": 38480, "loss": 0.1634, "lr": 1.5529969446874764e-06, "epoch": 17.97037422037422, "percentage": 89.85, "elapsed_time": "1:23:10", "remaining_time": "0:09:23", "throughput": 1320.62, "total_tokens": 6591112}
6933
+ {"current_steps": 34580, "total_steps": 38480, "loss": 0.0602, "lr": 1.5490651146910673e-06, "epoch": 17.972972972972972, "percentage": 89.86, "elapsed_time": "1:23:11", "remaining_time": "0:09:22", "throughput": 1320.64, "total_tokens": 6592104}
6934
+ {"current_steps": 34585, "total_steps": 38480, "loss": 0.1213, "lr": 1.5451381091107941e-06, "epoch": 17.975571725571726, "percentage": 89.88, "elapsed_time": "1:23:12", "remaining_time": "0:09:22", "throughput": 1320.65, "total_tokens": 6593096}
6935
+ {"current_steps": 34590, "total_steps": 38480, "loss": 0.1078, "lr": 1.5412159287545468e-06, "epoch": 17.97817047817048, "percentage": 89.89, "elapsed_time": "1:23:12", "remaining_time": "0:09:21", "throughput": 1320.67, "total_tokens": 6594120}
6936
+ {"current_steps": 34595, "total_steps": 38480, "loss": 0.1878, "lr": 1.5372985744292024e-06, "epoch": 17.98076923076923, "percentage": 89.9, "elapsed_time": "1:23:13", "remaining_time": "0:09:20", "throughput": 1320.68, "total_tokens": 6595048}
6937
+ {"current_steps": 34600, "total_steps": 38480, "loss": 0.1169, "lr": 1.5333860469406574e-06, "epoch": 17.983367983367984, "percentage": 89.92, "elapsed_time": "1:23:14", "remaining_time": "0:09:20", "throughput": 1320.66, "total_tokens": 6595912}
6938
+ {"current_steps": 34605, "total_steps": 38480, "loss": 0.0903, "lr": 1.529478347093813e-06, "epoch": 17.985966735966738, "percentage": 89.93, "elapsed_time": "1:23:15", "remaining_time": "0:09:19", "throughput": 1320.66, "total_tokens": 6596808}
6939
+ {"current_steps": 34610, "total_steps": 38480, "loss": 0.1952, "lr": 1.5255754756925728e-06, "epoch": 17.988565488565488, "percentage": 89.94, "elapsed_time": "1:23:15", "remaining_time": "0:09:18", "throughput": 1320.67, "total_tokens": 6597800}
6940
+ {"current_steps": 34615, "total_steps": 38480, "loss": 0.0934, "lr": 1.5216774335398532e-06, "epoch": 17.991164241164242, "percentage": 89.96, "elapsed_time": "1:23:16", "remaining_time": "0:09:17", "throughput": 1320.69, "total_tokens": 6598792}
6941
+ {"current_steps": 34620, "total_steps": 38480, "loss": 0.1356, "lr": 1.517784221437568e-06, "epoch": 17.993762993762992, "percentage": 89.97, "elapsed_time": "1:23:17", "remaining_time": "0:09:17", "throughput": 1320.69, "total_tokens": 6599752}
6942
+ {"current_steps": 34625, "total_steps": 38480, "loss": 0.0888, "lr": 1.513895840186655e-06, "epoch": 17.996361746361746, "percentage": 89.98, "elapsed_time": "1:23:17", "remaining_time": "0:09:16", "throughput": 1320.71, "total_tokens": 6600744}
6943
+ {"current_steps": 34630, "total_steps": 38480, "loss": 0.109, "lr": 1.5100122905870373e-06, "epoch": 17.9989604989605, "percentage": 89.99, "elapsed_time": "1:23:18", "remaining_time": "0:09:15", "throughput": 1320.7, "total_tokens": 6601608}
6944
+ {"current_steps": 34632, "total_steps": 38480, "eval_loss": 0.14623308181762695, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "1:23:26", "remaining_time": "0:09:16", "throughput": 1318.56, "total_tokens": 6601944}
6945
+ {"current_steps": 34635, "total_steps": 38480, "loss": 0.0397, "lr": 1.5061335734376598e-06, "epoch": 18.00155925155925, "percentage": 90.01, "elapsed_time": "1:23:28", "remaining_time": "0:09:16", "throughput": 1318.26, "total_tokens": 6602488}
6946
+ {"current_steps": 34640, "total_steps": 38480, "loss": 0.0782, "lr": 1.5022596895364615e-06, "epoch": 18.004158004158004, "percentage": 90.02, "elapsed_time": "1:23:29", "remaining_time": "0:09:15", "throughput": 1318.22, "total_tokens": 6603416}
6947
+ {"current_steps": 34645, "total_steps": 38480, "loss": 0.173, "lr": 1.4983906396803972e-06, "epoch": 18.006756756756758, "percentage": 90.03, "elapsed_time": "1:23:30", "remaining_time": "0:09:14", "throughput": 1318.22, "total_tokens": 6604376}