rbelanec commited on
Commit
8efb310
·
verified ·
1 Parent(s): a5e60e5

Training in progress, step 18278

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +193 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0f9a6139b7ed36547fe76b39a31fc4c429c47fb4931abd890eb56cc2d4aaec6
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b67333603f23b0b76cffb29c42fcfd470165a07dcc0d61ba36e0374ef0069ae2
3
  size 798032
trainer_log.jsonl CHANGED
@@ -3481,3 +3481,196 @@
3481
  {"current_steps": 17316, "total_steps": 19240, "eval_loss": 0.14598700404167175, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:44:35", "remaining_time": "0:04:57", "throughput": 1232.51, "total_tokens": 3297136}
3482
  {"current_steps": 17320, "total_steps": 19240, "loss": 0.2028, "lr": 1.5030340796185787e-06, "epoch": 9.002079002079002, "percentage": 90.02, "elapsed_time": "0:44:36", "remaining_time": "0:04:56", "throughput": 1232.13, "total_tokens": 3298000}
3483
  {"current_steps": 17325, "total_steps": 19240, "loss": 0.1081, "lr": 1.4952988808429575e-06, "epoch": 9.004677754677754, "percentage": 90.05, "elapsed_time": "0:44:37", "remaining_time": "0:04:55", "throughput": 1232.14, "total_tokens": 3298928}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3481
  {"current_steps": 17316, "total_steps": 19240, "eval_loss": 0.14598700404167175, "epoch": 9.0, "percentage": 90.0, "elapsed_time": "0:44:35", "remaining_time": "0:04:57", "throughput": 1232.51, "total_tokens": 3297136}
3482
  {"current_steps": 17320, "total_steps": 19240, "loss": 0.2028, "lr": 1.5030340796185787e-06, "epoch": 9.002079002079002, "percentage": 90.02, "elapsed_time": "0:44:36", "remaining_time": "0:04:56", "throughput": 1232.13, "total_tokens": 3298000}
3483
  {"current_steps": 17325, "total_steps": 19240, "loss": 0.1081, "lr": 1.4952988808429575e-06, "epoch": 9.004677754677754, "percentage": 90.05, "elapsed_time": "0:44:37", "remaining_time": "0:04:55", "throughput": 1232.14, "total_tokens": 3298928}
3484
+ {"current_steps": 17330, "total_steps": 19240, "loss": 0.2353, "lr": 1.4875830239748867e-06, "epoch": 9.007276507276508, "percentage": 90.07, "elapsed_time": "0:44:38", "remaining_time": "0:04:55", "throughput": 1232.18, "total_tokens": 3299952}
3485
+ {"current_steps": 17335, "total_steps": 19240, "loss": 0.09, "lr": 1.4798865153637097e-06, "epoch": 9.00987525987526, "percentage": 90.1, "elapsed_time": "0:44:38", "remaining_time": "0:04:54", "throughput": 1232.19, "total_tokens": 3300880}
3486
+ {"current_steps": 17340, "total_steps": 19240, "loss": 0.1035, "lr": 1.472209361342844e-06, "epoch": 9.012474012474012, "percentage": 90.12, "elapsed_time": "0:44:39", "remaining_time": "0:04:53", "throughput": 1232.19, "total_tokens": 3301776}
3487
+ {"current_steps": 17345, "total_steps": 19240, "loss": 0.0936, "lr": 1.4645515682297911e-06, "epoch": 9.015072765072764, "percentage": 90.15, "elapsed_time": "0:44:40", "remaining_time": "0:04:52", "throughput": 1232.24, "total_tokens": 3302800}
3488
+ {"current_steps": 17350, "total_steps": 19240, "loss": 0.1279, "lr": 1.456913142326108e-06, "epoch": 9.017671517671518, "percentage": 90.18, "elapsed_time": "0:44:41", "remaining_time": "0:04:52", "throughput": 1232.29, "total_tokens": 3303824}
3489
+ {"current_steps": 17355, "total_steps": 19240, "loss": 0.1139, "lr": 1.4492940899174134e-06, "epoch": 9.02027027027027, "percentage": 90.2, "elapsed_time": "0:44:41", "remaining_time": "0:04:51", "throughput": 1232.29, "total_tokens": 3304720}
3490
+ {"current_steps": 17360, "total_steps": 19240, "loss": 0.1629, "lr": 1.441694417273401e-06, "epoch": 9.022869022869022, "percentage": 90.23, "elapsed_time": "0:44:42", "remaining_time": "0:04:50", "throughput": 1232.33, "total_tokens": 3305712}
3491
+ {"current_steps": 17365, "total_steps": 19240, "loss": 0.1714, "lr": 1.4341141306477957e-06, "epoch": 9.025467775467776, "percentage": 90.25, "elapsed_time": "0:44:43", "remaining_time": "0:04:49", "throughput": 1232.35, "total_tokens": 3306672}
3492
+ {"current_steps": 17370, "total_steps": 19240, "loss": 0.0624, "lr": 1.4265532362783884e-06, "epoch": 9.028066528066528, "percentage": 90.28, "elapsed_time": "0:44:43", "remaining_time": "0:04:48", "throughput": 1232.37, "total_tokens": 3307600}
3493
+ {"current_steps": 17375, "total_steps": 19240, "loss": 0.0456, "lr": 1.4190117403869968e-06, "epoch": 9.03066528066528, "percentage": 90.31, "elapsed_time": "0:44:44", "remaining_time": "0:04:48", "throughput": 1232.4, "total_tokens": 3308592}
3494
+ {"current_steps": 17380, "total_steps": 19240, "loss": 0.075, "lr": 1.4114896491794816e-06, "epoch": 9.033264033264032, "percentage": 90.33, "elapsed_time": "0:44:45", "remaining_time": "0:04:47", "throughput": 1232.43, "total_tokens": 3309552}
3495
+ {"current_steps": 17385, "total_steps": 19240, "loss": 0.0879, "lr": 1.4039869688457414e-06, "epoch": 9.035862785862786, "percentage": 90.36, "elapsed_time": "0:44:46", "remaining_time": "0:04:46", "throughput": 1232.48, "total_tokens": 3310576}
3496
+ {"current_steps": 17390, "total_steps": 19240, "loss": 0.0524, "lr": 1.396503705559693e-06, "epoch": 9.038461538461538, "percentage": 90.38, "elapsed_time": "0:44:46", "remaining_time": "0:04:45", "throughput": 1232.53, "total_tokens": 3311600}
3497
+ {"current_steps": 17395, "total_steps": 19240, "loss": 0.1394, "lr": 1.3890398654792803e-06, "epoch": 9.04106029106029, "percentage": 90.41, "elapsed_time": "0:44:47", "remaining_time": "0:04:45", "throughput": 1232.52, "total_tokens": 3312496}
3498
+ {"current_steps": 17400, "total_steps": 19240, "loss": 0.113, "lr": 1.3815954547464565e-06, "epoch": 9.043659043659044, "percentage": 90.44, "elapsed_time": "0:44:48", "remaining_time": "0:04:44", "throughput": 1232.55, "total_tokens": 3313456}
3499
+ {"current_steps": 17405, "total_steps": 19240, "loss": 0.3064, "lr": 1.3741704794872024e-06, "epoch": 9.046257796257796, "percentage": 90.46, "elapsed_time": "0:44:49", "remaining_time": "0:04:43", "throughput": 1232.57, "total_tokens": 3314416}
3500
+ {"current_steps": 17410, "total_steps": 19240, "loss": 0.1607, "lr": 1.3667649458114857e-06, "epoch": 9.048856548856548, "percentage": 90.49, "elapsed_time": "0:44:49", "remaining_time": "0:04:42", "throughput": 1232.6, "total_tokens": 3315376}
3501
+ {"current_steps": 17415, "total_steps": 19240, "loss": 0.1454, "lr": 1.3593788598132928e-06, "epoch": 9.051455301455302, "percentage": 90.51, "elapsed_time": "0:44:50", "remaining_time": "0:04:41", "throughput": 1232.6, "total_tokens": 3316272}
3502
+ {"current_steps": 17420, "total_steps": 19240, "loss": 0.1297, "lr": 1.3520122275705871e-06, "epoch": 9.054054054054054, "percentage": 90.54, "elapsed_time": "0:44:51", "remaining_time": "0:04:41", "throughput": 1232.61, "total_tokens": 3317200}
3503
+ {"current_steps": 17425, "total_steps": 19240, "loss": 0.1354, "lr": 1.344665055145347e-06, "epoch": 9.056652806652806, "percentage": 90.57, "elapsed_time": "0:44:51", "remaining_time": "0:04:40", "throughput": 1232.66, "total_tokens": 3318224}
3504
+ {"current_steps": 17430, "total_steps": 19240, "loss": 0.0501, "lr": 1.3373373485835227e-06, "epoch": 9.059251559251559, "percentage": 90.59, "elapsed_time": "0:44:52", "remaining_time": "0:04:39", "throughput": 1232.67, "total_tokens": 3319152}
3505
+ {"current_steps": 17435, "total_steps": 19240, "loss": 0.0459, "lr": 1.3300291139150461e-06, "epoch": 9.061850311850312, "percentage": 90.62, "elapsed_time": "0:44:53", "remaining_time": "0:04:38", "throughput": 1232.7, "total_tokens": 3320112}
3506
+ {"current_steps": 17440, "total_steps": 19240, "loss": 0.136, "lr": 1.3227403571538398e-06, "epoch": 9.064449064449065, "percentage": 90.64, "elapsed_time": "0:44:54", "remaining_time": "0:04:38", "throughput": 1232.73, "total_tokens": 3321104}
3507
+ {"current_steps": 17445, "total_steps": 19240, "loss": 0.1384, "lr": 1.3154710842977703e-06, "epoch": 9.067047817047817, "percentage": 90.67, "elapsed_time": "0:44:54", "remaining_time": "0:04:37", "throughput": 1232.73, "total_tokens": 3322000}
3508
+ {"current_steps": 17450, "total_steps": 19240, "loss": 0.2099, "lr": 1.3082213013286993e-06, "epoch": 9.06964656964657, "percentage": 90.7, "elapsed_time": "0:44:55", "remaining_time": "0:04:36", "throughput": 1232.75, "total_tokens": 3322928}
3509
+ {"current_steps": 17455, "total_steps": 19240, "loss": 0.1171, "lr": 1.3009910142124354e-06, "epoch": 9.072245322245323, "percentage": 90.72, "elapsed_time": "0:44:56", "remaining_time": "0:04:35", "throughput": 1232.75, "total_tokens": 3323824}
3510
+ {"current_steps": 17460, "total_steps": 19240, "loss": 0.1027, "lr": 1.2937802288987499e-06, "epoch": 9.074844074844075, "percentage": 90.75, "elapsed_time": "0:44:56", "remaining_time": "0:04:34", "throughput": 1232.76, "total_tokens": 3324752}
3511
+ {"current_steps": 17465, "total_steps": 19240, "loss": 0.0893, "lr": 1.286588951321363e-06, "epoch": 9.077442827442827, "percentage": 90.77, "elapsed_time": "0:44:57", "remaining_time": "0:04:34", "throughput": 1232.75, "total_tokens": 3325616}
3512
+ {"current_steps": 17470, "total_steps": 19240, "loss": 0.1067, "lr": 1.2794171873979439e-06, "epoch": 9.08004158004158, "percentage": 90.8, "elapsed_time": "0:44:58", "remaining_time": "0:04:33", "throughput": 1232.79, "total_tokens": 3326608}
3513
+ {"current_steps": 17475, "total_steps": 19240, "loss": 0.0916, "lr": 1.272264943030102e-06, "epoch": 9.082640332640333, "percentage": 90.83, "elapsed_time": "0:44:59", "remaining_time": "0:04:32", "throughput": 1232.79, "total_tokens": 3327504}
3514
+ {"current_steps": 17480, "total_steps": 19240, "loss": 0.1475, "lr": 1.2651322241033825e-06, "epoch": 9.085239085239085, "percentage": 90.85, "elapsed_time": "0:44:59", "remaining_time": "0:04:31", "throughput": 1232.82, "total_tokens": 3328464}
3515
+ {"current_steps": 17485, "total_steps": 19240, "loss": 0.1135, "lr": 1.2580190364872706e-06, "epoch": 9.087837837837839, "percentage": 90.88, "elapsed_time": "0:45:00", "remaining_time": "0:04:31", "throughput": 1232.81, "total_tokens": 3329328}
3516
+ {"current_steps": 17490, "total_steps": 19240, "loss": 0.155, "lr": 1.2509253860351732e-06, "epoch": 9.09043659043659, "percentage": 90.9, "elapsed_time": "0:45:01", "remaining_time": "0:04:30", "throughput": 1232.82, "total_tokens": 3330256}
3517
+ {"current_steps": 17495, "total_steps": 19240, "loss": 0.1372, "lr": 1.2438512785844237e-06, "epoch": 9.093035343035343, "percentage": 90.93, "elapsed_time": "0:45:02", "remaining_time": "0:04:29", "throughput": 1232.82, "total_tokens": 3331152}
3518
+ {"current_steps": 17500, "total_steps": 19240, "loss": 0.1241, "lr": 1.236796719956268e-06, "epoch": 9.095634095634095, "percentage": 90.96, "elapsed_time": "0:45:02", "remaining_time": "0:04:28", "throughput": 1232.84, "total_tokens": 3332080}
3519
+ {"current_steps": 17505, "total_steps": 19240, "loss": 0.2225, "lr": 1.229761715955874e-06, "epoch": 9.098232848232849, "percentage": 90.98, "elapsed_time": "0:45:03", "remaining_time": "0:04:27", "throughput": 1232.86, "total_tokens": 3333040}
3520
+ {"current_steps": 17510, "total_steps": 19240, "loss": 0.2676, "lr": 1.2227462723723077e-06, "epoch": 9.1008316008316, "percentage": 91.01, "elapsed_time": "0:45:04", "remaining_time": "0:04:27", "throughput": 1232.91, "total_tokens": 3334064}
3521
+ {"current_steps": 17515, "total_steps": 19240, "loss": 0.1238, "lr": 1.2157503949785487e-06, "epoch": 9.103430353430353, "percentage": 91.03, "elapsed_time": "0:45:04", "remaining_time": "0:04:26", "throughput": 1232.94, "total_tokens": 3335024}
3522
+ {"current_steps": 17520, "total_steps": 19240, "loss": 0.0856, "lr": 1.2087740895314697e-06, "epoch": 9.106029106029107, "percentage": 91.06, "elapsed_time": "0:45:05", "remaining_time": "0:04:25", "throughput": 1232.94, "total_tokens": 3335920}
3523
+ {"current_steps": 17525, "total_steps": 19240, "loss": 0.092, "lr": 1.201817361771837e-06, "epoch": 9.108627858627859, "percentage": 91.09, "elapsed_time": "0:45:06", "remaining_time": "0:04:24", "throughput": 1232.98, "total_tokens": 3336912}
3524
+ {"current_steps": 17530, "total_steps": 19240, "loss": 0.1456, "lr": 1.1948802174243158e-06, "epoch": 9.111226611226611, "percentage": 91.11, "elapsed_time": "0:45:07", "remaining_time": "0:04:24", "throughput": 1233.01, "total_tokens": 3337904}
3525
+ {"current_steps": 17535, "total_steps": 19240, "loss": 0.1006, "lr": 1.187962662197442e-06, "epoch": 9.113825363825363, "percentage": 91.14, "elapsed_time": "0:45:07", "remaining_time": "0:04:23", "throughput": 1233.04, "total_tokens": 3338864}
3526
+ {"current_steps": 17540, "total_steps": 19240, "loss": 0.1811, "lr": 1.181064701783649e-06, "epoch": 9.116424116424117, "percentage": 91.16, "elapsed_time": "0:45:08", "remaining_time": "0:04:22", "throughput": 1233.1, "total_tokens": 3339920}
3527
+ {"current_steps": 17545, "total_steps": 19240, "loss": 0.1869, "lr": 1.174186341859221e-06, "epoch": 9.119022869022869, "percentage": 91.19, "elapsed_time": "0:45:09", "remaining_time": "0:04:21", "throughput": 1233.13, "total_tokens": 3340912}
3528
+ {"current_steps": 17550, "total_steps": 19240, "loss": 0.1719, "lr": 1.1673275880843382e-06, "epoch": 9.121621621621621, "percentage": 91.22, "elapsed_time": "0:45:10", "remaining_time": "0:04:20", "throughput": 1233.18, "total_tokens": 3341936}
3529
+ {"current_steps": 17555, "total_steps": 19240, "loss": 0.2034, "lr": 1.1604884461030392e-06, "epoch": 9.124220374220375, "percentage": 91.24, "elapsed_time": "0:45:10", "remaining_time": "0:04:20", "throughput": 1233.21, "total_tokens": 3342928}
3530
+ {"current_steps": 17560, "total_steps": 19240, "loss": 0.1423, "lr": 1.1536689215432106e-06, "epoch": 9.126819126819127, "percentage": 91.27, "elapsed_time": "0:45:11", "remaining_time": "0:04:19", "throughput": 1233.19, "total_tokens": 3343760}
3531
+ {"current_steps": 17565, "total_steps": 19240, "loss": 0.1827, "lr": 1.1468690200166193e-06, "epoch": 9.129417879417879, "percentage": 91.29, "elapsed_time": "0:45:12", "remaining_time": "0:04:18", "throughput": 1233.23, "total_tokens": 3344784}
3532
+ {"current_steps": 17570, "total_steps": 19240, "loss": 0.1448, "lr": 1.1400887471188614e-06, "epoch": 9.132016632016631, "percentage": 91.32, "elapsed_time": "0:45:12", "remaining_time": "0:04:17", "throughput": 1233.24, "total_tokens": 3345712}
3533
+ {"current_steps": 17575, "total_steps": 19240, "loss": 0.1287, "lr": 1.1333281084294045e-06, "epoch": 9.134615384615385, "percentage": 91.35, "elapsed_time": "0:45:13", "remaining_time": "0:04:17", "throughput": 1233.27, "total_tokens": 3346672}
3534
+ {"current_steps": 17580, "total_steps": 19240, "loss": 0.1037, "lr": 1.1265871095115315e-06, "epoch": 9.137214137214137, "percentage": 91.37, "elapsed_time": "0:45:14", "remaining_time": "0:04:16", "throughput": 1233.3, "total_tokens": 3347664}
3535
+ {"current_steps": 17585, "total_steps": 19240, "loss": 0.1624, "lr": 1.1198657559123888e-06, "epoch": 9.13981288981289, "percentage": 91.4, "elapsed_time": "0:45:15", "remaining_time": "0:04:15", "throughput": 1233.33, "total_tokens": 3348624}
3536
+ {"current_steps": 17590, "total_steps": 19240, "loss": 0.1071, "lr": 1.1131640531629377e-06, "epoch": 9.142411642411643, "percentage": 91.42, "elapsed_time": "0:45:15", "remaining_time": "0:04:14", "throughput": 1233.32, "total_tokens": 3349520}
3537
+ {"current_steps": 17595, "total_steps": 19240, "loss": 0.1229, "lr": 1.1064820067779897e-06, "epoch": 9.145010395010395, "percentage": 91.45, "elapsed_time": "0:45:16", "remaining_time": "0:04:13", "throughput": 1233.35, "total_tokens": 3350480}
3538
+ {"current_steps": 17600, "total_steps": 19240, "loss": 0.1358, "lr": 1.0998196222561568e-06, "epoch": 9.147609147609147, "percentage": 91.48, "elapsed_time": "0:45:17", "remaining_time": "0:04:13", "throughput": 1233.35, "total_tokens": 3351376}
3539
+ {"current_steps": 17605, "total_steps": 19240, "loss": 0.0697, "lr": 1.093176905079893e-06, "epoch": 9.1502079002079, "percentage": 91.5, "elapsed_time": "0:45:18", "remaining_time": "0:04:12", "throughput": 1233.32, "total_tokens": 3352208}
3540
+ {"current_steps": 17610, "total_steps": 19240, "loss": 0.1346, "lr": 1.0865538607154557e-06, "epoch": 9.152806652806653, "percentage": 91.53, "elapsed_time": "0:45:18", "remaining_time": "0:04:11", "throughput": 1233.36, "total_tokens": 3353200}
3541
+ {"current_steps": 17615, "total_steps": 19240, "loss": 0.1145, "lr": 1.0799504946129135e-06, "epoch": 9.155405405405405, "percentage": 91.55, "elapsed_time": "0:45:19", "remaining_time": "0:04:10", "throughput": 1233.4, "total_tokens": 3354192}
3542
+ {"current_steps": 17620, "total_steps": 19240, "loss": 0.0518, "lr": 1.0733668122061503e-06, "epoch": 9.158004158004157, "percentage": 91.58, "elapsed_time": "0:45:20", "remaining_time": "0:04:10", "throughput": 1233.41, "total_tokens": 3355120}
3543
+ {"current_steps": 17625, "total_steps": 19240, "loss": 0.1163, "lr": 1.0668028189128431e-06, "epoch": 9.160602910602911, "percentage": 91.61, "elapsed_time": "0:45:20", "remaining_time": "0:04:09", "throughput": 1233.43, "total_tokens": 3356080}
3544
+ {"current_steps": 17630, "total_steps": 19240, "loss": 0.2295, "lr": 1.0602585201344772e-06, "epoch": 9.163201663201663, "percentage": 91.63, "elapsed_time": "0:45:21", "remaining_time": "0:04:08", "throughput": 1233.44, "total_tokens": 3357008}
3545
+ {"current_steps": 17635, "total_steps": 19240, "loss": 0.0837, "lr": 1.053733921256317e-06, "epoch": 9.165800415800415, "percentage": 91.66, "elapsed_time": "0:45:22", "remaining_time": "0:04:07", "throughput": 1233.49, "total_tokens": 3358032}
3546
+ {"current_steps": 17640, "total_steps": 19240, "loss": 0.1395, "lr": 1.0472290276474312e-06, "epoch": 9.16839916839917, "percentage": 91.68, "elapsed_time": "0:45:23", "remaining_time": "0:04:06", "throughput": 1233.53, "total_tokens": 3359024}
3547
+ {"current_steps": 17645, "total_steps": 19240, "loss": 0.1138, "lr": 1.0407438446606633e-06, "epoch": 9.170997920997921, "percentage": 91.71, "elapsed_time": "0:45:23", "remaining_time": "0:04:06", "throughput": 1233.55, "total_tokens": 3359984}
3548
+ {"current_steps": 17650, "total_steps": 19240, "loss": 0.2555, "lr": 1.034278377632636e-06, "epoch": 9.173596673596673, "percentage": 91.74, "elapsed_time": "0:45:24", "remaining_time": "0:04:05", "throughput": 1233.59, "total_tokens": 3361008}
3549
+ {"current_steps": 17655, "total_steps": 19240, "loss": 0.0563, "lr": 1.0278326318837571e-06, "epoch": 9.176195426195425, "percentage": 91.76, "elapsed_time": "0:45:25", "remaining_time": "0:04:04", "throughput": 1233.6, "total_tokens": 3361936}
3550
+ {"current_steps": 17660, "total_steps": 19240, "loss": 0.1211, "lr": 1.0214066127181953e-06, "epoch": 9.17879417879418, "percentage": 91.79, "elapsed_time": "0:45:26", "remaining_time": "0:04:03", "throughput": 1233.63, "total_tokens": 3362896}
3551
+ {"current_steps": 17665, "total_steps": 19240, "loss": 0.1917, "lr": 1.0150003254238983e-06, "epoch": 9.181392931392931, "percentage": 91.81, "elapsed_time": "0:45:26", "remaining_time": "0:04:03", "throughput": 1233.62, "total_tokens": 3363792}
3552
+ {"current_steps": 17670, "total_steps": 19240, "loss": 0.0987, "lr": 1.0086137752725655e-06, "epoch": 9.183991683991684, "percentage": 91.84, "elapsed_time": "0:45:27", "remaining_time": "0:04:02", "throughput": 1233.65, "total_tokens": 3364752}
3553
+ {"current_steps": 17675, "total_steps": 19240, "loss": 0.1702, "lr": 1.0022469675196572e-06, "epoch": 9.186590436590437, "percentage": 91.87, "elapsed_time": "0:45:28", "remaining_time": "0:04:01", "throughput": 1233.66, "total_tokens": 3365680}
3554
+ {"current_steps": 17680, "total_steps": 19240, "loss": 0.1476, "lr": 9.958999074043935e-07, "epoch": 9.18918918918919, "percentage": 91.89, "elapsed_time": "0:45:28", "remaining_time": "0:04:00", "throughput": 1233.68, "total_tokens": 3366640}
3555
+ {"current_steps": 17685, "total_steps": 19240, "loss": 0.1991, "lr": 9.895726001497352e-07, "epoch": 9.191787941787942, "percentage": 91.92, "elapsed_time": "0:45:29", "remaining_time": "0:04:00", "throughput": 1233.73, "total_tokens": 3367664}
3556
+ {"current_steps": 17690, "total_steps": 19240, "loss": 0.1142, "lr": 9.83265050962398e-07, "epoch": 9.194386694386694, "percentage": 91.94, "elapsed_time": "0:45:30", "remaining_time": "0:03:59", "throughput": 1233.73, "total_tokens": 3368560}
3557
+ {"current_steps": 17695, "total_steps": 19240, "loss": 0.1297, "lr": 9.769772650328328e-07, "epoch": 9.196985446985448, "percentage": 91.97, "elapsed_time": "0:45:31", "remaining_time": "0:03:58", "throughput": 1233.74, "total_tokens": 3369488}
3558
+ {"current_steps": 17700, "total_steps": 19240, "loss": 0.1257, "lr": 9.707092475352285e-07, "epoch": 9.1995841995842, "percentage": 92.0, "elapsed_time": "0:45:31", "remaining_time": "0:03:57", "throughput": 1233.74, "total_tokens": 3370416}
3559
+ {"current_steps": 17705, "total_steps": 19240, "loss": 0.1332, "lr": 9.644610036275093e-07, "epoch": 9.202182952182952, "percentage": 92.02, "elapsed_time": "0:45:32", "remaining_time": "0:03:56", "throughput": 1233.76, "total_tokens": 3371344}
3560
+ {"current_steps": 17710, "total_steps": 19240, "loss": 0.1221, "lr": 9.58232538451323e-07, "epoch": 9.204781704781706, "percentage": 92.05, "elapsed_time": "0:45:33", "remaining_time": "0:03:56", "throughput": 1233.78, "total_tokens": 3372304}
3561
+ {"current_steps": 17715, "total_steps": 19240, "loss": 0.1727, "lr": 9.520238571320423e-07, "epoch": 9.207380457380458, "percentage": 92.07, "elapsed_time": "0:45:34", "remaining_time": "0:03:55", "throughput": 1233.81, "total_tokens": 3373264}
3562
+ {"current_steps": 17720, "total_steps": 19240, "loss": 0.0528, "lr": 9.458349647787662e-07, "epoch": 9.20997920997921, "percentage": 92.1, "elapsed_time": "0:45:34", "remaining_time": "0:03:54", "throughput": 1233.81, "total_tokens": 3374160}
3563
+ {"current_steps": 17725, "total_steps": 19240, "loss": 0.1245, "lr": 9.396658664843017e-07, "epoch": 9.212577962577962, "percentage": 92.13, "elapsed_time": "0:45:35", "remaining_time": "0:03:53", "throughput": 1233.81, "total_tokens": 3375056}
3564
+ {"current_steps": 17730, "total_steps": 19240, "loss": 0.1411, "lr": 9.335165673251739e-07, "epoch": 9.215176715176716, "percentage": 92.15, "elapsed_time": "0:45:36", "remaining_time": "0:03:53", "throughput": 1233.84, "total_tokens": 3376016}
3565
+ {"current_steps": 17735, "total_steps": 19240, "loss": 0.1198, "lr": 9.273870723616129e-07, "epoch": 9.217775467775468, "percentage": 92.18, "elapsed_time": "0:45:36", "remaining_time": "0:03:52", "throughput": 1233.86, "total_tokens": 3376976}
3566
+ {"current_steps": 17740, "total_steps": 19240, "loss": 0.2093, "lr": 9.212773866375424e-07, "epoch": 9.22037422037422, "percentage": 92.2, "elapsed_time": "0:45:37", "remaining_time": "0:03:51", "throughput": 1233.89, "total_tokens": 3377936}
3567
+ {"current_steps": 17745, "total_steps": 19240, "loss": 0.1335, "lr": 9.151875151806044e-07, "epoch": 9.222972972972974, "percentage": 92.23, "elapsed_time": "0:45:38", "remaining_time": "0:03:50", "throughput": 1233.94, "total_tokens": 3378960}
3568
+ {"current_steps": 17750, "total_steps": 19240, "loss": 0.1458, "lr": 9.091174630021182e-07, "epoch": 9.225571725571726, "percentage": 92.26, "elapsed_time": "0:45:39", "remaining_time": "0:03:49", "throughput": 1233.98, "total_tokens": 3379952}
3569
+ {"current_steps": 17755, "total_steps": 19240, "loss": 0.1217, "lr": 9.030672350971076e-07, "epoch": 9.228170478170478, "percentage": 92.28, "elapsed_time": "0:45:39", "remaining_time": "0:03:49", "throughput": 1234.0, "total_tokens": 3380912}
3570
+ {"current_steps": 17760, "total_steps": 19240, "loss": 0.159, "lr": 8.970368364442705e-07, "epoch": 9.23076923076923, "percentage": 92.31, "elapsed_time": "0:45:40", "remaining_time": "0:03:48", "throughput": 1234.01, "total_tokens": 3381808}
3571
+ {"current_steps": 17765, "total_steps": 19240, "loss": 0.2691, "lr": 8.910262720059959e-07, "epoch": 9.233367983367984, "percentage": 92.33, "elapsed_time": "0:45:41", "remaining_time": "0:03:47", "throughput": 1234.05, "total_tokens": 3382800}
3572
+ {"current_steps": 17770, "total_steps": 19240, "loss": 0.1106, "lr": 8.850355467283494e-07, "epoch": 9.235966735966736, "percentage": 92.36, "elapsed_time": "0:45:41", "remaining_time": "0:03:46", "throughput": 1234.07, "total_tokens": 3383760}
3573
+ {"current_steps": 17775, "total_steps": 19240, "loss": 0.239, "lr": 8.790646655410684e-07, "epoch": 9.238565488565488, "percentage": 92.39, "elapsed_time": "0:45:42", "remaining_time": "0:03:46", "throughput": 1234.11, "total_tokens": 3384784}
3574
+ {"current_steps": 17780, "total_steps": 19240, "loss": 0.0808, "lr": 8.731136333575668e-07, "epoch": 9.241164241164242, "percentage": 92.41, "elapsed_time": "0:45:43", "remaining_time": "0:03:45", "throughput": 1234.16, "total_tokens": 3385808}
3575
+ {"current_steps": 17785, "total_steps": 19240, "loss": 0.2068, "lr": 8.671824550749164e-07, "epoch": 9.243762993762994, "percentage": 92.44, "elapsed_time": "0:45:44", "remaining_time": "0:03:44", "throughput": 1234.18, "total_tokens": 3386736}
3576
+ {"current_steps": 17790, "total_steps": 19240, "loss": 0.1051, "lr": 8.612711355738601e-07, "epoch": 9.246361746361746, "percentage": 92.46, "elapsed_time": "0:45:44", "remaining_time": "0:03:43", "throughput": 1234.21, "total_tokens": 3387728}
3577
+ {"current_steps": 17795, "total_steps": 19240, "loss": 0.1726, "lr": 8.553796797187902e-07, "epoch": 9.248960498960498, "percentage": 92.49, "elapsed_time": "0:45:45", "remaining_time": "0:03:42", "throughput": 1234.24, "total_tokens": 3388688}
3578
+ {"current_steps": 17800, "total_steps": 19240, "loss": 0.1076, "lr": 8.495080923577619e-07, "epoch": 9.251559251559252, "percentage": 92.52, "elapsed_time": "0:45:46", "remaining_time": "0:03:42", "throughput": 1234.27, "total_tokens": 3389680}
3579
+ {"current_steps": 17805, "total_steps": 19240, "loss": 0.1169, "lr": 8.436563783224744e-07, "epoch": 9.254158004158004, "percentage": 92.54, "elapsed_time": "0:45:47", "remaining_time": "0:03:41", "throughput": 1234.29, "total_tokens": 3390608}
3580
+ {"current_steps": 17810, "total_steps": 19240, "loss": 0.1335, "lr": 8.378245424282755e-07, "epoch": 9.256756756756756, "percentage": 92.57, "elapsed_time": "0:45:47", "remaining_time": "0:03:40", "throughput": 1234.31, "total_tokens": 3391568}
3581
+ {"current_steps": 17815, "total_steps": 19240, "loss": 0.1148, "lr": 8.320125894741598e-07, "epoch": 9.25935550935551, "percentage": 92.59, "elapsed_time": "0:45:48", "remaining_time": "0:03:39", "throughput": 1234.3, "total_tokens": 3392432}
3582
+ {"current_steps": 17820, "total_steps": 19240, "loss": 0.2471, "lr": 8.262205242427462e-07, "epoch": 9.261954261954262, "percentage": 92.62, "elapsed_time": "0:45:49", "remaining_time": "0:03:39", "throughput": 1234.34, "total_tokens": 3393424}
3583
+ {"current_steps": 17825, "total_steps": 19240, "loss": 0.1624, "lr": 8.204483515003081e-07, "epoch": 9.264553014553014, "percentage": 92.65, "elapsed_time": "0:45:49", "remaining_time": "0:03:38", "throughput": 1234.38, "total_tokens": 3394416}
3584
+ {"current_steps": 17830, "total_steps": 19240, "loss": 0.2033, "lr": 8.146960759967348e-07, "epoch": 9.267151767151766, "percentage": 92.67, "elapsed_time": "0:45:50", "remaining_time": "0:03:37", "throughput": 1234.39, "total_tokens": 3395344}
3585
+ {"current_steps": 17835, "total_steps": 19240, "loss": 0.0657, "lr": 8.089637024655483e-07, "epoch": 9.26975051975052, "percentage": 92.7, "elapsed_time": "0:45:51", "remaining_time": "0:03:36", "throughput": 1234.41, "total_tokens": 3396272}
3586
+ {"current_steps": 17840, "total_steps": 19240, "loss": 0.0863, "lr": 8.032512356238864e-07, "epoch": 9.272349272349272, "percentage": 92.72, "elapsed_time": "0:45:52", "remaining_time": "0:03:35", "throughput": 1234.44, "total_tokens": 3397264}
3587
+ {"current_steps": 17845, "total_steps": 19240, "loss": 0.1738, "lr": 7.975586801725194e-07, "epoch": 9.274948024948024, "percentage": 92.75, "elapsed_time": "0:45:52", "remaining_time": "0:03:35", "throughput": 1234.45, "total_tokens": 3398192}
3588
+ {"current_steps": 17850, "total_steps": 19240, "loss": 0.1029, "lr": 7.91886040795814e-07, "epoch": 9.277546777546778, "percentage": 92.78, "elapsed_time": "0:45:53", "remaining_time": "0:03:34", "throughput": 1234.47, "total_tokens": 3399120}
3589
+ {"current_steps": 17855, "total_steps": 19240, "loss": 0.1444, "lr": 7.862333221617668e-07, "epoch": 9.28014553014553, "percentage": 92.8, "elapsed_time": "0:45:54", "remaining_time": "0:03:33", "throughput": 1234.5, "total_tokens": 3400080}
3590
+ {"current_steps": 17860, "total_steps": 19240, "loss": 0.1011, "lr": 7.806005289219737e-07, "epoch": 9.282744282744282, "percentage": 92.83, "elapsed_time": "0:45:54", "remaining_time": "0:03:32", "throughput": 1234.53, "total_tokens": 3401072}
3591
+ {"current_steps": 17865, "total_steps": 19240, "loss": 0.0576, "lr": 7.749876657116295e-07, "epoch": 9.285343035343036, "percentage": 92.85, "elapsed_time": "0:45:55", "remaining_time": "0:03:32", "throughput": 1234.55, "total_tokens": 3402000}
3592
+ {"current_steps": 17870, "total_steps": 19240, "loss": 0.1148, "lr": 7.693947371495313e-07, "epoch": 9.287941787941788, "percentage": 92.88, "elapsed_time": "0:45:56", "remaining_time": "0:03:31", "throughput": 1234.6, "total_tokens": 3403024}
3593
+ {"current_steps": 17875, "total_steps": 19240, "loss": 0.1021, "lr": 7.638217478380782e-07, "epoch": 9.29054054054054, "percentage": 92.91, "elapsed_time": "0:45:57", "remaining_time": "0:03:30", "throughput": 1234.61, "total_tokens": 3403952}
3594
+ {"current_steps": 17880, "total_steps": 19240, "loss": 0.1638, "lr": 7.582687023632545e-07, "epoch": 9.293139293139292, "percentage": 92.93, "elapsed_time": "0:45:57", "remaining_time": "0:03:29", "throughput": 1234.65, "total_tokens": 3404944}
3595
+ {"current_steps": 17885, "total_steps": 19240, "loss": 0.1531, "lr": 7.527356052946327e-07, "epoch": 9.295738045738046, "percentage": 92.96, "elapsed_time": "0:45:58", "remaining_time": "0:03:28", "throughput": 1234.69, "total_tokens": 3405936}
3596
+ {"current_steps": 17890, "total_steps": 19240, "loss": 0.2226, "lr": 7.47222461185379e-07, "epoch": 9.298336798336798, "percentage": 92.98, "elapsed_time": "0:45:59", "remaining_time": "0:03:28", "throughput": 1234.7, "total_tokens": 3406864}
3597
+ {"current_steps": 17895, "total_steps": 19240, "loss": 0.0866, "lr": 7.417292745722282e-07, "epoch": 9.30093555093555, "percentage": 93.01, "elapsed_time": "0:45:59", "remaining_time": "0:03:27", "throughput": 1234.74, "total_tokens": 3407856}
3598
+ {"current_steps": 17900, "total_steps": 19240, "loss": 0.0624, "lr": 7.362560499755006e-07, "epoch": 9.303534303534304, "percentage": 93.04, "elapsed_time": "0:46:00", "remaining_time": "0:03:26", "throughput": 1234.77, "total_tokens": 3408848}
3599
+ {"current_steps": 17905, "total_steps": 19240, "loss": 0.0868, "lr": 7.30802791899085e-07, "epoch": 9.306133056133056, "percentage": 93.06, "elapsed_time": "0:46:01", "remaining_time": "0:03:25", "throughput": 1234.8, "total_tokens": 3409808}
3600
+ {"current_steps": 17910, "total_steps": 19240, "loss": 0.1577, "lr": 7.253695048304394e-07, "epoch": 9.308731808731808, "percentage": 93.09, "elapsed_time": "0:46:02", "remaining_time": "0:03:25", "throughput": 1234.84, "total_tokens": 3410800}
3601
+ {"current_steps": 17915, "total_steps": 19240, "loss": 0.151, "lr": 7.199561932405952e-07, "epoch": 9.31133056133056, "percentage": 93.11, "elapsed_time": "0:46:02", "remaining_time": "0:03:24", "throughput": 1234.86, "total_tokens": 3411760}
3602
+ {"current_steps": 17920, "total_steps": 19240, "loss": 0.1485, "lr": 7.145628615841365e-07, "epoch": 9.313929313929314, "percentage": 93.14, "elapsed_time": "0:46:03", "remaining_time": "0:03:23", "throughput": 1234.86, "total_tokens": 3412656}
3603
+ {"current_steps": 17925, "total_steps": 19240, "loss": 0.3089, "lr": 7.091895142992133e-07, "epoch": 9.316528066528067, "percentage": 93.17, "elapsed_time": "0:46:04", "remaining_time": "0:03:22", "throughput": 1234.91, "total_tokens": 3413680}
3604
+ {"current_steps": 17930, "total_steps": 19240, "loss": 0.0927, "lr": 7.038361558075273e-07, "epoch": 9.319126819126819, "percentage": 93.19, "elapsed_time": "0:46:05", "remaining_time": "0:03:22", "throughput": 1234.97, "total_tokens": 3414736}
3605
+ {"current_steps": 17935, "total_steps": 19240, "loss": 0.2016, "lr": 6.985027905143299e-07, "epoch": 9.321725571725572, "percentage": 93.22, "elapsed_time": "0:46:05", "remaining_time": "0:03:21", "throughput": 1234.99, "total_tokens": 3415696}
3606
+ {"current_steps": 17940, "total_steps": 19240, "loss": 0.2364, "lr": 6.931894228084268e-07, "epoch": 9.324324324324325, "percentage": 93.24, "elapsed_time": "0:46:06", "remaining_time": "0:03:20", "throughput": 1235.04, "total_tokens": 3416720}
3607
+ {"current_steps": 17945, "total_steps": 19240, "loss": 0.117, "lr": 6.878960570621568e-07, "epoch": 9.326923076923077, "percentage": 93.27, "elapsed_time": "0:46:07", "remaining_time": "0:03:19", "throughput": 1235.07, "total_tokens": 3417680}
3608
+ {"current_steps": 17950, "total_steps": 19240, "loss": 0.1642, "lr": 6.826226976314104e-07, "epoch": 9.329521829521829, "percentage": 93.3, "elapsed_time": "0:46:07", "remaining_time": "0:03:18", "throughput": 1235.09, "total_tokens": 3418640}
3609
+ {"current_steps": 17955, "total_steps": 19240, "loss": 0.0564, "lr": 6.773693488556083e-07, "epoch": 9.332120582120583, "percentage": 93.32, "elapsed_time": "0:46:08", "remaining_time": "0:03:18", "throughput": 1235.08, "total_tokens": 3419504}
3610
+ {"current_steps": 17960, "total_steps": 19240, "loss": 0.1018, "lr": 6.721360150577089e-07, "epoch": 9.334719334719335, "percentage": 93.35, "elapsed_time": "0:46:09", "remaining_time": "0:03:17", "throughput": 1235.11, "total_tokens": 3420464}
3611
+ {"current_steps": 17965, "total_steps": 19240, "loss": 0.2193, "lr": 6.669227005441953e-07, "epoch": 9.337318087318087, "percentage": 93.37, "elapsed_time": "0:46:10", "remaining_time": "0:03:16", "throughput": 1235.13, "total_tokens": 3421424}
3612
+ {"current_steps": 17970, "total_steps": 19240, "loss": 0.1283, "lr": 6.617294096050802e-07, "epoch": 9.33991683991684, "percentage": 93.4, "elapsed_time": "0:46:10", "remaining_time": "0:03:15", "throughput": 1235.16, "total_tokens": 3422384}
3613
+ {"current_steps": 17975, "total_steps": 19240, "loss": 0.2457, "lr": 6.565561465138953e-07, "epoch": 9.342515592515593, "percentage": 93.43, "elapsed_time": "0:46:11", "remaining_time": "0:03:15", "throughput": 1235.2, "total_tokens": 3423376}
3614
+ {"current_steps": 17980, "total_steps": 19240, "loss": 0.2398, "lr": 6.514029155276962e-07, "epoch": 9.345114345114345, "percentage": 93.45, "elapsed_time": "0:46:12", "remaining_time": "0:03:14", "throughput": 1235.21, "total_tokens": 3424304}
3615
+ {"current_steps": 17985, "total_steps": 19240, "loss": 0.1667, "lr": 6.46269720887055e-07, "epoch": 9.347713097713097, "percentage": 93.48, "elapsed_time": "0:46:12", "remaining_time": "0:03:13", "throughput": 1235.22, "total_tokens": 3425232}
3616
+ {"current_steps": 17990, "total_steps": 19240, "loss": 0.1807, "lr": 6.411565668160507e-07, "epoch": 9.35031185031185, "percentage": 93.5, "elapsed_time": "0:46:13", "remaining_time": "0:03:12", "throughput": 1235.24, "total_tokens": 3426160}
3617
+ {"current_steps": 17995, "total_steps": 19240, "loss": 0.1407, "lr": 6.360634575222762e-07, "epoch": 9.352910602910603, "percentage": 93.53, "elapsed_time": "0:46:14", "remaining_time": "0:03:11", "throughput": 1235.32, "total_tokens": 3427280}
3618
+ {"current_steps": 18000, "total_steps": 19240, "loss": 0.0788, "lr": 6.309903971968262e-07, "epoch": 9.355509355509355, "percentage": 93.56, "elapsed_time": "0:46:15", "remaining_time": "0:03:11", "throughput": 1235.32, "total_tokens": 3428176}
3619
+ {"current_steps": 18005, "total_steps": 19240, "loss": 0.0946, "lr": 6.259373900142945e-07, "epoch": 9.358108108108109, "percentage": 93.58, "elapsed_time": "0:46:15", "remaining_time": "0:03:10", "throughput": 1235.36, "total_tokens": 3429168}
3620
+ {"current_steps": 18010, "total_steps": 19240, "loss": 0.2485, "lr": 6.209044401327801e-07, "epoch": 9.36070686070686, "percentage": 93.61, "elapsed_time": "0:46:16", "remaining_time": "0:03:09", "throughput": 1235.4, "total_tokens": 3430160}
3621
+ {"current_steps": 18015, "total_steps": 19240, "loss": 0.1955, "lr": 6.158915516938729e-07, "epoch": 9.363305613305613, "percentage": 93.63, "elapsed_time": "0:46:17", "remaining_time": "0:03:08", "throughput": 1235.42, "total_tokens": 3431120}
3622
+ {"current_steps": 18020, "total_steps": 19240, "loss": 0.0935, "lr": 6.108987288226536e-07, "epoch": 9.365904365904367, "percentage": 93.66, "elapsed_time": "0:46:18", "remaining_time": "0:03:08", "throughput": 1235.42, "total_tokens": 3432016}
3623
+ {"current_steps": 18025, "total_steps": 19240, "loss": 0.1785, "lr": 6.059259756276969e-07, "epoch": 9.368503118503119, "percentage": 93.69, "elapsed_time": "0:46:18", "remaining_time": "0:03:07", "throughput": 1235.44, "total_tokens": 3432944}
3624
+ {"current_steps": 18030, "total_steps": 19240, "loss": 0.1586, "lr": 6.009732962010544e-07, "epoch": 9.371101871101871, "percentage": 93.71, "elapsed_time": "0:46:19", "remaining_time": "0:03:06", "throughput": 1235.46, "total_tokens": 3433904}
3625
+ {"current_steps": 18035, "total_steps": 19240, "loss": 0.2454, "lr": 5.960406946182634e-07, "epoch": 9.373700623700623, "percentage": 93.74, "elapsed_time": "0:46:20", "remaining_time": "0:03:05", "throughput": 1235.49, "total_tokens": 3434864}
3626
+ {"current_steps": 18040, "total_steps": 19240, "loss": 0.2356, "lr": 5.91128174938338e-07, "epoch": 9.376299376299377, "percentage": 93.76, "elapsed_time": "0:46:20", "remaining_time": "0:03:04", "throughput": 1235.49, "total_tokens": 3435760}
3627
+ {"current_steps": 18045, "total_steps": 19240, "loss": 0.1318, "lr": 5.862357412037666e-07, "epoch": 9.378898128898129, "percentage": 93.79, "elapsed_time": "0:46:21", "remaining_time": "0:03:04", "throughput": 1235.49, "total_tokens": 3436656}
3628
+ {"current_steps": 18050, "total_steps": 19240, "loss": 0.0882, "lr": 5.813633974405153e-07, "epoch": 9.381496881496881, "percentage": 93.81, "elapsed_time": "0:46:22", "remaining_time": "0:03:03", "throughput": 1235.52, "total_tokens": 3437616}
3629
+ {"current_steps": 18055, "total_steps": 19240, "loss": 0.1346, "lr": 5.765111476580043e-07, "epoch": 9.384095634095633, "percentage": 93.84, "elapsed_time": "0:46:23", "remaining_time": "0:03:02", "throughput": 1235.52, "total_tokens": 3438512}
3630
+ {"current_steps": 18060, "total_steps": 19240, "loss": 0.2359, "lr": 5.716789958491342e-07, "epoch": 9.386694386694387, "percentage": 93.87, "elapsed_time": "0:46:23", "remaining_time": "0:03:01", "throughput": 1235.55, "total_tokens": 3439472}
3631
+ {"current_steps": 18065, "total_steps": 19240, "loss": 0.2588, "lr": 5.668669459902576e-07, "epoch": 9.38929313929314, "percentage": 93.89, "elapsed_time": "0:46:24", "remaining_time": "0:03:01", "throughput": 1235.56, "total_tokens": 3440400}
3632
+ {"current_steps": 18070, "total_steps": 19240, "loss": 0.1449, "lr": 5.620750020411847e-07, "epoch": 9.391891891891891, "percentage": 93.92, "elapsed_time": "0:46:25", "remaining_time": "0:03:00", "throughput": 1235.57, "total_tokens": 3441328}
3633
+ {"current_steps": 18075, "total_steps": 19240, "loss": 0.1339, "lr": 5.573031679451863e-07, "epoch": 9.394490644490645, "percentage": 93.94, "elapsed_time": "0:46:25", "remaining_time": "0:02:59", "throughput": 1235.59, "total_tokens": 3442256}
3634
+ {"current_steps": 18080, "total_steps": 19240, "loss": 0.2348, "lr": 5.525514476289823e-07, "epoch": 9.397089397089397, "percentage": 93.97, "elapsed_time": "0:46:26", "remaining_time": "0:02:58", "throughput": 1235.61, "total_tokens": 3443216}
3635
+ {"current_steps": 18085, "total_steps": 19240, "loss": 0.1133, "lr": 5.478198450027422e-07, "epoch": 9.39968814968815, "percentage": 94.0, "elapsed_time": "0:46:27", "remaining_time": "0:02:58", "throughput": 1235.64, "total_tokens": 3444176}
3636
+ {"current_steps": 18090, "total_steps": 19240, "loss": 0.1224, "lr": 5.431083639600737e-07, "epoch": 9.402286902286903, "percentage": 94.02, "elapsed_time": "0:46:28", "remaining_time": "0:02:57", "throughput": 1235.63, "total_tokens": 3445040}
3637
+ {"current_steps": 18095, "total_steps": 19240, "loss": 0.1188, "lr": 5.384170083780421e-07, "epoch": 9.404885654885655, "percentage": 94.05, "elapsed_time": "0:46:28", "remaining_time": "0:02:56", "throughput": 1235.65, "total_tokens": 3446000}
3638
+ {"current_steps": 18100, "total_steps": 19240, "loss": 0.1557, "lr": 5.337457821171316e-07, "epoch": 9.407484407484407, "percentage": 94.07, "elapsed_time": "0:46:29", "remaining_time": "0:02:55", "throughput": 1235.67, "total_tokens": 3446928}
3639
+ {"current_steps": 18105, "total_steps": 19240, "loss": 0.2421, "lr": 5.290946890212756e-07, "epoch": 9.41008316008316, "percentage": 94.1, "elapsed_time": "0:46:30", "remaining_time": "0:02:54", "throughput": 1235.67, "total_tokens": 3447824}
3640
+ {"current_steps": 18110, "total_steps": 19240, "loss": 0.1181, "lr": 5.244637329178403e-07, "epoch": 9.412681912681913, "percentage": 94.13, "elapsed_time": "0:46:30", "remaining_time": "0:02:54", "throughput": 1235.7, "total_tokens": 3448816}
3641
+ {"current_steps": 18115, "total_steps": 19240, "loss": 0.2319, "lr": 5.198529176176109e-07, "epoch": 9.415280665280665, "percentage": 94.15, "elapsed_time": "0:46:31", "remaining_time": "0:02:53", "throughput": 1235.74, "total_tokens": 3449808}
3642
+ {"current_steps": 18120, "total_steps": 19240, "loss": 0.1182, "lr": 5.152622469148133e-07, "epoch": 9.417879417879417, "percentage": 94.18, "elapsed_time": "0:46:32", "remaining_time": "0:02:52", "throughput": 1235.75, "total_tokens": 3450736}
3643
+ {"current_steps": 18125, "total_steps": 19240, "loss": 0.0914, "lr": 5.10691724587084e-07, "epoch": 9.420478170478171, "percentage": 94.2, "elapsed_time": "0:46:33", "remaining_time": "0:02:51", "throughput": 1235.77, "total_tokens": 3451664}
3644
+ {"current_steps": 18130, "total_steps": 19240, "loss": 0.2006, "lr": 5.061413543954868e-07, "epoch": 9.423076923076923, "percentage": 94.23, "elapsed_time": "0:46:33", "remaining_time": "0:02:51", "throughput": 1235.79, "total_tokens": 3452624}
3645
+ {"current_steps": 18135, "total_steps": 19240, "loss": 0.0628, "lr": 5.016111400844958e-07, "epoch": 9.425675675675675, "percentage": 94.26, "elapsed_time": "0:46:34", "remaining_time": "0:02:50", "throughput": 1235.81, "total_tokens": 3453552}
3646
+ {"current_steps": 18140, "total_steps": 19240, "loss": 0.2556, "lr": 4.971010853820069e-07, "epoch": 9.428274428274428, "percentage": 94.28, "elapsed_time": "0:46:35", "remaining_time": "0:02:49", "throughput": 1235.85, "total_tokens": 3454576}
3647
+ {"current_steps": 18145, "total_steps": 19240, "loss": 0.0742, "lr": 4.926111939993206e-07, "epoch": 9.430873180873181, "percentage": 94.31, "elapsed_time": "0:46:36", "remaining_time": "0:02:48", "throughput": 1235.87, "total_tokens": 3455504}
3648
+ {"current_steps": 18150, "total_steps": 19240, "loss": 0.0885, "lr": 4.881414696311482e-07, "epoch": 9.433471933471933, "percentage": 94.33, "elapsed_time": "0:46:36", "remaining_time": "0:02:47", "throughput": 1235.88, "total_tokens": 3456432}
3649
+ {"current_steps": 18155, "total_steps": 19240, "loss": 0.122, "lr": 4.83691915955603e-07, "epoch": 9.436070686070686, "percentage": 94.36, "elapsed_time": "0:46:37", "remaining_time": "0:02:47", "throughput": 1235.92, "total_tokens": 3457424}
3650
+ {"current_steps": 18160, "total_steps": 19240, "loss": 0.1111, "lr": 4.792625366342062e-07, "epoch": 9.43866943866944, "percentage": 94.39, "elapsed_time": "0:46:38", "remaining_time": "0:02:46", "throughput": 1235.93, "total_tokens": 3458352}
3651
+ {"current_steps": 18165, "total_steps": 19240, "loss": 0.1137, "lr": 4.7485333531187003e-07, "epoch": 9.441268191268192, "percentage": 94.41, "elapsed_time": "0:46:38", "remaining_time": "0:02:45", "throughput": 1235.96, "total_tokens": 3459312}
3652
+ {"current_steps": 18170, "total_steps": 19240, "loss": 0.1974, "lr": 4.7046431561690307e-07, "epoch": 9.443866943866944, "percentage": 94.44, "elapsed_time": "0:46:39", "remaining_time": "0:02:44", "throughput": 1235.98, "total_tokens": 3460272}
3653
+ {"current_steps": 18175, "total_steps": 19240, "loss": 0.1467, "lr": 4.6609548116101354e-07, "epoch": 9.446465696465696, "percentage": 94.46, "elapsed_time": "0:46:40", "remaining_time": "0:02:44", "throughput": 1236.02, "total_tokens": 3461264}
3654
+ {"current_steps": 18180, "total_steps": 19240, "loss": 0.0769, "lr": 4.6174683553928954e-07, "epoch": 9.44906444906445, "percentage": 94.49, "elapsed_time": "0:46:41", "remaining_time": "0:02:43", "throughput": 1236.02, "total_tokens": 3462160}
3655
+ {"current_steps": 18185, "total_steps": 19240, "loss": 0.1665, "lr": 4.574183823302186e-07, "epoch": 9.451663201663202, "percentage": 94.52, "elapsed_time": "0:46:41", "remaining_time": "0:02:42", "throughput": 1236.06, "total_tokens": 3463152}
3656
+ {"current_steps": 18190, "total_steps": 19240, "loss": 0.1163, "lr": 4.531101250956571e-07, "epoch": 9.454261954261954, "percentage": 94.54, "elapsed_time": "0:46:42", "remaining_time": "0:02:41", "throughput": 1236.08, "total_tokens": 3464112}
3657
+ {"current_steps": 18195, "total_steps": 19240, "loss": 0.12, "lr": 4.4882206738085243e-07, "epoch": 9.456860706860708, "percentage": 94.57, "elapsed_time": "0:46:43", "remaining_time": "0:02:40", "throughput": 1236.13, "total_tokens": 3465136}
3658
+ {"current_steps": 18200, "total_steps": 19240, "loss": 0.1815, "lr": 4.445542127144292e-07, "epoch": 9.45945945945946, "percentage": 94.59, "elapsed_time": "0:46:43", "remaining_time": "0:02:40", "throughput": 1236.17, "total_tokens": 3466128}
3659
+ {"current_steps": 18205, "total_steps": 19240, "loss": 0.2741, "lr": 4.403065646083809e-07, "epoch": 9.462058212058212, "percentage": 94.62, "elapsed_time": "0:46:44", "remaining_time": "0:02:39", "throughput": 1236.21, "total_tokens": 3467152}
3660
+ {"current_steps": 18210, "total_steps": 19240, "loss": 0.0911, "lr": 4.360791265580783e-07, "epoch": 9.464656964656964, "percentage": 94.65, "elapsed_time": "0:46:45", "remaining_time": "0:02:38", "throughput": 1236.23, "total_tokens": 3468080}
3661
+ {"current_steps": 18215, "total_steps": 19240, "loss": 0.272, "lr": 4.318719020422607e-07, "epoch": 9.467255717255718, "percentage": 94.67, "elapsed_time": "0:46:46", "remaining_time": "0:02:37", "throughput": 1236.21, "total_tokens": 3468912}
3662
+ {"current_steps": 18220, "total_steps": 19240, "loss": 0.2503, "lr": 4.276848945230366e-07, "epoch": 9.46985446985447, "percentage": 94.7, "elapsed_time": "0:46:46", "remaining_time": "0:02:37", "throughput": 1236.2, "total_tokens": 3469776}
3663
+ {"current_steps": 18225, "total_steps": 19240, "loss": 0.1203, "lr": 4.235181074458694e-07, "epoch": 9.472453222453222, "percentage": 94.72, "elapsed_time": "0:46:47", "remaining_time": "0:02:36", "throughput": 1236.2, "total_tokens": 3470672}
3664
+ {"current_steps": 18230, "total_steps": 19240, "loss": 0.1469, "lr": 4.193715442395885e-07, "epoch": 9.475051975051976, "percentage": 94.75, "elapsed_time": "0:46:48", "remaining_time": "0:02:35", "throughput": 1236.22, "total_tokens": 3471632}
3665
+ {"current_steps": 18235, "total_steps": 19240, "loss": 0.1232, "lr": 4.152452083163866e-07, "epoch": 9.477650727650728, "percentage": 94.78, "elapsed_time": "0:46:48", "remaining_time": "0:02:34", "throughput": 1236.24, "total_tokens": 3472560}
3666
+ {"current_steps": 18240, "total_steps": 19240, "loss": 0.076, "lr": 4.111391030718004e-07, "epoch": 9.48024948024948, "percentage": 94.8, "elapsed_time": "0:46:49", "remaining_time": "0:02:34", "throughput": 1236.24, "total_tokens": 3473456}
3667
+ {"current_steps": 18245, "total_steps": 19240, "loss": 0.17, "lr": 4.07053231884727e-07, "epoch": 9.482848232848234, "percentage": 94.83, "elapsed_time": "0:46:50", "remaining_time": "0:02:33", "throughput": 1236.24, "total_tokens": 3474352}
3668
+ {"current_steps": 18250, "total_steps": 19240, "loss": 0.1052, "lr": 4.0298759811741026e-07, "epoch": 9.485446985446986, "percentage": 94.85, "elapsed_time": "0:46:51", "remaining_time": "0:02:32", "throughput": 1236.27, "total_tokens": 3475312}
3669
+ {"current_steps": 18255, "total_steps": 19240, "loss": 0.2493, "lr": 3.989422051154407e-07, "epoch": 9.488045738045738, "percentage": 94.88, "elapsed_time": "0:46:51", "remaining_time": "0:02:31", "throughput": 1236.29, "total_tokens": 3476272}
3670
+ {"current_steps": 18260, "total_steps": 19240, "loss": 0.2108, "lr": 3.949170562077553e-07, "epoch": 9.49064449064449, "percentage": 94.91, "elapsed_time": "0:46:52", "remaining_time": "0:02:30", "throughput": 1236.31, "total_tokens": 3477200}
3671
+ {"current_steps": 18265, "total_steps": 19240, "loss": 0.1617, "lr": 3.909121547066297e-07, "epoch": 9.493243243243244, "percentage": 94.93, "elapsed_time": "0:46:53", "remaining_time": "0:02:30", "throughput": 1236.33, "total_tokens": 3478160}
3672
+ {"current_steps": 18270, "total_steps": 19240, "loss": 0.0964, "lr": 3.8692750390767196e-07, "epoch": 9.495841995841996, "percentage": 94.96, "elapsed_time": "0:46:54", "remaining_time": "0:02:29", "throughput": 1236.34, "total_tokens": 3479088}
3673
+ {"current_steps": 18275, "total_steps": 19240, "loss": 0.1693, "lr": 3.8296310708984264e-07, "epoch": 9.498440748440748, "percentage": 94.98, "elapsed_time": "0:46:54", "remaining_time": "0:02:28", "throughput": 1236.37, "total_tokens": 3480048}
3674
+ {"current_steps": 18278, "total_steps": 19240, "eval_loss": 0.145976260304451, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:47:03", "remaining_time": "0:02:28", "throughput": 1232.9, "total_tokens": 3480592}
3675
+ {"current_steps": 18280, "total_steps": 19240, "loss": 0.0859, "lr": 3.7901896751541545e-07, "epoch": 9.5010395010395, "percentage": 95.01, "elapsed_time": "0:47:04", "remaining_time": "0:02:28", "throughput": 1232.56, "total_tokens": 3481008}
3676
+ {"current_steps": 18285, "total_steps": 19240, "loss": 0.1885, "lr": 3.750950884300108e-07, "epoch": 9.503638253638254, "percentage": 95.04, "elapsed_time": "0:47:04", "remaining_time": "0:02:27", "throughput": 1232.58, "total_tokens": 3481968}