pavan-naik commited on
Commit
3ccb79c
·
verified ·
1 Parent(s): 24c9b7c

Upload train_artifacts/trainer_log.jsonl with huggingface_hub

Browse files
Files changed (1) hide show
  1. train_artifacts/trainer_log.jsonl +143 -0
train_artifacts/trainer_log.jsonl ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {"current_steps": 5, "total_steps": 7500, "loss": 4.3154, "lr": 4.9999964908081455e-05, "epoch": 0.002, "percentage": 0.07, "elapsed_time": "0:01:29", "remaining_time": "1 day, 13:26:07", "throughput": 323.19, "total_tokens": 29056}
2
+ {"current_steps": 10, "total_steps": 7500, "loss": 3.9349, "lr": 4.999982234733118e-05, "epoch": 0.004, "percentage": 0.13, "elapsed_time": "0:03:01", "remaining_time": "1 day, 13:45:43", "throughput": 324.76, "total_tokens": 58944}
3
+ {"current_steps": 15, "total_steps": 7500, "loss": 3.8089, "lr": 4.999957012512916e-05, "epoch": 0.006, "percentage": 0.2, "elapsed_time": "0:04:28", "remaining_time": "1 day, 13:12:58", "throughput": 324.95, "total_tokens": 87248}
4
+ {"current_steps": 20, "total_steps": 7500, "loss": 4.2662, "lr": 4.999920824258173e-05, "epoch": 0.008, "percentage": 0.27, "elapsed_time": "0:05:53", "remaining_time": "1 day, 12:45:55", "throughput": 325.07, "total_tokens": 115040}
5
+ {"current_steps": 25, "total_steps": 7500, "loss": 3.7189, "lr": 4.9998736701276295e-05, "epoch": 0.01, "percentage": 0.33, "elapsed_time": "0:07:13", "remaining_time": "1 day, 11:59:04", "throughput": 324.91, "total_tokens": 140768}
6
+ {"current_steps": 30, "total_steps": 7500, "loss": 3.7491, "lr": 4.999815550328128e-05, "epoch": 0.012, "percentage": 0.4, "elapsed_time": "0:08:41", "remaining_time": "1 day, 12:04:16", "throughput": 325.05, "total_tokens": 169520}
7
+ {"current_steps": 35, "total_steps": 7500, "loss": 3.5044, "lr": 4.999746465114609e-05, "epoch": 0.014, "percentage": 0.47, "elapsed_time": "0:10:03", "remaining_time": "1 day, 11:43:48", "throughput": 325.13, "total_tokens": 196080}
8
+ {"current_steps": 40, "total_steps": 7500, "loss": 3.4988, "lr": 4.999666414790113e-05, "epoch": 0.016, "percentage": 0.53, "elapsed_time": "0:11:28", "remaining_time": "1 day, 11:39:00", "throughput": 325.23, "total_tokens": 223808}
9
+ {"current_steps": 45, "total_steps": 7500, "loss": 3.4135, "lr": 4.999575399705783e-05, "epoch": 0.018, "percentage": 0.6, "elapsed_time": "0:12:57", "remaining_time": "1 day, 11:47:53", "throughput": 325.51, "total_tokens": 253216}
10
+ {"current_steps": 50, "total_steps": 7500, "loss": 3.3619, "lr": 4.999473420260853e-05, "epoch": 0.02, "percentage": 0.67, "elapsed_time": "0:14:22", "remaining_time": "1 day, 11:42:46", "throughput": 325.76, "total_tokens": 281088}
11
+ {"current_steps": 55, "total_steps": 7500, "loss": 3.5669, "lr": 4.999360476902656e-05, "epoch": 0.022, "percentage": 0.73, "elapsed_time": "0:15:52", "remaining_time": "1 day, 11:49:18", "throughput": 325.6, "total_tokens": 310192}
12
+ {"current_steps": 60, "total_steps": 7500, "loss": 3.5905, "lr": 4.999236570126616e-05, "epoch": 0.024, "percentage": 0.8, "elapsed_time": "0:17:25", "remaining_time": "1 day, 11:59:40", "throughput": 325.85, "total_tokens": 340512}
13
+ {"current_steps": 65, "total_steps": 7500, "loss": 3.6433, "lr": 4.9991017004762496e-05, "epoch": 0.026, "percentage": 0.87, "elapsed_time": "0:18:48", "remaining_time": "1 day, 11:51:48", "throughput": 325.75, "total_tokens": 367680}
14
+ {"current_steps": 70, "total_steps": 7500, "loss": 3.4039, "lr": 4.998955868543161e-05, "epoch": 0.028, "percentage": 0.93, "elapsed_time": "0:20:11", "remaining_time": "1 day, 11:43:48", "throughput": 325.81, "total_tokens": 394832}
15
+ {"current_steps": 75, "total_steps": 7500, "loss": 3.3486, "lr": 4.9987990749670395e-05, "epoch": 0.03, "percentage": 1.0, "elapsed_time": "0:21:33", "remaining_time": "1 day, 11:34:15", "throughput": 325.72, "total_tokens": 421312}
16
+ {"current_steps": 80, "total_steps": 7500, "loss": 3.4422, "lr": 4.9986313204356594e-05, "epoch": 0.032, "percentage": 1.07, "elapsed_time": "0:22:58", "remaining_time": "1 day, 11:30:37", "throughput": 325.62, "total_tokens": 448800}
17
+ {"current_steps": 85, "total_steps": 7500, "loss": 3.3835, "lr": 4.998452605684874e-05, "epoch": 0.034, "percentage": 1.13, "elapsed_time": "0:24:23", "remaining_time": "1 day, 11:27:28", "throughput": 325.72, "total_tokens": 476608}
18
+ {"current_steps": 90, "total_steps": 7500, "loss": 3.4896, "lr": 4.9982629314986126e-05, "epoch": 0.036, "percentage": 1.2, "elapsed_time": "0:25:51", "remaining_time": "1 day, 11:29:15", "throughput": 325.81, "total_tokens": 505552}
19
+ {"current_steps": 95, "total_steps": 7500, "loss": 3.5897, "lr": 4.9980622987088795e-05, "epoch": 0.038, "percentage": 1.27, "elapsed_time": "0:27:16", "remaining_time": "1 day, 11:25:23", "throughput": 325.79, "total_tokens": 532992}
20
+ {"current_steps": 100, "total_steps": 7500, "loss": 3.3488, "lr": 4.9978507081957494e-05, "epoch": 0.04, "percentage": 1.33, "elapsed_time": "0:28:44", "remaining_time": "1 day, 11:26:45", "throughput": 325.95, "total_tokens": 562064}
21
+ {"current_steps": 105, "total_steps": 7500, "loss": 3.2831, "lr": 4.997628160887361e-05, "epoch": 0.042, "percentage": 1.4, "elapsed_time": "0:30:09", "remaining_time": "1 day, 11:24:10", "throughput": 325.66, "total_tokens": 589328}
22
+ {"current_steps": 110, "total_steps": 7500, "loss": 3.3014, "lr": 4.997394657759915e-05, "epoch": 0.044, "percentage": 1.47, "elapsed_time": "0:31:30", "remaining_time": "1 day, 11:17:14", "throughput": 325.7, "total_tokens": 615872}
23
+ {"current_steps": 115, "total_steps": 7500, "loss": 3.2463, "lr": 4.997150199837671e-05, "epoch": 0.046, "percentage": 1.53, "elapsed_time": "0:32:55", "remaining_time": "1 day, 11:14:52", "throughput": 325.71, "total_tokens": 643600}
24
+ {"current_steps": 120, "total_steps": 7500, "loss": 3.479, "lr": 4.9968947881929414e-05, "epoch": 0.048, "percentage": 1.6, "elapsed_time": "0:34:16", "remaining_time": "1 day, 11:07:43", "throughput": 325.56, "total_tokens": 669456}
25
+ {"current_steps": 125, "total_steps": 7500, "loss": 3.2721, "lr": 4.996628423946087e-05, "epoch": 0.05, "percentage": 1.67, "elapsed_time": "0:35:44", "remaining_time": "1 day, 11:08:59", "throughput": 325.56, "total_tokens": 698240}
26
+ {"current_steps": 130, "total_steps": 7500, "loss": 3.5162, "lr": 4.99635110826551e-05, "epoch": 0.052, "percentage": 1.73, "elapsed_time": "0:37:08", "remaining_time": "1 day, 11:06:04", "throughput": 325.55, "total_tokens": 725632}
27
+ {"current_steps": 135, "total_steps": 7500, "loss": 3.2114, "lr": 4.996062842367654e-05, "epoch": 0.054, "percentage": 1.8, "elapsed_time": "0:38:29", "remaining_time": "1 day, 11:00:10", "throughput": 325.55, "total_tokens": 751936}
28
+ {"current_steps": 140, "total_steps": 7500, "loss": 3.4165, "lr": 4.995763627516994e-05, "epoch": 0.056, "percentage": 1.87, "elapsed_time": "0:39:56", "remaining_time": "1 day, 11:00:09", "throughput": 325.59, "total_tokens": 780416}
29
+ {"current_steps": 145, "total_steps": 7500, "loss": 3.2232, "lr": 4.995453465026032e-05, "epoch": 0.058, "percentage": 1.93, "elapsed_time": "0:41:14", "remaining_time": "1 day, 10:52:18", "throughput": 325.52, "total_tokens": 805632}
30
+ {"current_steps": 150, "total_steps": 7500, "loss": 3.0257, "lr": 4.9951323562552934e-05, "epoch": 0.06, "percentage": 2.0, "elapsed_time": "0:42:32", "remaining_time": "1 day, 10:44:40", "throughput": 325.43, "total_tokens": 830720}
31
+ {"current_steps": 155, "total_steps": 7500, "loss": 3.6172, "lr": 4.994800302613318e-05, "epoch": 0.062, "percentage": 2.07, "elapsed_time": "0:43:54", "remaining_time": "1 day, 10:40:25", "throughput": 325.42, "total_tokens": 857216}
32
+ {"current_steps": 160, "total_steps": 7500, "loss": 3.5405, "lr": 4.9944573055566556e-05, "epoch": 0.064, "percentage": 2.13, "elapsed_time": "0:45:19", "remaining_time": "1 day, 10:39:39", "throughput": 325.45, "total_tokens": 885216}
33
+ {"current_steps": 165, "total_steps": 7500, "loss": 3.2942, "lr": 4.994103366589859e-05, "epoch": 0.066, "percentage": 2.2, "elapsed_time": "0:46:42", "remaining_time": "1 day, 10:36:28", "throughput": 325.41, "total_tokens": 912000}
34
+ {"current_steps": 170, "total_steps": 7500, "loss": 3.324, "lr": 4.993738487265478e-05, "epoch": 0.068, "percentage": 2.27, "elapsed_time": "0:48:02", "remaining_time": "1 day, 10:31:27", "throughput": 325.41, "total_tokens": 938000}
35
+ {"current_steps": 175, "total_steps": 7500, "loss": 3.2199, "lr": 4.993362669184051e-05, "epoch": 0.07, "percentage": 2.33, "elapsed_time": "0:49:20", "remaining_time": "1 day, 10:25:24", "throughput": 325.4, "total_tokens": 963408}
36
+ {"current_steps": 180, "total_steps": 7500, "loss": 3.4912, "lr": 4.9929759139941e-05, "epoch": 0.072, "percentage": 2.4, "elapsed_time": "0:50:37", "remaining_time": "1 day, 10:18:25", "throughput": 325.36, "total_tokens": 988128}
37
+ {"current_steps": 185, "total_steps": 7500, "loss": 3.2694, "lr": 4.992578223392124e-05, "epoch": 0.074, "percentage": 2.47, "elapsed_time": "0:52:04", "remaining_time": "1 day, 10:19:21", "throughput": 325.4, "total_tokens": 1016848}
38
+ {"current_steps": 190, "total_steps": 7500, "loss": 3.6505, "lr": 4.992169599122587e-05, "epoch": 0.076, "percentage": 2.53, "elapsed_time": "0:53:40", "remaining_time": "1 day, 10:25:06", "throughput": 325.47, "total_tokens": 1048208}
39
+ {"current_steps": 195, "total_steps": 7500, "loss": 3.1948, "lr": 4.991750042977916e-05, "epoch": 0.078, "percentage": 2.6, "elapsed_time": "0:55:01", "remaining_time": "1 day, 10:21:06", "throughput": 325.4, "total_tokens": 1074192}
40
+ {"current_steps": 200, "total_steps": 7500, "loss": 3.5253, "lr": 4.991319556798488e-05, "epoch": 0.08, "percentage": 2.67, "elapsed_time": "0:56:17", "remaining_time": "1 day, 10:14:27", "throughput": 325.34, "total_tokens": 1098736}
41
+ {"current_steps": 205, "total_steps": 7500, "loss": 3.0949, "lr": 4.990878142472628e-05, "epoch": 0.082, "percentage": 2.73, "elapsed_time": "0:57:39", "remaining_time": "1 day, 10:12:01", "throughput": 325.2, "total_tokens": 1125136}
42
+ {"current_steps": 210, "total_steps": 7500, "loss": 3.2418, "lr": 4.990425801936594e-05, "epoch": 0.084, "percentage": 2.8, "elapsed_time": "0:59:02", "remaining_time": "1 day, 10:09:23", "throughput": 325.24, "total_tokens": 1152064}
43
+ {"current_steps": 215, "total_steps": 7500, "loss": 3.1188, "lr": 4.9899625371745726e-05, "epoch": 0.086, "percentage": 2.87, "elapsed_time": "1:00:25", "remaining_time": "1 day, 10:07:39", "throughput": 325.29, "total_tokens": 1179472}
44
+ {"current_steps": 220, "total_steps": 7500, "loss": 3.3561, "lr": 4.989488350218671e-05, "epoch": 0.088, "percentage": 2.93, "elapsed_time": "1:01:58", "remaining_time": "1 day, 10:10:42", "throughput": 325.35, "total_tokens": 1209744}
45
+ {"current_steps": 225, "total_steps": 7500, "loss": 3.2474, "lr": 4.989003243148904e-05, "epoch": 0.09, "percentage": 3.0, "elapsed_time": "1:03:23", "remaining_time": "1 day, 10:09:52", "throughput": 325.34, "total_tokens": 1237568}
46
+ {"current_steps": 230, "total_steps": 7500, "loss": 3.5069, "lr": 4.988507218093189e-05, "epoch": 0.092, "percentage": 3.07, "elapsed_time": "1:04:54", "remaining_time": "1 day, 10:11:27", "throughput": 325.42, "total_tokens": 1267216}
47
+ {"current_steps": 235, "total_steps": 7500, "loss": 3.5529, "lr": 4.988000277227334e-05, "epoch": 0.094, "percentage": 3.13, "elapsed_time": "1:06:16", "remaining_time": "1 day, 10:08:39", "throughput": 325.43, "total_tokens": 1293920}
48
+ {"current_steps": 240, "total_steps": 7500, "loss": 3.5374, "lr": 4.9874824227750305e-05, "epoch": 0.096, "percentage": 3.2, "elapsed_time": "1:07:44", "remaining_time": "1 day, 10:09:02", "throughput": 325.48, "total_tokens": 1322832}
49
+ {"current_steps": 245, "total_steps": 7500, "loss": 3.3377, "lr": 4.986953657007841e-05, "epoch": 0.098, "percentage": 3.27, "elapsed_time": "1:09:10", "remaining_time": "1 day, 10:08:19", "throughput": 325.46, "total_tokens": 1350768}
50
+ {"current_steps": 250, "total_steps": 7500, "loss": 3.2379, "lr": 4.9864139822451905e-05, "epoch": 0.1, "percentage": 3.33, "elapsed_time": "1:10:39", "remaining_time": "1 day, 10:09:12", "throughput": 325.54, "total_tokens": 1380224}
51
+ {"current_steps": 255, "total_steps": 7500, "loss": 3.5737, "lr": 4.985863400854358e-05, "epoch": 0.102, "percentage": 3.4, "elapsed_time": "1:12:12", "remaining_time": "1 day, 10:11:40", "throughput": 325.6, "total_tokens": 1410720}
52
+ {"current_steps": 260, "total_steps": 7500, "loss": 3.711, "lr": 4.9853019152504607e-05, "epoch": 0.104, "percentage": 3.47, "elapsed_time": "1:13:29", "remaining_time": "1 day, 10:06:35", "throughput": 325.54, "total_tokens": 1435568}
53
+ {"current_steps": 265, "total_steps": 7500, "loss": 3.3007, "lr": 4.9847295278964514e-05, "epoch": 0.106, "percentage": 3.53, "elapsed_time": "1:14:58", "remaining_time": "1 day, 10:06:55", "throughput": 325.57, "total_tokens": 1464560}
54
+ {"current_steps": 270, "total_steps": 7500, "loss": 3.1681, "lr": 4.9841462413030995e-05, "epoch": 0.108, "percentage": 3.6, "elapsed_time": "1:16:24", "remaining_time": "1 day, 10:06:05", "throughput": 325.58, "total_tokens": 1492672}
55
+ {"current_steps": 275, "total_steps": 7500, "loss": 3.1717, "lr": 4.9835520580289854e-05, "epoch": 0.11, "percentage": 3.67, "elapsed_time": "1:17:52", "remaining_time": "1 day, 10:06:12", "throughput": 325.62, "total_tokens": 1521616}
56
+ {"current_steps": 280, "total_steps": 7500, "loss": 3.1284, "lr": 4.982946980680488e-05, "epoch": 0.112, "percentage": 3.73, "elapsed_time": "1:19:08", "remaining_time": "1 day, 10:00:45", "throughput": 325.57, "total_tokens": 1545984}
57
+ {"current_steps": 285, "total_steps": 7500, "loss": 3.3701, "lr": 4.982331011911774e-05, "epoch": 0.114, "percentage": 3.8, "elapsed_time": "1:20:47", "remaining_time": "1 day, 10:05:09", "throughput": 325.64, "total_tokens": 1578400}
58
+ {"current_steps": 290, "total_steps": 7500, "loss": 3.0473, "lr": 4.981704154424781e-05, "epoch": 0.116, "percentage": 3.87, "elapsed_time": "1:22:04", "remaining_time": "1 day, 10:00:44", "throughput": 325.6, "total_tokens": 1603552}
59
+ {"current_steps": 295, "total_steps": 7500, "loss": 3.307, "lr": 4.981066410969215e-05, "epoch": 0.118, "percentage": 3.93, "elapsed_time": "1:23:28", "remaining_time": "1 day, 9:58:54", "throughput": 325.63, "total_tokens": 1631040}
60
+ {"current_steps": 300, "total_steps": 7500, "loss": 3.2398, "lr": 4.9804177843425295e-05, "epoch": 0.12, "percentage": 4.0, "elapsed_time": "1:24:47", "remaining_time": "1 day, 9:55:09", "throughput": 325.62, "total_tokens": 1656720}
61
+ {"current_steps": 305, "total_steps": 7500, "loss": 3.285, "lr": 4.979758277389919e-05, "epoch": 0.122, "percentage": 4.07, "elapsed_time": "1:26:19", "remaining_time": "1 day, 9:56:26", "throughput": 325.53, "total_tokens": 1686096}
62
+ {"current_steps": 310, "total_steps": 7500, "loss": 3.3341, "lr": 4.979087893004302e-05, "epoch": 0.124, "percentage": 4.13, "elapsed_time": "1:27:46", "remaining_time": "1 day, 9:55:49", "throughput": 325.58, "total_tokens": 1714672}
63
+ {"current_steps": 315, "total_steps": 7500, "loss": 3.1822, "lr": 4.978406634126315e-05, "epoch": 0.126, "percentage": 4.2, "elapsed_time": "1:29:07", "remaining_time": "1 day, 9:52:54", "throughput": 325.54, "total_tokens": 1740816}
64
+ {"current_steps": 320, "total_steps": 7500, "loss": 3.154, "lr": 4.9777145037442906e-05, "epoch": 0.128, "percentage": 4.27, "elapsed_time": "1:30:25", "remaining_time": "1 day, 9:48:54", "throughput": 325.53, "total_tokens": 1766176}
65
+ {"current_steps": 325, "total_steps": 7500, "loss": 3.1815, "lr": 4.977011504894252e-05, "epoch": 0.13, "percentage": 4.33, "elapsed_time": "1:31:56", "remaining_time": "1 day, 9:49:56", "throughput": 325.59, "total_tokens": 1796224}
66
+ {"current_steps": 330, "total_steps": 7500, "loss": 3.031, "lr": 4.976297640659897e-05, "epoch": 0.132, "percentage": 4.4, "elapsed_time": "1:33:25", "remaining_time": "1 day, 9:49:44", "throughput": 325.61, "total_tokens": 1825072}
67
+ {"current_steps": 335, "total_steps": 7500, "loss": 3.0894, "lr": 4.975572914172582e-05, "epoch": 0.134, "percentage": 4.47, "elapsed_time": "1:34:42", "remaining_time": "1 day, 9:45:38", "throughput": 325.59, "total_tokens": 1850160}
68
+ {"current_steps": 340, "total_steps": 7500, "loss": 3.014, "lr": 4.974837328611312e-05, "epoch": 0.136, "percentage": 4.53, "elapsed_time": "1:36:19", "remaining_time": "1 day, 9:48:30", "throughput": 325.63, "total_tokens": 1882016}
69
+ {"current_steps": 345, "total_steps": 7500, "loss": 3.2996, "lr": 4.974090887202726e-05, "epoch": 0.138, "percentage": 4.6, "elapsed_time": "1:37:43", "remaining_time": "1 day, 9:46:39", "throughput": 325.62, "total_tokens": 1909200}
70
+ {"current_steps": 350, "total_steps": 7500, "loss": 3.1165, "lr": 4.9733335932210814e-05, "epoch": 0.14, "percentage": 4.67, "elapsed_time": "1:39:17", "remaining_time": "1 day, 9:48:20", "throughput": 325.67, "total_tokens": 1940144}
71
+ {"current_steps": 355, "total_steps": 7500, "loss": 3.3919, "lr": 4.972565449988239e-05, "epoch": 0.142, "percentage": 4.73, "elapsed_time": "1:40:45", "remaining_time": "1 day, 9:47:49", "throughput": 325.7, "total_tokens": 1968928}
72
+ {"current_steps": 360, "total_steps": 7500, "loss": 3.5546, "lr": 4.9717864608736506e-05, "epoch": 0.144, "percentage": 4.8, "elapsed_time": "1:42:13", "remaining_time": "1 day, 9:47:31", "throughput": 325.68, "total_tokens": 1997632}
73
+ {"current_steps": 365, "total_steps": 7500, "loss": 3.1407, "lr": 4.9709966292943455e-05, "epoch": 0.146, "percentage": 4.87, "elapsed_time": "1:43:32", "remaining_time": "1 day, 9:44:08", "throughput": 325.64, "total_tokens": 2023168}
74
+ {"current_steps": 370, "total_steps": 7500, "loss": 3.3342, "lr": 4.970195958714909e-05, "epoch": 0.148, "percentage": 4.93, "elapsed_time": "1:44:56", "remaining_time": "1 day, 9:42:17", "throughput": 325.63, "total_tokens": 2050352}
75
+ {"current_steps": 375, "total_steps": 7500, "loss": 3.2827, "lr": 4.969384452647477e-05, "epoch": 0.15, "percentage": 5.0, "elapsed_time": "1:46:25", "remaining_time": "1 day, 9:42:11", "throughput": 325.64, "total_tokens": 2079520}
76
+ {"current_steps": 380, "total_steps": 7500, "loss": 3.3336, "lr": 4.968562114651709e-05, "epoch": 0.152, "percentage": 5.07, "elapsed_time": "1:47:50", "remaining_time": "1 day, 9:40:36", "throughput": 325.65, "total_tokens": 2107088}
77
+ {"current_steps": 385, "total_steps": 7500, "loss": 3.2357, "lr": 4.967728948334784e-05, "epoch": 0.154, "percentage": 5.13, "elapsed_time": "1:49:11", "remaining_time": "1 day, 9:37:49", "throughput": 325.64, "total_tokens": 2133344}
78
+ {"current_steps": 390, "total_steps": 7500, "loss": 3.3007, "lr": 4.966884957351375e-05, "epoch": 0.156, "percentage": 5.2, "elapsed_time": "1:50:34", "remaining_time": "1 day, 9:35:52", "throughput": 325.63, "total_tokens": 2160400}
79
+ {"current_steps": 395, "total_steps": 7500, "loss": 3.024, "lr": 4.966030145403642e-05, "epoch": 0.158, "percentage": 5.27, "elapsed_time": "1:51:53", "remaining_time": "1 day, 9:32:40", "throughput": 325.59, "total_tokens": 2185888}
80
+ {"current_steps": 400, "total_steps": 7500, "loss": 3.2666, "lr": 4.965164516241206e-05, "epoch": 0.16, "percentage": 5.33, "elapsed_time": "1:53:13", "remaining_time": "1 day, 9:29:41", "throughput": 325.6, "total_tokens": 2211904}
81
+ {"current_steps": 405, "total_steps": 7500, "loss": 3.4459, "lr": 4.964288073661142e-05, "epoch": 0.162, "percentage": 5.4, "elapsed_time": "1:54:46", "remaining_time": "1 day, 9:30:37", "throughput": 325.55, "total_tokens": 2241840}
82
+ {"current_steps": 410, "total_steps": 7500, "loss": 3.2095, "lr": 4.963400821507954e-05, "epoch": 0.164, "percentage": 5.47, "elapsed_time": "1:56:14", "remaining_time": "1 day, 9:30:08", "throughput": 325.61, "total_tokens": 2270992}
83
+ {"current_steps": 415, "total_steps": 7500, "loss": 3.1499, "lr": 4.962502763673565e-05, "epoch": 0.166, "percentage": 5.53, "elapsed_time": "1:57:36", "remaining_time": "1 day, 9:27:53", "throughput": 325.62, "total_tokens": 2297808}
84
+ {"current_steps": 420, "total_steps": 7500, "loss": 3.5546, "lr": 4.961593904097297e-05, "epoch": 0.168, "percentage": 5.6, "elapsed_time": "1:58:51", "remaining_time": "1 day, 9:23:44", "throughput": 325.56, "total_tokens": 2321904}
85
+ {"current_steps": 425, "total_steps": 7500, "loss": 3.0465, "lr": 4.960674246765851e-05, "epoch": 0.17, "percentage": 5.67, "elapsed_time": "2:00:12", "remaining_time": "1 day, 9:21:07", "throughput": 325.54, "total_tokens": 2347984}
86
+ {"current_steps": 430, "total_steps": 7500, "loss": 3.2036, "lr": 4.9597437957132955e-05, "epoch": 0.172, "percentage": 5.73, "elapsed_time": "2:01:32", "remaining_time": "1 day, 9:18:20", "throughput": 325.53, "total_tokens": 2373904}
87
+ {"current_steps": 435, "total_steps": 7500, "loss": 3.3128, "lr": 4.958802555021042e-05, "epoch": 0.174, "percentage": 5.8, "elapsed_time": "2:02:57", "remaining_time": "1 day, 9:17:08", "throughput": 325.55, "total_tokens": 2401872}
88
+ {"current_steps": 440, "total_steps": 7500, "loss": 3.5103, "lr": 4.957850528817834e-05, "epoch": 0.176, "percentage": 5.87, "elapsed_time": "2:04:28", "remaining_time": "1 day, 9:17:19", "throughput": 325.57, "total_tokens": 2431584}
89
+ {"current_steps": 445, "total_steps": 7500, "loss": 3.2127, "lr": 4.956887721279726e-05, "epoch": 0.178, "percentage": 5.93, "elapsed_time": "2:05:49", "remaining_time": "1 day, 9:14:51", "throughput": 325.56, "total_tokens": 2457840}
90
+ {"current_steps": 450, "total_steps": 7500, "loss": 3.2069, "lr": 4.9559141366300594e-05, "epoch": 0.18, "percentage": 6.0, "elapsed_time": "2:07:09", "remaining_time": "1 day, 9:12:08", "throughput": 325.56, "total_tokens": 2483824}
91
+ {"current_steps": 455, "total_steps": 7500, "loss": 3.2804, "lr": 4.954929779139455e-05, "epoch": 0.182, "percentage": 6.07, "elapsed_time": "2:08:42", "remaining_time": "1 day, 9:12:54", "throughput": 325.6, "total_tokens": 2514480}
92
+ {"current_steps": 460, "total_steps": 7500, "loss": 3.2412, "lr": 4.953934653125786e-05, "epoch": 0.184, "percentage": 6.13, "elapsed_time": "2:10:10", "remaining_time": "1 day, 9:12:15", "throughput": 325.61, "total_tokens": 2543168}
93
+ {"current_steps": 465, "total_steps": 7500, "loss": 3.4199, "lr": 4.952928762954161e-05, "epoch": 0.186, "percentage": 6.2, "elapsed_time": "2:11:44", "remaining_time": "1 day, 9:13:00", "throughput": 325.65, "total_tokens": 2573968}
94
+ {"current_steps": 470, "total_steps": 7500, "loss": 3.3238, "lr": 4.951912113036908e-05, "epoch": 0.188, "percentage": 6.27, "elapsed_time": "2:13:09", "remaining_time": "1 day, 9:11:43", "throughput": 325.67, "total_tokens": 2601936}
95
+ {"current_steps": 475, "total_steps": 7500, "loss": 3.1038, "lr": 4.9508847078335495e-05, "epoch": 0.19, "percentage": 6.33, "elapsed_time": "2:14:20", "remaining_time": "1 day, 9:06:47", "throughput": 325.64, "total_tokens": 2624752}
96
+ {"current_steps": 480, "total_steps": 7500, "loss": 3.287, "lr": 4.949846551850788e-05, "epoch": 0.192, "percentage": 6.4, "elapsed_time": "2:15:56", "remaining_time": "1 day, 9:08:05", "throughput": 325.67, "total_tokens": 2656288}
97
+ {"current_steps": 485, "total_steps": 7500, "loss": 3.3266, "lr": 4.948797649642484e-05, "epoch": 0.194, "percentage": 6.47, "elapsed_time": "2:17:21", "remaining_time": "1 day, 9:06:46", "throughput": 325.7, "total_tokens": 2684304}
98
+ {"current_steps": 490, "total_steps": 7500, "loss": 3.4985, "lr": 4.9477380058096343e-05, "epoch": 0.196, "percentage": 6.53, "elapsed_time": "2:18:46", "remaining_time": "1 day, 9:05:22", "throughput": 325.7, "total_tokens": 2712032}
99
+ {"current_steps": 495, "total_steps": 7500, "loss": 3.4324, "lr": 4.9466676250003576e-05, "epoch": 0.198, "percentage": 6.6, "elapsed_time": "2:20:15", "remaining_time": "1 day, 9:04:45", "throughput": 325.72, "total_tokens": 2740960}
100
+ {"current_steps": 500, "total_steps": 7500, "loss": 3.2944, "lr": 4.945586511909865e-05, "epoch": 0.2, "percentage": 6.67, "elapsed_time": "2:21:46", "remaining_time": "1 day, 9:04:57", "throughput": 325.75, "total_tokens": 2771168}
101
+ {"current_steps": 505, "total_steps": 7500, "loss": 3.0404, "lr": 4.9444946712804494e-05, "epoch": 0.202, "percentage": 6.73, "elapsed_time": "2:23:10", "remaining_time": "1 day, 9:03:11", "throughput": 325.67, "total_tokens": 2797680}
102
+ {"current_steps": 510, "total_steps": 7500, "loss": 3.032, "lr": 4.943392107901458e-05, "epoch": 0.204, "percentage": 6.8, "elapsed_time": "2:24:49", "remaining_time": "1 day, 9:04:50", "throughput": 325.69, "total_tokens": 2829888}
103
+ {"current_steps": 515, "total_steps": 7500, "loss": 3.298, "lr": 4.9422788266092715e-05, "epoch": 0.206, "percentage": 6.87, "elapsed_time": "2:26:21", "remaining_time": "1 day, 9:04:58", "throughput": 325.71, "total_tokens": 2860096}
104
+ {"current_steps": 520, "total_steps": 7500, "loss": 3.2742, "lr": 4.941154832287288e-05, "epoch": 0.208, "percentage": 6.93, "elapsed_time": "2:27:45", "remaining_time": "1 day, 9:03:18", "throughput": 325.7, "total_tokens": 2887424}
105
+ {"current_steps": 525, "total_steps": 7500, "loss": 3.1988, "lr": 4.940020129865895e-05, "epoch": 0.21, "percentage": 7.0, "elapsed_time": "2:29:05", "remaining_time": "1 day, 9:00:45", "throughput": 325.69, "total_tokens": 2913376}
106
+ {"current_steps": 530, "total_steps": 7500, "loss": 3.29, "lr": 4.938874724322454e-05, "epoch": 0.212, "percentage": 7.07, "elapsed_time": "2:30:34", "remaining_time": "1 day, 9:00:17", "throughput": 325.71, "total_tokens": 2942704}
107
+ {"current_steps": 535, "total_steps": 7500, "loss": 2.9378, "lr": 4.937718620681273e-05, "epoch": 0.214, "percentage": 7.13, "elapsed_time": "2:32:00", "remaining_time": "1 day, 8:59:02", "throughput": 325.72, "total_tokens": 2970896}
108
+ {"current_steps": 540, "total_steps": 7500, "loss": 3.1336, "lr": 4.936551824013589e-05, "epoch": 0.216, "percentage": 7.2, "elapsed_time": "2:33:24", "remaining_time": "1 day, 8:57:09", "throughput": 325.74, "total_tokens": 2998080}
109
+ {"current_steps": 545, "total_steps": 7500, "loss": 3.3297, "lr": 4.935374339437543e-05, "epoch": 0.218, "percentage": 7.27, "elapsed_time": "2:34:35", "remaining_time": "1 day, 8:52:51", "throughput": 325.7, "total_tokens": 3021104}
110
+ {"current_steps": 550, "total_steps": 7500, "loss": 3.2395, "lr": 4.934186172118157e-05, "epoch": 0.22, "percentage": 7.33, "elapsed_time": "2:35:56", "remaining_time": "1 day, 8:50:33", "throughput": 325.71, "total_tokens": 3047488}
111
+ {"current_steps": 555, "total_steps": 7500, "loss": 3.0372, "lr": 4.932987327267316e-05, "epoch": 0.222, "percentage": 7.4, "elapsed_time": "2:37:21", "remaining_time": "1 day, 8:49:11", "throughput": 325.73, "total_tokens": 3075552}
112
+ {"current_steps": 560, "total_steps": 7500, "loss": 3.1987, "lr": 4.93177781014374e-05, "epoch": 0.224, "percentage": 7.47, "elapsed_time": "2:38:50", "remaining_time": "1 day, 8:48:27", "throughput": 325.74, "total_tokens": 3104448}
113
+ {"current_steps": 565, "total_steps": 7500, "loss": 3.0645, "lr": 4.9305576260529607e-05, "epoch": 0.226, "percentage": 7.53, "elapsed_time": "2:40:07", "remaining_time": "1 day, 8:45:29", "throughput": 325.74, "total_tokens": 3129632}
114
+ {"current_steps": 570, "total_steps": 7500, "loss": 3.2352, "lr": 4.9293267803473046e-05, "epoch": 0.228, "percentage": 7.6, "elapsed_time": "2:41:27", "remaining_time": "1 day, 8:43:05", "throughput": 325.73, "total_tokens": 3155696}
115
+ {"current_steps": 575, "total_steps": 7500, "loss": 2.9967, "lr": 4.9280852784258624e-05, "epoch": 0.23, "percentage": 7.67, "elapsed_time": "2:42:44", "remaining_time": "1 day, 8:39:57", "throughput": 325.72, "total_tokens": 3180480}
116
+ {"current_steps": 580, "total_steps": 7500, "loss": 3.0534, "lr": 4.9268331257344685e-05, "epoch": 0.232, "percentage": 7.73, "elapsed_time": "2:44:15", "remaining_time": "1 day, 8:39:47", "throughput": 325.74, "total_tokens": 3210352}
117
+ {"current_steps": 585, "total_steps": 7500, "loss": 3.3599, "lr": 4.925570327765678e-05, "epoch": 0.234, "percentage": 7.8, "elapsed_time": "2:45:58", "remaining_time": "1 day, 8:41:57", "throughput": 325.79, "total_tokens": 3244448}
118
+ {"current_steps": 590, "total_steps": 7500, "loss": 3.3486, "lr": 4.924296890058741e-05, "epoch": 0.236, "percentage": 7.87, "elapsed_time": "2:47:22", "remaining_time": "1 day, 8:40:17", "throughput": 325.78, "total_tokens": 3271680}
119
+ {"current_steps": 595, "total_steps": 7500, "loss": 3.0605, "lr": 4.923012818199576e-05, "epoch": 0.238, "percentage": 7.93, "elapsed_time": "2:48:53", "remaining_time": "1 day, 8:39:54", "throughput": 325.81, "total_tokens": 3301472}
120
+ {"current_steps": 600, "total_steps": 7500, "loss": 3.0015, "lr": 4.9217181178207535e-05, "epoch": 0.24, "percentage": 8.0, "elapsed_time": "2:50:27", "remaining_time": "1 day, 8:40:19", "throughput": 325.86, "total_tokens": 3332848}
121
+ {"current_steps": 605, "total_steps": 7500, "loss": 3.3475, "lr": 4.920412794601461e-05, "epoch": 0.242, "percentage": 8.07, "elapsed_time": "2:52:06", "remaining_time": "1 day, 8:41:31", "throughput": 325.82, "total_tokens": 3364720}
122
+ {"current_steps": 610, "total_steps": 7500, "loss": 3.0312, "lr": 4.919096854267484e-05, "epoch": 0.244, "percentage": 8.13, "elapsed_time": "2:53:28", "remaining_time": "1 day, 8:39:28", "throughput": 325.83, "total_tokens": 3391536}
123
+ {"current_steps": 615, "total_steps": 7500, "loss": 3.3344, "lr": 4.9177703025911825e-05, "epoch": 0.246, "percentage": 8.2, "elapsed_time": "2:54:53", "remaining_time": "1 day, 8:37:50", "throughput": 325.85, "total_tokens": 3419104}
124
+ {"current_steps": 620, "total_steps": 7500, "loss": 3.1757, "lr": 4.91643314539146e-05, "epoch": 0.248, "percentage": 8.27, "elapsed_time": "2:56:17", "remaining_time": "1 day, 8:36:13", "throughput": 325.83, "total_tokens": 3446448}
125
+ {"current_steps": 625, "total_steps": 7500, "loss": 3.2677, "lr": 4.9150853885337426e-05, "epoch": 0.25, "percentage": 8.33, "elapsed_time": "2:57:43", "remaining_time": "1 day, 8:35:03", "throughput": 325.83, "total_tokens": 3474672}
126
+ {"current_steps": 630, "total_steps": 7500, "loss": 3.2773, "lr": 4.913727037929952e-05, "epoch": 0.252, "percentage": 8.4, "elapsed_time": "2:59:17", "remaining_time": "1 day, 8:35:12", "throughput": 325.85, "total_tokens": 3505504}
127
+ {"current_steps": 635, "total_steps": 7500, "loss": 3.0654, "lr": 4.912358099538476e-05, "epoch": 0.254, "percentage": 8.47, "elapsed_time": "3:00:42", "remaining_time": "1 day, 8:33:41", "throughput": 325.86, "total_tokens": 3533264}
128
+ {"current_steps": 640, "total_steps": 7500, "loss": 3.1039, "lr": 4.910978579364151e-05, "epoch": 0.256, "percentage": 8.53, "elapsed_time": "3:02:14", "remaining_time": "1 day, 8:33:29", "throughput": 325.89, "total_tokens": 3563616}
129
+ {"current_steps": 645, "total_steps": 7500, "loss": 3.0446, "lr": 4.909588483458225e-05, "epoch": 0.258, "percentage": 8.6, "elapsed_time": "3:03:31", "remaining_time": "1 day, 8:30:24", "throughput": 325.87, "total_tokens": 3588144}
130
+ {"current_steps": 650, "total_steps": 7500, "loss": 2.9653, "lr": 4.908187817918341e-05, "epoch": 0.26, "percentage": 8.67, "elapsed_time": "3:04:50", "remaining_time": "1 day, 8:27:58", "throughput": 325.85, "total_tokens": 3613936}
131
+ {"current_steps": 655, "total_steps": 7500, "loss": 3.1394, "lr": 4.906776588888502e-05, "epoch": 0.262, "percentage": 8.73, "elapsed_time": "3:06:22", "remaining_time": "1 day, 8:27:40", "throughput": 325.88, "total_tokens": 3644144}
132
+ {"current_steps": 660, "total_steps": 7500, "loss": 3.1486, "lr": 4.905354802559049e-05, "epoch": 0.264, "percentage": 8.8, "elapsed_time": "3:07:53", "remaining_time": "1 day, 8:27:09", "throughput": 325.91, "total_tokens": 3673984}
133
+ {"current_steps": 665, "total_steps": 7500, "loss": 2.9102, "lr": 4.9039224651666325e-05, "epoch": 0.266, "percentage": 8.87, "elapsed_time": "3:09:33", "remaining_time": "1 day, 8:28:14", "throughput": 325.94, "total_tokens": 3706960}
134
+ {"current_steps": 670, "total_steps": 7500, "loss": 3.0771, "lr": 4.902479582994185e-05, "epoch": 0.268, "percentage": 8.93, "elapsed_time": "3:11:01", "remaining_time": "1 day, 8:27:19", "throughput": 325.95, "total_tokens": 3735840}
135
+ {"current_steps": 675, "total_steps": 7500, "loss": 2.9073, "lr": 4.9010261623708944e-05, "epoch": 0.27, "percentage": 9.0, "elapsed_time": "3:12:29", "remaining_time": "1 day, 8:26:20", "throughput": 325.96, "total_tokens": 3764752}
136
+ {"current_steps": 680, "total_steps": 7500, "loss": 2.8426, "lr": 4.899562209672174e-05, "epoch": 0.272, "percentage": 9.07, "elapsed_time": "3:14:00", "remaining_time": "1 day, 8:25:46", "throughput": 325.99, "total_tokens": 3794640}
137
+ {"current_steps": 685, "total_steps": 7500, "loss": 3.1466, "lr": 4.898087731319636e-05, "epoch": 0.274, "percentage": 9.13, "elapsed_time": "3:15:27", "remaining_time": "1 day, 8:24:37", "throughput": 326.0, "total_tokens": 3823168}
138
+ {"current_steps": 690, "total_steps": 7500, "loss": 3.2213, "lr": 4.896602733781065e-05, "epoch": 0.276, "percentage": 9.2, "elapsed_time": "3:16:46", "remaining_time": "1 day, 8:22:06", "throughput": 325.98, "total_tokens": 3848672}
139
+ {"current_steps": 695, "total_steps": 7500, "loss": 3.1395, "lr": 4.8951072235703855e-05, "epoch": 0.278, "percentage": 9.27, "elapsed_time": "3:18:17", "remaining_time": "1 day, 8:21:28", "throughput": 325.99, "total_tokens": 3878272}
140
+ {"current_steps": 700, "total_steps": 7500, "loss": 3.149, "lr": 4.893601207247638e-05, "epoch": 0.28, "percentage": 9.33, "elapsed_time": "3:19:43", "remaining_time": "1 day, 8:20:11", "throughput": 325.99, "total_tokens": 3906512}
141
+ {"current_steps": 705, "total_steps": 7500, "loss": 3.1194, "lr": 4.892084691418947e-05, "epoch": 0.282, "percentage": 9.4, "elapsed_time": "3:20:58", "remaining_time": "1 day, 8:17:07", "throughput": 325.9, "total_tokens": 3930048}
142
+ {"current_steps": 710, "total_steps": 7500, "loss": 3.0816, "lr": 4.890557682736491e-05, "epoch": 0.284, "percentage": 9.47, "elapsed_time": "3:22:15", "remaining_time": "1 day, 8:14:13", "throughput": 325.88, "total_tokens": 3954672}
143
+ {"current_steps": 715, "total_steps": 7500, "loss": 3.0122, "lr": 4.8890201878984796e-05, "epoch": 0.286, "percentage": 9.53, "elapsed_time": "3:23:41", "remaining_time": "1 day, 8:12:56", "throughput": 325.87, "total_tokens": 3982656}