Training in progress, step 34344
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +383 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 819328
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62f457ab07069b7a02f572c5508229da15e1c860636436651b99915532cbb3a2
|
| 3 |
size 819328
|
trainer_log.jsonl
CHANGED
|
@@ -6505,3 +6505,386 @@
|
|
| 6505 |
{"current_steps": 32440, "total_steps": 38160, "loss": 0.2534, "lr": 0.0020075612611106445, "epoch": 17.0020964360587, "percentage": 85.01, "elapsed_time": "1:23:36", "remaining_time": "0:14:44", "throughput": 4218.57, "total_tokens": 21160712}
|
| 6506 |
{"current_steps": 32445, "total_steps": 38160, "loss": 0.2888, "lr": 0.0020041339652784942, "epoch": 17.004716981132077, "percentage": 85.02, "elapsed_time": "1:23:36", "remaining_time": "0:14:43", "throughput": 4218.57, "total_tokens": 21163528}
|
| 6507 |
{"current_steps": 32450, "total_steps": 38160, "loss": 0.4204, "lr": 0.0020007093880332534, "epoch": 17.00733752620545, "percentage": 85.04, "elapsed_time": "1:23:37", "remaining_time": "0:14:42", "throughput": 4218.66, "total_tokens": 21167304}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6505 |
{"current_steps": 32440, "total_steps": 38160, "loss": 0.2534, "lr": 0.0020075612611106445, "epoch": 17.0020964360587, "percentage": 85.01, "elapsed_time": "1:23:36", "remaining_time": "0:14:44", "throughput": 4218.57, "total_tokens": 21160712}
|
| 6506 |
{"current_steps": 32445, "total_steps": 38160, "loss": 0.2888, "lr": 0.0020041339652784942, "epoch": 17.004716981132077, "percentage": 85.02, "elapsed_time": "1:23:36", "remaining_time": "0:14:43", "throughput": 4218.57, "total_tokens": 21163528}
|
| 6507 |
{"current_steps": 32450, "total_steps": 38160, "loss": 0.4204, "lr": 0.0020007093880332534, "epoch": 17.00733752620545, "percentage": 85.04, "elapsed_time": "1:23:37", "remaining_time": "0:14:42", "throughput": 4218.66, "total_tokens": 21167304}
|
| 6508 |
+
{"current_steps": 32455, "total_steps": 38160, "loss": 0.398, "lr": 0.001997287530091308, "epoch": 17.009958071278827, "percentage": 85.05, "elapsed_time": "1:23:38", "remaining_time": "0:14:42", "throughput": 4218.7, "total_tokens": 21170792}
|
| 6509 |
+
{"current_steps": 32460, "total_steps": 38160, "loss": 0.3363, "lr": 0.001993868392168466, "epoch": 17.0125786163522, "percentage": 85.06, "elapsed_time": "1:23:39", "remaining_time": "0:14:41", "throughput": 4218.77, "total_tokens": 21174408}
|
| 6510 |
+
{"current_steps": 32465, "total_steps": 38160, "loss": 0.4082, "lr": 0.0019904519749799764, "epoch": 17.015199161425578, "percentage": 85.08, "elapsed_time": "1:23:39", "remaining_time": "0:14:40", "throughput": 4218.86, "total_tokens": 21178248}
|
| 6511 |
+
{"current_steps": 32470, "total_steps": 38160, "loss": 0.235, "lr": 0.00198703827924051, "epoch": 17.01781970649895, "percentage": 85.09, "elapsed_time": "1:23:40", "remaining_time": "0:14:39", "throughput": 4218.85, "total_tokens": 21181064}
|
| 6512 |
+
{"current_steps": 32475, "total_steps": 38160, "loss": 0.3987, "lr": 0.0019836273056641767, "epoch": 17.020440251572328, "percentage": 85.1, "elapsed_time": "1:23:41", "remaining_time": "0:14:39", "throughput": 4218.89, "total_tokens": 21184296}
|
| 6513 |
+
{"current_steps": 32480, "total_steps": 38160, "loss": 0.3609, "lr": 0.0019802190549645126, "epoch": 17.0230607966457, "percentage": 85.12, "elapsed_time": "1:23:41", "remaining_time": "0:14:38", "throughput": 4218.89, "total_tokens": 21187176}
|
| 6514 |
+
{"current_steps": 32485, "total_steps": 38160, "loss": 0.2959, "lr": 0.001976813527854487, "epoch": 17.02568134171908, "percentage": 85.13, "elapsed_time": "1:23:42", "remaining_time": "0:14:37", "throughput": 4218.86, "total_tokens": 21189800}
|
| 6515 |
+
{"current_steps": 32490, "total_steps": 38160, "loss": 0.4361, "lr": 0.0019734107250464926, "epoch": 17.028301886792452, "percentage": 85.14, "elapsed_time": "1:23:43", "remaining_time": "0:14:36", "throughput": 4219.15, "total_tokens": 21196328}
|
| 6516 |
+
{"current_steps": 32495, "total_steps": 38160, "loss": 0.3843, "lr": 0.0019700106472523627, "epoch": 17.03092243186583, "percentage": 85.15, "elapsed_time": "1:23:44", "remaining_time": "0:14:35", "throughput": 4219.16, "total_tokens": 21199400}
|
| 6517 |
+
{"current_steps": 32500, "total_steps": 38160, "loss": 0.2976, "lr": 0.0019666132951833464, "epoch": 17.033542976939202, "percentage": 85.17, "elapsed_time": "1:23:45", "remaining_time": "0:14:35", "throughput": 4219.16, "total_tokens": 21202248}
|
| 6518 |
+
{"current_steps": 32505, "total_steps": 38160, "loss": 0.4429, "lr": 0.001963218669550137, "epoch": 17.03616352201258, "percentage": 85.18, "elapsed_time": "1:23:45", "remaining_time": "0:14:34", "throughput": 4219.19, "total_tokens": 21205448}
|
| 6519 |
+
{"current_steps": 32510, "total_steps": 38160, "loss": 0.2986, "lr": 0.001959826771062854, "epoch": 17.038784067085953, "percentage": 85.19, "elapsed_time": "1:23:46", "remaining_time": "0:14:33", "throughput": 4219.3, "total_tokens": 21209576}
|
| 6520 |
+
{"current_steps": 32515, "total_steps": 38160, "loss": 0.413, "lr": 0.0019564376004310375, "epoch": 17.04140461215933, "percentage": 85.21, "elapsed_time": "1:23:47", "remaining_time": "0:14:32", "throughput": 4219.34, "total_tokens": 21212904}
|
| 6521 |
+
{"current_steps": 32520, "total_steps": 38160, "loss": 0.3284, "lr": 0.0019530511583636667, "epoch": 17.044025157232703, "percentage": 85.22, "elapsed_time": "1:23:48", "remaining_time": "0:14:32", "throughput": 4219.37, "total_tokens": 21216104}
|
| 6522 |
+
{"current_steps": 32525, "total_steps": 38160, "loss": 0.4254, "lr": 0.001949667445569147, "epoch": 17.04664570230608, "percentage": 85.23, "elapsed_time": "1:23:49", "remaining_time": "0:14:31", "throughput": 4219.47, "total_tokens": 21220104}
|
| 6523 |
+
{"current_steps": 32530, "total_steps": 38160, "loss": 0.3836, "lr": 0.0019462864627553106, "epoch": 17.049266247379453, "percentage": 85.25, "elapsed_time": "1:23:49", "remaining_time": "0:14:30", "throughput": 4219.47, "total_tokens": 21222920}
|
| 6524 |
+
{"current_steps": 32535, "total_steps": 38160, "loss": 0.3382, "lr": 0.0019429082106294231, "epoch": 17.05188679245283, "percentage": 85.26, "elapsed_time": "1:23:50", "remaining_time": "0:14:29", "throughput": 4219.52, "total_tokens": 21226408}
|
| 6525 |
+
{"current_steps": 32540, "total_steps": 38160, "loss": 0.4326, "lr": 0.001939532689898173, "epoch": 17.054507337526207, "percentage": 85.27, "elapsed_time": "1:23:51", "remaining_time": "0:14:28", "throughput": 4219.6, "total_tokens": 21230184}
|
| 6526 |
+
{"current_steps": 32545, "total_steps": 38160, "loss": 0.3308, "lr": 0.0019361599012676816, "epoch": 17.05712788259958, "percentage": 85.29, "elapsed_time": "1:23:52", "remaining_time": "0:14:28", "throughput": 4219.63, "total_tokens": 21233288}
|
| 6527 |
+
{"current_steps": 32550, "total_steps": 38160, "loss": 0.3218, "lr": 0.0019327898454435033, "epoch": 17.059748427672957, "percentage": 85.3, "elapsed_time": "1:23:52", "remaining_time": "0:14:27", "throughput": 4219.59, "total_tokens": 21235720}
|
| 6528 |
+
{"current_steps": 32555, "total_steps": 38160, "loss": 0.3175, "lr": 0.0019294225231306055, "epoch": 17.06236897274633, "percentage": 85.31, "elapsed_time": "1:23:53", "remaining_time": "0:14:26", "throughput": 4219.56, "total_tokens": 21238408}
|
| 6529 |
+
{"current_steps": 32560, "total_steps": 38160, "loss": 0.3418, "lr": 0.0019260579350334, "epoch": 17.064989517819708, "percentage": 85.32, "elapsed_time": "1:23:54", "remaining_time": "0:14:25", "throughput": 4219.7, "total_tokens": 21242632}
|
| 6530 |
+
{"current_steps": 32565, "total_steps": 38160, "loss": 0.3343, "lr": 0.001922696081855722, "epoch": 17.06761006289308, "percentage": 85.34, "elapsed_time": "1:23:54", "remaining_time": "0:14:25", "throughput": 4219.75, "total_tokens": 21246056}
|
| 6531 |
+
{"current_steps": 32570, "total_steps": 38160, "loss": 0.4302, "lr": 0.0019193369643008256, "epoch": 17.070230607966458, "percentage": 85.35, "elapsed_time": "1:23:55", "remaining_time": "0:14:24", "throughput": 4219.75, "total_tokens": 21249032}
|
| 6532 |
+
{"current_steps": 32575, "total_steps": 38160, "loss": 0.3244, "lr": 0.0019159805830714055, "epoch": 17.07285115303983, "percentage": 85.36, "elapsed_time": "1:23:56", "remaining_time": "0:14:23", "throughput": 4219.74, "total_tokens": 21251816}
|
| 6533 |
+
{"current_steps": 32580, "total_steps": 38160, "loss": 0.3173, "lr": 0.00191262693886958, "epoch": 17.07547169811321, "percentage": 85.38, "elapsed_time": "1:23:57", "remaining_time": "0:14:22", "throughput": 4219.89, "total_tokens": 21256200}
|
| 6534 |
+
{"current_steps": 32585, "total_steps": 38160, "loss": 0.3273, "lr": 0.0019092760323968876, "epoch": 17.078092243186582, "percentage": 85.39, "elapsed_time": "1:23:58", "remaining_time": "0:14:21", "throughput": 4220.01, "total_tokens": 21260392}
|
| 6535 |
+
{"current_steps": 32590, "total_steps": 38160, "loss": 0.3774, "lr": 0.0019059278643543047, "epoch": 17.08071278825996, "percentage": 85.4, "elapsed_time": "1:23:58", "remaining_time": "0:14:21", "throughput": 4220.1, "total_tokens": 21264296}
|
| 6536 |
+
{"current_steps": 32595, "total_steps": 38160, "loss": 0.4133, "lr": 0.0019025824354422243, "epoch": 17.083333333333332, "percentage": 85.42, "elapsed_time": "1:23:59", "remaining_time": "0:14:20", "throughput": 4220.16, "total_tokens": 21267848}
|
| 6537 |
+
{"current_steps": 32600, "total_steps": 38160, "loss": 0.3157, "lr": 0.0018992397463604765, "epoch": 17.08595387840671, "percentage": 85.43, "elapsed_time": "1:24:00", "remaining_time": "0:14:19", "throughput": 4220.23, "total_tokens": 21271560}
|
| 6538 |
+
{"current_steps": 32605, "total_steps": 38160, "loss": 0.3872, "lr": 0.0018958997978083102, "epoch": 17.088574423480082, "percentage": 85.44, "elapsed_time": "1:24:01", "remaining_time": "0:14:18", "throughput": 4220.25, "total_tokens": 21274760}
|
| 6539 |
+
{"current_steps": 32610, "total_steps": 38160, "loss": 0.3059, "lr": 0.0018925625904844118, "epoch": 17.09119496855346, "percentage": 85.46, "elapsed_time": "1:24:01", "remaining_time": "0:14:18", "throughput": 4220.22, "total_tokens": 21277352}
|
| 6540 |
+
{"current_steps": 32615, "total_steps": 38160, "loss": 0.4617, "lr": 0.0018892281250868807, "epoch": 17.093815513626833, "percentage": 85.47, "elapsed_time": "1:24:02", "remaining_time": "0:14:17", "throughput": 4220.22, "total_tokens": 21280168}
|
| 6541 |
+
{"current_steps": 32620, "total_steps": 38160, "loss": 0.3896, "lr": 0.0018858964023132478, "epoch": 17.09643605870021, "percentage": 85.48, "elapsed_time": "1:24:03", "remaining_time": "0:14:16", "throughput": 4220.28, "total_tokens": 21283688}
|
| 6542 |
+
{"current_steps": 32625, "total_steps": 38160, "loss": 0.3636, "lr": 0.0018825674228604738, "epoch": 17.099056603773583, "percentage": 85.5, "elapsed_time": "1:24:03", "remaining_time": "0:14:15", "throughput": 4220.32, "total_tokens": 21286920}
|
| 6543 |
+
{"current_steps": 32630, "total_steps": 38160, "loss": 0.3713, "lr": 0.001879241187424943, "epoch": 17.10167714884696, "percentage": 85.51, "elapsed_time": "1:24:04", "remaining_time": "0:14:14", "throughput": 4220.39, "total_tokens": 21290600}
|
| 6544 |
+
{"current_steps": 32635, "total_steps": 38160, "loss": 0.3655, "lr": 0.0018759176967024676, "epoch": 17.104297693920337, "percentage": 85.52, "elapsed_time": "1:24:05", "remaining_time": "0:14:14", "throughput": 4220.35, "total_tokens": 21293032}
|
| 6545 |
+
{"current_steps": 32640, "total_steps": 38160, "loss": 0.3681, "lr": 0.0018725969513882844, "epoch": 17.10691823899371, "percentage": 85.53, "elapsed_time": "1:24:06", "remaining_time": "0:14:13", "throughput": 4220.35, "total_tokens": 21295976}
|
| 6546 |
+
{"current_steps": 32645, "total_steps": 38160, "loss": 0.3579, "lr": 0.001869278952177053, "epoch": 17.109538784067087, "percentage": 85.55, "elapsed_time": "1:24:06", "remaining_time": "0:14:12", "throughput": 4220.44, "total_tokens": 21299816}
|
| 6547 |
+
{"current_steps": 32650, "total_steps": 38160, "loss": 0.3474, "lr": 0.0018659636997628596, "epoch": 17.11215932914046, "percentage": 85.56, "elapsed_time": "1:24:07", "remaining_time": "0:14:11", "throughput": 4220.44, "total_tokens": 21302696}
|
| 6548 |
+
{"current_steps": 32655, "total_steps": 38160, "loss": 0.3309, "lr": 0.00186265119483922, "epoch": 17.114779874213838, "percentage": 85.57, "elapsed_time": "1:24:08", "remaining_time": "0:14:11", "throughput": 4220.41, "total_tokens": 21305288}
|
| 6549 |
+
{"current_steps": 32660, "total_steps": 38160, "loss": 0.2926, "lr": 0.001859341438099069, "epoch": 17.11740041928721, "percentage": 85.59, "elapsed_time": "1:24:08", "remaining_time": "0:14:10", "throughput": 4220.47, "total_tokens": 21308808}
|
| 6550 |
+
{"current_steps": 32665, "total_steps": 38160, "loss": 0.315, "lr": 0.0018560344302347787, "epoch": 17.120020964360588, "percentage": 85.6, "elapsed_time": "1:24:09", "remaining_time": "0:14:09", "throughput": 4220.5, "total_tokens": 21312008}
|
| 6551 |
+
{"current_steps": 32670, "total_steps": 38160, "loss": 0.3369, "lr": 0.0018527301719381299, "epoch": 17.12264150943396, "percentage": 85.61, "elapsed_time": "1:24:10", "remaining_time": "0:14:08", "throughput": 4220.44, "total_tokens": 21314280}
|
| 6552 |
+
{"current_steps": 32675, "total_steps": 38160, "loss": 0.2937, "lr": 0.0018494286639003338, "epoch": 17.12526205450734, "percentage": 85.63, "elapsed_time": "1:24:10", "remaining_time": "0:14:07", "throughput": 4220.41, "total_tokens": 21316808}
|
| 6553 |
+
{"current_steps": 32680, "total_steps": 38160, "loss": 0.3437, "lr": 0.0018461299068120323, "epoch": 17.127882599580712, "percentage": 85.64, "elapsed_time": "1:24:11", "remaining_time": "0:14:07", "throughput": 4220.5, "total_tokens": 21320712}
|
| 6554 |
+
{"current_steps": 32685, "total_steps": 38160, "loss": 0.3572, "lr": 0.0018428339013632855, "epoch": 17.13050314465409, "percentage": 85.65, "elapsed_time": "1:24:12", "remaining_time": "0:14:06", "throughput": 4220.62, "total_tokens": 21324968}
|
| 6555 |
+
{"current_steps": 32690, "total_steps": 38160, "loss": 0.3503, "lr": 0.0018395406482435866, "epoch": 17.133123689727462, "percentage": 85.67, "elapsed_time": "1:24:13", "remaining_time": "0:14:05", "throughput": 4220.68, "total_tokens": 21328360}
|
| 6556 |
+
{"current_steps": 32695, "total_steps": 38160, "loss": 0.3586, "lr": 0.0018362501481418397, "epoch": 17.13574423480084, "percentage": 85.68, "elapsed_time": "1:24:14", "remaining_time": "0:14:04", "throughput": 4220.68, "total_tokens": 21331336}
|
| 6557 |
+
{"current_steps": 32700, "total_steps": 38160, "loss": 0.488, "lr": 0.0018329624017463808, "epoch": 17.138364779874212, "percentage": 85.69, "elapsed_time": "1:24:14", "remaining_time": "0:14:03", "throughput": 4220.69, "total_tokens": 21334344}
|
| 6558 |
+
{"current_steps": 32705, "total_steps": 38160, "loss": 0.4184, "lr": 0.001829677409744968, "epoch": 17.14098532494759, "percentage": 85.7, "elapsed_time": "1:24:15", "remaining_time": "0:14:03", "throughput": 4220.96, "total_tokens": 21340360}
|
| 6559 |
+
{"current_steps": 32710, "total_steps": 38160, "loss": 0.3368, "lr": 0.0018263951728247883, "epoch": 17.143605870020963, "percentage": 85.72, "elapsed_time": "1:24:16", "remaining_time": "0:14:02", "throughput": 4220.96, "total_tokens": 21343176}
|
| 6560 |
+
{"current_steps": 32715, "total_steps": 38160, "loss": 0.3526, "lr": 0.0018231156916724443, "epoch": 17.14622641509434, "percentage": 85.73, "elapsed_time": "1:24:17", "remaining_time": "0:14:01", "throughput": 4220.96, "total_tokens": 21346024}
|
| 6561 |
+
{"current_steps": 32720, "total_steps": 38160, "loss": 0.3552, "lr": 0.0018198389669739723, "epoch": 17.148846960167713, "percentage": 85.74, "elapsed_time": "1:24:17", "remaining_time": "0:14:00", "throughput": 4221.06, "total_tokens": 21350120}
|
| 6562 |
+
{"current_steps": 32725, "total_steps": 38160, "loss": 0.4589, "lr": 0.0018165649994148203, "epoch": 17.15146750524109, "percentage": 85.76, "elapsed_time": "1:24:18", "remaining_time": "0:14:00", "throughput": 4221.12, "total_tokens": 21353608}
|
| 6563 |
+
{"current_steps": 32730, "total_steps": 38160, "loss": 0.3645, "lr": 0.0018132937896798622, "epoch": 17.154088050314467, "percentage": 85.77, "elapsed_time": "1:24:19", "remaining_time": "0:13:59", "throughput": 4221.29, "total_tokens": 21358376}
|
| 6564 |
+
{"current_steps": 32735, "total_steps": 38160, "loss": 0.2894, "lr": 0.001810025338453401, "epoch": 17.15670859538784, "percentage": 85.78, "elapsed_time": "1:24:20", "remaining_time": "0:13:58", "throughput": 4221.28, "total_tokens": 21361128}
|
| 6565 |
+
{"current_steps": 32740, "total_steps": 38160, "loss": 0.3659, "lr": 0.0018067596464191608, "epoch": 17.159329140461217, "percentage": 85.8, "elapsed_time": "1:24:21", "remaining_time": "0:13:57", "throughput": 4221.34, "total_tokens": 21364584}
|
| 6566 |
+
{"current_steps": 32745, "total_steps": 38160, "loss": 0.3359, "lr": 0.001803496714260289, "epoch": 17.16194968553459, "percentage": 85.81, "elapsed_time": "1:24:21", "remaining_time": "0:13:57", "throughput": 4221.41, "total_tokens": 21368168}
|
| 6567 |
+
{"current_steps": 32750, "total_steps": 38160, "loss": 0.3017, "lr": 0.0018002365426593512, "epoch": 17.164570230607968, "percentage": 85.82, "elapsed_time": "1:24:22", "remaining_time": "0:13:56", "throughput": 4221.46, "total_tokens": 21371752}
|
| 6568 |
+
{"current_steps": 32755, "total_steps": 38160, "loss": 0.4353, "lr": 0.0017969791322983352, "epoch": 17.16719077568134, "percentage": 85.84, "elapsed_time": "1:24:23", "remaining_time": "0:13:55", "throughput": 4221.45, "total_tokens": 21374632}
|
| 6569 |
+
{"current_steps": 32760, "total_steps": 38160, "loss": 0.3639, "lr": 0.0017937244838586556, "epoch": 17.169811320754718, "percentage": 85.85, "elapsed_time": "1:24:24", "remaining_time": "0:13:54", "throughput": 4221.46, "total_tokens": 21377608}
|
| 6570 |
+
{"current_steps": 32765, "total_steps": 38160, "loss": 0.4024, "lr": 0.00179047259802115, "epoch": 17.17243186582809, "percentage": 85.86, "elapsed_time": "1:24:24", "remaining_time": "0:13:53", "throughput": 4221.53, "total_tokens": 21381192}
|
| 6571 |
+
{"current_steps": 32770, "total_steps": 38160, "loss": 0.4633, "lr": 0.001787223475466077, "epoch": 17.17505241090147, "percentage": 85.88, "elapsed_time": "1:24:25", "remaining_time": "0:13:53", "throughput": 4221.54, "total_tokens": 21384168}
|
| 6572 |
+
{"current_steps": 32775, "total_steps": 38160, "loss": 0.3457, "lr": 0.0017839771168731099, "epoch": 17.177672955974842, "percentage": 85.89, "elapsed_time": "1:24:26", "remaining_time": "0:13:52", "throughput": 4221.56, "total_tokens": 21387240}
|
| 6573 |
+
{"current_steps": 32780, "total_steps": 38160, "loss": 0.2905, "lr": 0.001780733522921357, "epoch": 17.18029350104822, "percentage": 85.9, "elapsed_time": "1:24:26", "remaining_time": "0:13:51", "throughput": 4221.57, "total_tokens": 21390216}
|
| 6574 |
+
{"current_steps": 32785, "total_steps": 38160, "loss": 0.3821, "lr": 0.001777492694289336, "epoch": 17.182914046121592, "percentage": 85.91, "elapsed_time": "1:24:27", "remaining_time": "0:13:50", "throughput": 4221.6, "total_tokens": 21393544}
|
| 6575 |
+
{"current_steps": 32790, "total_steps": 38160, "loss": 0.3825, "lr": 0.0017742546316549923, "epoch": 17.18553459119497, "percentage": 85.93, "elapsed_time": "1:24:28", "remaining_time": "0:13:50", "throughput": 4221.69, "total_tokens": 21397288}
|
| 6576 |
+
{"current_steps": 32795, "total_steps": 38160, "loss": 0.2817, "lr": 0.001771019335695691, "epoch": 17.188155136268342, "percentage": 85.94, "elapsed_time": "1:24:29", "remaining_time": "0:13:49", "throughput": 4221.66, "total_tokens": 21399912}
|
| 6577 |
+
{"current_steps": 32800, "total_steps": 38160, "loss": 0.347, "lr": 0.001767786807088224, "epoch": 17.19077568134172, "percentage": 85.95, "elapsed_time": "1:24:29", "remaining_time": "0:13:48", "throughput": 4221.64, "total_tokens": 21402472}
|
| 6578 |
+
{"current_steps": 32805, "total_steps": 38160, "loss": 0.3228, "lr": 0.001764557046508794, "epoch": 17.193396226415093, "percentage": 85.97, "elapsed_time": "1:24:30", "remaining_time": "0:13:47", "throughput": 4221.63, "total_tokens": 21405352}
|
| 6579 |
+
{"current_steps": 32810, "total_steps": 38160, "loss": 0.3025, "lr": 0.001761330054633035, "epoch": 17.19601677148847, "percentage": 85.98, "elapsed_time": "1:24:31", "remaining_time": "0:13:46", "throughput": 4221.61, "total_tokens": 21408008}
|
| 6580 |
+
{"current_steps": 32815, "total_steps": 38160, "loss": 0.4617, "lr": 0.0017581058321359893, "epoch": 17.198637316561843, "percentage": 85.99, "elapsed_time": "1:24:31", "remaining_time": "0:13:46", "throughput": 4221.7, "total_tokens": 21411880}
|
| 6581 |
+
{"current_steps": 32820, "total_steps": 38160, "loss": 0.3466, "lr": 0.0017548843796921336, "epoch": 17.20125786163522, "percentage": 86.01, "elapsed_time": "1:24:32", "remaining_time": "0:13:45", "throughput": 4221.68, "total_tokens": 21414504}
|
| 6582 |
+
{"current_steps": 32825, "total_steps": 38160, "loss": 0.3209, "lr": 0.001751665697975359, "epoch": 17.203878406708597, "percentage": 86.02, "elapsed_time": "1:24:33", "remaining_time": "0:13:44", "throughput": 4221.7, "total_tokens": 21417608}
|
| 6583 |
+
{"current_steps": 32830, "total_steps": 38160, "loss": 0.3877, "lr": 0.0017484497876589739, "epoch": 17.20649895178197, "percentage": 86.03, "elapsed_time": "1:24:33", "remaining_time": "0:13:43", "throughput": 4221.72, "total_tokens": 21420712}
|
| 6584 |
+
{"current_steps": 32835, "total_steps": 38160, "loss": 0.4841, "lr": 0.0017452366494157144, "epoch": 17.209119496855347, "percentage": 86.05, "elapsed_time": "1:24:34", "remaining_time": "0:13:42", "throughput": 4221.7, "total_tokens": 21423400}
|
| 6585 |
+
{"current_steps": 32840, "total_steps": 38160, "loss": 0.4041, "lr": 0.0017420262839177253, "epoch": 17.21174004192872, "percentage": 86.06, "elapsed_time": "1:24:35", "remaining_time": "0:13:42", "throughput": 4221.76, "total_tokens": 21426888}
|
| 6586 |
+
{"current_steps": 32845, "total_steps": 38160, "loss": 0.3786, "lr": 0.0017388186918365837, "epoch": 17.214360587002098, "percentage": 86.07, "elapsed_time": "1:24:36", "remaining_time": "0:13:41", "throughput": 4221.76, "total_tokens": 21429736}
|
| 6587 |
+
{"current_steps": 32850, "total_steps": 38160, "loss": 0.3619, "lr": 0.0017356138738432818, "epoch": 17.21698113207547, "percentage": 86.08, "elapsed_time": "1:24:36", "remaining_time": "0:13:40", "throughput": 4221.82, "total_tokens": 21433320}
|
| 6588 |
+
{"current_steps": 32855, "total_steps": 38160, "loss": 0.317, "lr": 0.0017324118306082281, "epoch": 17.219601677148848, "percentage": 86.1, "elapsed_time": "1:24:37", "remaining_time": "0:13:39", "throughput": 4221.85, "total_tokens": 21436584}
|
| 6589 |
+
{"current_steps": 32860, "total_steps": 38160, "loss": 0.4705, "lr": 0.0017292125628012532, "epoch": 17.22222222222222, "percentage": 86.11, "elapsed_time": "1:24:38", "remaining_time": "0:13:39", "throughput": 4221.85, "total_tokens": 21439528}
|
| 6590 |
+
{"current_steps": 32865, "total_steps": 38160, "loss": 0.3027, "lr": 0.0017260160710916139, "epoch": 17.2248427672956, "percentage": 86.12, "elapsed_time": "1:24:38", "remaining_time": "0:13:38", "throughput": 4221.9, "total_tokens": 21442824}
|
| 6591 |
+
{"current_steps": 32870, "total_steps": 38160, "loss": 0.4039, "lr": 0.00172282235614797, "epoch": 17.22746331236897, "percentage": 86.14, "elapsed_time": "1:24:39", "remaining_time": "0:13:37", "throughput": 4221.93, "total_tokens": 21446024}
|
| 6592 |
+
{"current_steps": 32875, "total_steps": 38160, "loss": 0.2764, "lr": 0.001719631418638417, "epoch": 17.23008385744235, "percentage": 86.15, "elapsed_time": "1:24:40", "remaining_time": "0:13:36", "throughput": 4221.91, "total_tokens": 21448744}
|
| 6593 |
+
{"current_steps": 32880, "total_steps": 38160, "loss": 0.3494, "lr": 0.0017164432592304628, "epoch": 17.232704402515722, "percentage": 86.16, "elapsed_time": "1:24:41", "remaining_time": "0:13:35", "throughput": 4221.92, "total_tokens": 21451688}
|
| 6594 |
+
{"current_steps": 32885, "total_steps": 38160, "loss": 0.318, "lr": 0.001713257878591029, "epoch": 17.2353249475891, "percentage": 86.18, "elapsed_time": "1:24:41", "remaining_time": "0:13:35", "throughput": 4221.85, "total_tokens": 21453864}
|
| 6595 |
+
{"current_steps": 32890, "total_steps": 38160, "loss": 0.3615, "lr": 0.0017100752773864625, "epoch": 17.237945492662472, "percentage": 86.19, "elapsed_time": "1:24:42", "remaining_time": "0:13:34", "throughput": 4221.87, "total_tokens": 21456936}
|
| 6596 |
+
{"current_steps": 32895, "total_steps": 38160, "loss": 0.3165, "lr": 0.0017068954562825338, "epoch": 17.24056603773585, "percentage": 86.2, "elapsed_time": "1:24:43", "remaining_time": "0:13:33", "throughput": 4221.92, "total_tokens": 21460424}
|
| 6597 |
+
{"current_steps": 32900, "total_steps": 38160, "loss": 0.3355, "lr": 0.0017037184159444162, "epoch": 17.243186582809223, "percentage": 86.22, "elapsed_time": "1:24:43", "remaining_time": "0:13:32", "throughput": 4221.89, "total_tokens": 21463016}
|
| 6598 |
+
{"current_steps": 32905, "total_steps": 38160, "loss": 0.3303, "lr": 0.0017005441570367163, "epoch": 17.2458071278826, "percentage": 86.23, "elapsed_time": "1:24:44", "remaining_time": "0:13:32", "throughput": 4221.96, "total_tokens": 21466632}
|
| 6599 |
+
{"current_steps": 32910, "total_steps": 38160, "loss": 0.3073, "lr": 0.0016973726802234478, "epoch": 17.248427672955973, "percentage": 86.24, "elapsed_time": "1:24:45", "remaining_time": "0:13:31", "throughput": 4221.97, "total_tokens": 21469544}
|
| 6600 |
+
{"current_steps": 32915, "total_steps": 38160, "loss": 0.2833, "lr": 0.0016942039861680502, "epoch": 17.25104821802935, "percentage": 86.26, "elapsed_time": "1:24:45", "remaining_time": "0:13:30", "throughput": 4221.93, "total_tokens": 21472040}
|
| 6601 |
+
{"current_steps": 32920, "total_steps": 38160, "loss": 0.3187, "lr": 0.0016910380755333781, "epoch": 17.253668763102727, "percentage": 86.27, "elapsed_time": "1:24:46", "remaining_time": "0:13:29", "throughput": 4222.02, "total_tokens": 21475784}
|
| 6602 |
+
{"current_steps": 32925, "total_steps": 38160, "loss": 0.3059, "lr": 0.0016878749489817068, "epoch": 17.2562893081761, "percentage": 86.28, "elapsed_time": "1:24:47", "remaining_time": "0:13:28", "throughput": 4222.11, "total_tokens": 21479656}
|
| 6603 |
+
{"current_steps": 32930, "total_steps": 38160, "loss": 0.3022, "lr": 0.0016847146071747236, "epoch": 17.258909853249477, "percentage": 86.29, "elapsed_time": "1:24:48", "remaining_time": "0:13:28", "throughput": 4222.13, "total_tokens": 21482824}
|
| 6604 |
+
{"current_steps": 32935, "total_steps": 38160, "loss": 0.3187, "lr": 0.0016815570507735334, "epoch": 17.26153039832285, "percentage": 86.31, "elapsed_time": "1:24:48", "remaining_time": "0:13:27", "throughput": 4222.12, "total_tokens": 21485672}
|
| 6605 |
+
{"current_steps": 32940, "total_steps": 38160, "loss": 0.3124, "lr": 0.0016784022804386622, "epoch": 17.264150943396228, "percentage": 86.32, "elapsed_time": "1:24:49", "remaining_time": "0:13:26", "throughput": 4222.13, "total_tokens": 21488616}
|
| 6606 |
+
{"current_steps": 32945, "total_steps": 38160, "loss": 0.3186, "lr": 0.0016752502968300541, "epoch": 17.2667714884696, "percentage": 86.33, "elapsed_time": "1:24:50", "remaining_time": "0:13:25", "throughput": 4222.07, "total_tokens": 21490952}
|
| 6607 |
+
{"current_steps": 32950, "total_steps": 38160, "loss": 0.3617, "lr": 0.00167210110060707, "epoch": 17.269392033542978, "percentage": 86.35, "elapsed_time": "1:24:50", "remaining_time": "0:13:24", "throughput": 4222.05, "total_tokens": 21493832}
|
| 6608 |
+
{"current_steps": 32955, "total_steps": 38160, "loss": 0.3026, "lr": 0.0016689546924284798, "epoch": 17.27201257861635, "percentage": 86.36, "elapsed_time": "1:24:51", "remaining_time": "0:13:24", "throughput": 4221.99, "total_tokens": 21496136}
|
| 6609 |
+
{"current_steps": 32960, "total_steps": 38160, "loss": 0.3869, "lr": 0.0016658110729524816, "epoch": 17.27463312368973, "percentage": 86.37, "elapsed_time": "1:24:52", "remaining_time": "0:13:23", "throughput": 4222.03, "total_tokens": 21499432}
|
| 6610 |
+
{"current_steps": 32965, "total_steps": 38160, "loss": 0.4947, "lr": 0.0016626702428366796, "epoch": 17.2772536687631, "percentage": 86.39, "elapsed_time": "1:24:52", "remaining_time": "0:13:22", "throughput": 4222.04, "total_tokens": 21502472}
|
| 6611 |
+
{"current_steps": 32970, "total_steps": 38160, "loss": 0.2648, "lr": 0.0016595322027381032, "epoch": 17.27987421383648, "percentage": 86.4, "elapsed_time": "1:24:53", "remaining_time": "0:13:21", "throughput": 4222.1, "total_tokens": 21506088}
|
| 6612 |
+
{"current_steps": 32975, "total_steps": 38160, "loss": 0.2912, "lr": 0.0016563969533131939, "epoch": 17.282494758909852, "percentage": 86.41, "elapsed_time": "1:24:54", "remaining_time": "0:13:21", "throughput": 4222.09, "total_tokens": 21508904}
|
| 6613 |
+
{"current_steps": 32980, "total_steps": 38160, "loss": 0.299, "lr": 0.0016532644952178138, "epoch": 17.28511530398323, "percentage": 86.43, "elapsed_time": "1:24:55", "remaining_time": "0:13:20", "throughput": 4222.08, "total_tokens": 21511624}
|
| 6614 |
+
{"current_steps": 32985, "total_steps": 38160, "loss": 0.2494, "lr": 0.001650134829107232, "epoch": 17.287735849056602, "percentage": 86.44, "elapsed_time": "1:24:55", "remaining_time": "0:13:19", "throughput": 4222.08, "total_tokens": 21514504}
|
| 6615 |
+
{"current_steps": 32990, "total_steps": 38160, "loss": 0.4217, "lr": 0.0016470079556361382, "epoch": 17.29035639412998, "percentage": 86.45, "elapsed_time": "1:24:56", "remaining_time": "0:13:18", "throughput": 4222.07, "total_tokens": 21517256}
|
| 6616 |
+
{"current_steps": 32995, "total_steps": 38160, "loss": 0.3217, "lr": 0.001643883875458641, "epoch": 17.292976939203353, "percentage": 86.46, "elapsed_time": "1:24:57", "remaining_time": "0:13:17", "throughput": 4222.1, "total_tokens": 21520648}
|
| 6617 |
+
{"current_steps": 33000, "total_steps": 38160, "loss": 0.3184, "lr": 0.0016407625892282639, "epoch": 17.29559748427673, "percentage": 86.48, "elapsed_time": "1:24:57", "remaining_time": "0:13:17", "throughput": 4222.11, "total_tokens": 21523688}
|
| 6618 |
+
{"current_steps": 33005, "total_steps": 38160, "loss": 0.395, "lr": 0.0016376440975979416, "epoch": 17.298218029350103, "percentage": 86.49, "elapsed_time": "1:24:58", "remaining_time": "0:13:16", "throughput": 4222.14, "total_tokens": 21527016}
|
| 6619 |
+
{"current_steps": 33010, "total_steps": 38160, "loss": 0.3319, "lr": 0.0016345284012200339, "epoch": 17.30083857442348, "percentage": 86.5, "elapsed_time": "1:24:59", "remaining_time": "0:13:15", "throughput": 4222.2, "total_tokens": 21530408}
|
| 6620 |
+
{"current_steps": 33015, "total_steps": 38160, "loss": 0.4028, "lr": 0.0016314155007462977, "epoch": 17.303459119496857, "percentage": 86.52, "elapsed_time": "1:25:00", "remaining_time": "0:13:14", "throughput": 4222.19, "total_tokens": 21533288}
|
| 6621 |
+
{"current_steps": 33020, "total_steps": 38160, "loss": 0.3355, "lr": 0.0016283053968279221, "epoch": 17.30607966457023, "percentage": 86.53, "elapsed_time": "1:25:00", "remaining_time": "0:13:13", "throughput": 4222.23, "total_tokens": 21536520}
|
| 6622 |
+
{"current_steps": 33025, "total_steps": 38160, "loss": 0.3896, "lr": 0.001625198090115505, "epoch": 17.308700209643607, "percentage": 86.54, "elapsed_time": "1:25:01", "remaining_time": "0:13:13", "throughput": 4222.2, "total_tokens": 21539304}
|
| 6623 |
+
{"current_steps": 33030, "total_steps": 38160, "loss": 0.3091, "lr": 0.0016220935812590598, "epoch": 17.31132075471698, "percentage": 86.56, "elapsed_time": "1:25:02", "remaining_time": "0:13:12", "throughput": 4222.22, "total_tokens": 21542472}
|
| 6624 |
+
{"current_steps": 33035, "total_steps": 38160, "loss": 0.3956, "lr": 0.0016189918709080169, "epoch": 17.313941299790358, "percentage": 86.57, "elapsed_time": "1:25:02", "remaining_time": "0:13:11", "throughput": 4222.25, "total_tokens": 21545736}
|
| 6625 |
+
{"current_steps": 33040, "total_steps": 38160, "loss": 0.4414, "lr": 0.001615892959711216, "epoch": 17.31656184486373, "percentage": 86.58, "elapsed_time": "1:25:03", "remaining_time": "0:13:10", "throughput": 4222.26, "total_tokens": 21548744}
|
| 6626 |
+
{"current_steps": 33045, "total_steps": 38160, "loss": 0.4363, "lr": 0.0016127968483169114, "epoch": 17.319182389937108, "percentage": 86.6, "elapsed_time": "1:25:04", "remaining_time": "0:13:10", "throughput": 4222.26, "total_tokens": 21551592}
|
| 6627 |
+
{"current_steps": 33050, "total_steps": 38160, "loss": 0.345, "lr": 0.0016097035373727768, "epoch": 17.32180293501048, "percentage": 86.61, "elapsed_time": "1:25:04", "remaining_time": "0:13:09", "throughput": 4222.24, "total_tokens": 21554216}
|
| 6628 |
+
{"current_steps": 33055, "total_steps": 38160, "loss": 0.2501, "lr": 0.0016066130275258965, "epoch": 17.32442348008386, "percentage": 86.62, "elapsed_time": "1:25:05", "remaining_time": "0:13:08", "throughput": 4222.21, "total_tokens": 21556776}
|
| 6629 |
+
{"current_steps": 33060, "total_steps": 38160, "loss": 0.3911, "lr": 0.0016035253194227749, "epoch": 17.32704402515723, "percentage": 86.64, "elapsed_time": "1:25:06", "remaining_time": "0:13:07", "throughput": 4222.24, "total_tokens": 21559976}
|
| 6630 |
+
{"current_steps": 33065, "total_steps": 38160, "loss": 0.3528, "lr": 0.0016004404137093219, "epoch": 17.32966457023061, "percentage": 86.65, "elapsed_time": "1:25:06", "remaining_time": "0:13:06", "throughput": 4222.23, "total_tokens": 21562792}
|
| 6631 |
+
{"current_steps": 33070, "total_steps": 38160, "loss": 0.4055, "lr": 0.0015973583110308598, "epoch": 17.332285115303982, "percentage": 86.66, "elapsed_time": "1:25:07", "remaining_time": "0:13:06", "throughput": 4222.24, "total_tokens": 21565736}
|
| 6632 |
+
{"current_steps": 33075, "total_steps": 38160, "loss": 0.3064, "lr": 0.0015942790120321343, "epoch": 17.33490566037736, "percentage": 86.67, "elapsed_time": "1:25:08", "remaining_time": "0:13:05", "throughput": 4222.35, "total_tokens": 21569864}
|
| 6633 |
+
{"current_steps": 33080, "total_steps": 38160, "loss": 0.2279, "lr": 0.0015912025173572986, "epoch": 17.337526205450732, "percentage": 86.69, "elapsed_time": "1:25:09", "remaining_time": "0:13:04", "throughput": 4222.35, "total_tokens": 21572776}
|
| 6634 |
+
{"current_steps": 33085, "total_steps": 38160, "loss": 0.536, "lr": 0.001588128827649921, "epoch": 17.34014675052411, "percentage": 86.7, "elapsed_time": "1:25:09", "remaining_time": "0:13:03", "throughput": 4222.43, "total_tokens": 21576584}
|
| 6635 |
+
{"current_steps": 33090, "total_steps": 38160, "loss": 0.3323, "lr": 0.001585057943552986, "epoch": 17.342767295597483, "percentage": 86.71, "elapsed_time": "1:25:10", "remaining_time": "0:13:03", "throughput": 4222.44, "total_tokens": 21579496}
|
| 6636 |
+
{"current_steps": 33095, "total_steps": 38160, "loss": 0.392, "lr": 0.0015819898657088826, "epoch": 17.34538784067086, "percentage": 86.73, "elapsed_time": "1:25:11", "remaining_time": "0:13:02", "throughput": 4222.42, "total_tokens": 21582120}
|
| 6637 |
+
{"current_steps": 33100, "total_steps": 38160, "loss": 0.3889, "lr": 0.0015789245947594142, "epoch": 17.348008385744233, "percentage": 86.74, "elapsed_time": "1:25:12", "remaining_time": "0:13:01", "throughput": 4222.44, "total_tokens": 21585224}
|
| 6638 |
+
{"current_steps": 33105, "total_steps": 38160, "loss": 0.2715, "lr": 0.0015758621313458065, "epoch": 17.35062893081761, "percentage": 86.75, "elapsed_time": "1:25:12", "remaining_time": "0:13:00", "throughput": 4222.46, "total_tokens": 21588264}
|
| 6639 |
+
{"current_steps": 33110, "total_steps": 38160, "loss": 0.4209, "lr": 0.00157280247610869, "epoch": 17.353249475890987, "percentage": 86.77, "elapsed_time": "1:25:13", "remaining_time": "0:12:59", "throughput": 4222.49, "total_tokens": 21591464}
|
| 6640 |
+
{"current_steps": 33115, "total_steps": 38160, "loss": 0.3255, "lr": 0.0015697456296881128, "epoch": 17.35587002096436, "percentage": 86.78, "elapsed_time": "1:25:14", "remaining_time": "0:12:59", "throughput": 4222.45, "total_tokens": 21594024}
|
| 6641 |
+
{"current_steps": 33120, "total_steps": 38160, "loss": 0.4112, "lr": 0.0015666915927235285, "epoch": 17.358490566037737, "percentage": 86.79, "elapsed_time": "1:25:14", "remaining_time": "0:12:58", "throughput": 4222.44, "total_tokens": 21596776}
|
| 6642 |
+
{"current_steps": 33125, "total_steps": 38160, "loss": 0.2347, "lr": 0.001563640365853806, "epoch": 17.36111111111111, "percentage": 86.81, "elapsed_time": "1:25:15", "remaining_time": "0:12:57", "throughput": 4222.5, "total_tokens": 21600392}
|
| 6643 |
+
{"current_steps": 33130, "total_steps": 38160, "loss": 0.5085, "lr": 0.001560591949717228, "epoch": 17.363731656184488, "percentage": 86.82, "elapsed_time": "1:25:16", "remaining_time": "0:12:56", "throughput": 4222.56, "total_tokens": 21603880}
|
| 6644 |
+
{"current_steps": 33135, "total_steps": 38160, "loss": 0.3387, "lr": 0.0015575463449514898, "epoch": 17.36635220125786, "percentage": 86.83, "elapsed_time": "1:25:16", "remaining_time": "0:12:56", "throughput": 4222.54, "total_tokens": 21606600}
|
| 6645 |
+
{"current_steps": 33140, "total_steps": 38160, "loss": 0.4815, "lr": 0.0015545035521936995, "epoch": 17.368972746331238, "percentage": 86.84, "elapsed_time": "1:25:17", "remaining_time": "0:12:55", "throughput": 4222.67, "total_tokens": 21610856}
|
| 6646 |
+
{"current_steps": 33145, "total_steps": 38160, "loss": 0.3272, "lr": 0.0015514635720803682, "epoch": 17.37159329140461, "percentage": 86.86, "elapsed_time": "1:25:18", "remaining_time": "0:12:54", "throughput": 4222.78, "total_tokens": 21615016}
|
| 6647 |
+
{"current_steps": 33150, "total_steps": 38160, "loss": 0.409, "lr": 0.0015484264052474323, "epoch": 17.37421383647799, "percentage": 86.87, "elapsed_time": "1:25:19", "remaining_time": "0:12:53", "throughput": 4222.87, "total_tokens": 21618920}
|
| 6648 |
+
{"current_steps": 33155, "total_steps": 38160, "loss": 0.2537, "lr": 0.0015453920523302266, "epoch": 17.37683438155136, "percentage": 86.88, "elapsed_time": "1:25:20", "remaining_time": "0:12:52", "throughput": 4222.89, "total_tokens": 21621960}
|
| 6649 |
+
{"current_steps": 33160, "total_steps": 38160, "loss": 0.3156, "lr": 0.0015423605139635037, "epoch": 17.37945492662474, "percentage": 86.9, "elapsed_time": "1:25:20", "remaining_time": "0:12:52", "throughput": 4222.87, "total_tokens": 21624584}
|
| 6650 |
+
{"current_steps": 33165, "total_steps": 38160, "loss": 0.3202, "lr": 0.0015393317907814296, "epoch": 17.382075471698112, "percentage": 86.91, "elapsed_time": "1:25:21", "remaining_time": "0:12:51", "throughput": 4222.93, "total_tokens": 21628200}
|
| 6651 |
+
{"current_steps": 33170, "total_steps": 38160, "loss": 0.2614, "lr": 0.001536305883417579, "epoch": 17.38469601677149, "percentage": 86.92, "elapsed_time": "1:25:22", "remaining_time": "0:12:50", "throughput": 4222.91, "total_tokens": 21630792}
|
| 6652 |
+
{"current_steps": 33175, "total_steps": 38160, "loss": 0.4361, "lr": 0.001533282792504934, "epoch": 17.387316561844862, "percentage": 86.94, "elapsed_time": "1:25:22", "remaining_time": "0:12:49", "throughput": 4222.9, "total_tokens": 21633480}
|
| 6653 |
+
{"current_steps": 33180, "total_steps": 38160, "loss": 0.3617, "lr": 0.0015302625186758938, "epoch": 17.38993710691824, "percentage": 86.95, "elapsed_time": "1:25:23", "remaining_time": "0:12:49", "throughput": 4222.92, "total_tokens": 21636584}
|
| 6654 |
+
{"current_steps": 33185, "total_steps": 38160, "loss": 0.3276, "lr": 0.0015272450625622629, "epoch": 17.392557651991613, "percentage": 86.96, "elapsed_time": "1:25:24", "remaining_time": "0:12:48", "throughput": 4222.94, "total_tokens": 21639624}
|
| 6655 |
+
{"current_steps": 33190, "total_steps": 38160, "loss": 0.446, "lr": 0.0015242304247952581, "epoch": 17.39517819706499, "percentage": 86.98, "elapsed_time": "1:25:24", "remaining_time": "0:12:47", "throughput": 4222.92, "total_tokens": 21642248}
|
| 6656 |
+
{"current_steps": 33195, "total_steps": 38160, "loss": 0.4489, "lr": 0.0015212186060055116, "epoch": 17.397798742138363, "percentage": 86.99, "elapsed_time": "1:25:25", "remaining_time": "0:12:46", "throughput": 4222.96, "total_tokens": 21645576}
|
| 6657 |
+
{"current_steps": 33200, "total_steps": 38160, "loss": 0.4349, "lr": 0.0015182096068230577, "epoch": 17.40041928721174, "percentage": 87.0, "elapsed_time": "1:25:26", "remaining_time": "0:12:45", "throughput": 4222.98, "total_tokens": 21648712}
|
| 6658 |
+
{"current_steps": 33205, "total_steps": 38160, "loss": 0.3886, "lr": 0.0015152034278773463, "epoch": 17.403039832285117, "percentage": 87.02, "elapsed_time": "1:25:27", "remaining_time": "0:12:45", "throughput": 4222.98, "total_tokens": 21651624}
|
| 6659 |
+
{"current_steps": 33210, "total_steps": 38160, "loss": 0.4416, "lr": 0.0015122000697972327, "epoch": 17.40566037735849, "percentage": 87.03, "elapsed_time": "1:25:27", "remaining_time": "0:12:44", "throughput": 4223.08, "total_tokens": 21655624}
|
| 6660 |
+
{"current_steps": 33215, "total_steps": 38160, "loss": 0.413, "lr": 0.0015091995332109874, "epoch": 17.408280922431867, "percentage": 87.04, "elapsed_time": "1:25:28", "remaining_time": "0:12:43", "throughput": 4223.09, "total_tokens": 21658632}
|
| 6661 |
+
{"current_steps": 33220, "total_steps": 38160, "loss": 0.2863, "lr": 0.001506201818746292, "epoch": 17.41090146750524, "percentage": 87.05, "elapsed_time": "1:25:29", "remaining_time": "0:12:42", "throughput": 4223.12, "total_tokens": 21661736}
|
| 6662 |
+
{"current_steps": 33225, "total_steps": 38160, "loss": 0.4074, "lr": 0.0015032069270302284, "epoch": 17.413522012578618, "percentage": 87.07, "elapsed_time": "1:25:30", "remaining_time": "0:12:41", "throughput": 4223.13, "total_tokens": 21664776}
|
| 6663 |
+
{"current_steps": 33230, "total_steps": 38160, "loss": 0.3195, "lr": 0.001500214858689295, "epoch": 17.41614255765199, "percentage": 87.08, "elapsed_time": "1:25:30", "remaining_time": "0:12:41", "throughput": 4223.14, "total_tokens": 21667752}
|
| 6664 |
+
{"current_steps": 33235, "total_steps": 38160, "loss": 0.4871, "lr": 0.0014972256143494028, "epoch": 17.418763102725368, "percentage": 87.09, "elapsed_time": "1:25:31", "remaining_time": "0:12:40", "throughput": 4223.27, "total_tokens": 21672168}
|
| 6665 |
+
{"current_steps": 33240, "total_steps": 38160, "loss": 0.3707, "lr": 0.0014942391946358618, "epoch": 17.42138364779874, "percentage": 87.11, "elapsed_time": "1:25:32", "remaining_time": "0:12:39", "throughput": 4223.22, "total_tokens": 21674472}
|
| 6666 |
+
{"current_steps": 33245, "total_steps": 38160, "loss": 0.4961, "lr": 0.001491255600173399, "epoch": 17.42400419287212, "percentage": 87.12, "elapsed_time": "1:25:32", "remaining_time": "0:12:38", "throughput": 4223.24, "total_tokens": 21677608}
|
| 6667 |
+
{"current_steps": 33250, "total_steps": 38160, "loss": 0.3764, "lr": 0.0014882748315861514, "epoch": 17.42662473794549, "percentage": 87.13, "elapsed_time": "1:25:33", "remaining_time": "0:12:38", "throughput": 4223.25, "total_tokens": 21680552}
|
| 6668 |
+
{"current_steps": 33255, "total_steps": 38160, "loss": 0.3989, "lr": 0.0014852968894976575, "epoch": 17.42924528301887, "percentage": 87.15, "elapsed_time": "1:25:34", "remaining_time": "0:12:37", "throughput": 4223.18, "total_tokens": 21682696}
|
| 6669 |
+
{"current_steps": 33260, "total_steps": 38160, "loss": 0.3894, "lr": 0.0014823217745308687, "epoch": 17.431865828092242, "percentage": 87.16, "elapsed_time": "1:25:35", "remaining_time": "0:12:36", "throughput": 4223.31, "total_tokens": 21686920}
|
| 6670 |
+
{"current_steps": 33265, "total_steps": 38160, "loss": 0.2692, "lr": 0.0014793494873081502, "epoch": 17.43448637316562, "percentage": 87.17, "elapsed_time": "1:25:35", "remaining_time": "0:12:35", "throughput": 4223.28, "total_tokens": 21689416}
|
| 6671 |
+
{"current_steps": 33270, "total_steps": 38160, "loss": 0.2444, "lr": 0.001476380028451265, "epoch": 17.437106918238992, "percentage": 87.19, "elapsed_time": "1:25:36", "remaining_time": "0:12:34", "throughput": 4223.38, "total_tokens": 21693480}
|
| 6672 |
+
{"current_steps": 33275, "total_steps": 38160, "loss": 0.3571, "lr": 0.0014734133985813947, "epoch": 17.43972746331237, "percentage": 87.2, "elapsed_time": "1:25:37", "remaining_time": "0:12:34", "throughput": 4223.38, "total_tokens": 21696296}
|
| 6673 |
+
{"current_steps": 33280, "total_steps": 38160, "loss": 0.3066, "lr": 0.001470449598319119, "epoch": 17.442348008385743, "percentage": 87.21, "elapsed_time": "1:25:38", "remaining_time": "0:12:33", "throughput": 4223.6, "total_tokens": 21702088}
|
| 6674 |
+
{"current_steps": 33285, "total_steps": 38160, "loss": 0.4009, "lr": 0.001467488628284434, "epoch": 17.44496855345912, "percentage": 87.22, "elapsed_time": "1:25:38", "remaining_time": "0:12:32", "throughput": 4223.62, "total_tokens": 21705160}
|
| 6675 |
+
{"current_steps": 33290, "total_steps": 38160, "loss": 0.4519, "lr": 0.0014645304890967409, "epoch": 17.447589098532493, "percentage": 87.24, "elapsed_time": "1:25:39", "remaining_time": "0:12:31", "throughput": 4223.65, "total_tokens": 21708328}
|
| 6676 |
+
{"current_steps": 33295, "total_steps": 38160, "loss": 0.506, "lr": 0.0014615751813748528, "epoch": 17.45020964360587, "percentage": 87.25, "elapsed_time": "1:25:40", "remaining_time": "0:12:31", "throughput": 4223.88, "total_tokens": 21713768}
|
| 6677 |
+
{"current_steps": 33300, "total_steps": 38160, "loss": 0.3227, "lr": 0.0014586227057369805, "epoch": 17.452830188679247, "percentage": 87.26, "elapsed_time": "1:25:41", "remaining_time": "0:12:30", "throughput": 4223.93, "total_tokens": 21717192}
|
| 6678 |
+
{"current_steps": 33305, "total_steps": 38160, "loss": 0.4205, "lr": 0.0014556730628007479, "epoch": 17.45545073375262, "percentage": 87.28, "elapsed_time": "1:25:42", "remaining_time": "0:12:29", "throughput": 4224.03, "total_tokens": 21721384}
|
| 6679 |
+
{"current_steps": 33310, "total_steps": 38160, "loss": 0.3632, "lr": 0.0014527262531831885, "epoch": 17.458071278825997, "percentage": 87.29, "elapsed_time": "1:25:43", "remaining_time": "0:12:28", "throughput": 4224.05, "total_tokens": 21724392}
|
| 6680 |
+
{"current_steps": 33315, "total_steps": 38160, "loss": 0.4045, "lr": 0.0014497822775007424, "epoch": 17.46069182389937, "percentage": 87.3, "elapsed_time": "1:25:43", "remaining_time": "0:12:28", "throughput": 4224.04, "total_tokens": 21727208}
|
| 6681 |
+
{"current_steps": 33320, "total_steps": 38160, "loss": 0.2817, "lr": 0.0014468411363692567, "epoch": 17.463312368972748, "percentage": 87.32, "elapsed_time": "1:25:44", "remaining_time": "0:12:27", "throughput": 4224.03, "total_tokens": 21729960}
|
| 6682 |
+
{"current_steps": 33325, "total_steps": 38160, "loss": 0.3727, "lr": 0.0014439028304039797, "epoch": 17.46593291404612, "percentage": 87.33, "elapsed_time": "1:25:45", "remaining_time": "0:12:26", "throughput": 4224.15, "total_tokens": 21734216}
|
| 6683 |
+
{"current_steps": 33330, "total_steps": 38160, "loss": 0.4124, "lr": 0.0014409673602195777, "epoch": 17.468553459119498, "percentage": 87.34, "elapsed_time": "1:25:46", "remaining_time": "0:12:25", "throughput": 4224.25, "total_tokens": 21738152}
|
| 6684 |
+
{"current_steps": 33335, "total_steps": 38160, "loss": 0.3939, "lr": 0.0014380347264301113, "epoch": 17.47117400419287, "percentage": 87.36, "elapsed_time": "1:25:46", "remaining_time": "0:12:24", "throughput": 4224.3, "total_tokens": 21741576}
|
| 6685 |
+
{"current_steps": 33340, "total_steps": 38160, "loss": 0.2836, "lr": 0.0014351049296490565, "epoch": 17.47379454926625, "percentage": 87.37, "elapsed_time": "1:25:47", "remaining_time": "0:12:24", "throughput": 4224.37, "total_tokens": 21745224}
|
| 6686 |
+
{"current_steps": 33345, "total_steps": 38160, "loss": 0.4377, "lr": 0.0014321779704892944, "epoch": 17.47641509433962, "percentage": 87.38, "elapsed_time": "1:25:48", "remaining_time": "0:12:23", "throughput": 4224.4, "total_tokens": 21748424}
|
| 6687 |
+
{"current_steps": 33350, "total_steps": 38160, "loss": 0.3713, "lr": 0.001429253849563114, "epoch": 17.479035639413, "percentage": 87.4, "elapsed_time": "1:25:48", "remaining_time": "0:12:22", "throughput": 4224.41, "total_tokens": 21751336}
|
| 6688 |
+
{"current_steps": 33355, "total_steps": 38160, "loss": 0.3011, "lr": 0.001426332567482204, "epoch": 17.481656184486372, "percentage": 87.41, "elapsed_time": "1:25:49", "remaining_time": "0:12:21", "throughput": 4224.44, "total_tokens": 21754568}
|
| 6689 |
+
{"current_steps": 33360, "total_steps": 38160, "loss": 0.3048, "lr": 0.0014234141248576626, "epoch": 17.48427672955975, "percentage": 87.42, "elapsed_time": "1:25:50", "remaining_time": "0:12:21", "throughput": 4224.48, "total_tokens": 21757832}
|
| 6690 |
+
{"current_steps": 33365, "total_steps": 38160, "loss": 0.2928, "lr": 0.001420498522299996, "epoch": 17.486897274633122, "percentage": 87.43, "elapsed_time": "1:25:51", "remaining_time": "0:12:20", "throughput": 4224.5, "total_tokens": 21760968}
|
| 6691 |
+
{"current_steps": 33370, "total_steps": 38160, "loss": 0.3082, "lr": 0.0014175857604191166, "epoch": 17.4895178197065, "percentage": 87.45, "elapsed_time": "1:25:51", "remaining_time": "0:12:19", "throughput": 4224.62, "total_tokens": 21765160}
|
| 6692 |
+
{"current_steps": 33375, "total_steps": 38160, "loss": 0.4813, "lr": 0.0014146758398243403, "epoch": 17.492138364779873, "percentage": 87.46, "elapsed_time": "1:25:52", "remaining_time": "0:12:18", "throughput": 4224.66, "total_tokens": 21768584}
|
| 6693 |
+
{"current_steps": 33380, "total_steps": 38160, "loss": 0.3548, "lr": 0.00141176876112439, "epoch": 17.49475890985325, "percentage": 87.47, "elapsed_time": "1:25:53", "remaining_time": "0:12:17", "throughput": 4224.65, "total_tokens": 21771304}
|
| 6694 |
+
{"current_steps": 33385, "total_steps": 38160, "loss": 0.4201, "lr": 0.0014088645249273894, "epoch": 17.497379454926623, "percentage": 87.49, "elapsed_time": "1:25:54", "remaining_time": "0:12:17", "throughput": 4224.68, "total_tokens": 21774568}
|
| 6695 |
+
{"current_steps": 33390, "total_steps": 38160, "loss": 0.4537, "lr": 0.0014059631318408742, "epoch": 17.5, "percentage": 87.5, "elapsed_time": "1:25:54", "remaining_time": "0:12:16", "throughput": 4224.7, "total_tokens": 21777608}
|
| 6696 |
+
{"current_steps": 33395, "total_steps": 38160, "loss": 0.338, "lr": 0.0014030645824717825, "epoch": 17.502620545073377, "percentage": 87.51, "elapsed_time": "1:25:55", "remaining_time": "0:12:15", "throughput": 4224.77, "total_tokens": 21781288}
|
| 6697 |
+
{"current_steps": 33400, "total_steps": 38160, "loss": 0.4108, "lr": 0.0014001688774264586, "epoch": 17.50524109014675, "percentage": 87.53, "elapsed_time": "1:25:56", "remaining_time": "0:12:14", "throughput": 4224.77, "total_tokens": 21784168}
|
| 6698 |
+
{"current_steps": 33405, "total_steps": 38160, "loss": 0.3368, "lr": 0.0013972760173106536, "epoch": 17.507861635220127, "percentage": 87.54, "elapsed_time": "1:25:56", "remaining_time": "0:12:14", "throughput": 4224.75, "total_tokens": 21786760}
|
| 6699 |
+
{"current_steps": 33410, "total_steps": 38160, "loss": 0.3319, "lr": 0.0013943860027295178, "epoch": 17.5104821802935, "percentage": 87.55, "elapsed_time": "1:25:57", "remaining_time": "0:12:13", "throughput": 4224.72, "total_tokens": 21789352}
|
| 6700 |
+
{"current_steps": 33415, "total_steps": 38160, "loss": 0.4001, "lr": 0.001391498834287605, "epoch": 17.513102725366878, "percentage": 87.57, "elapsed_time": "1:25:58", "remaining_time": "0:12:12", "throughput": 4224.72, "total_tokens": 21792296}
|
| 6701 |
+
{"current_steps": 33420, "total_steps": 38160, "loss": 0.3236, "lr": 0.0013886145125888833, "epoch": 17.51572327044025, "percentage": 87.58, "elapsed_time": "1:25:58", "remaining_time": "0:12:11", "throughput": 4224.69, "total_tokens": 21794824}
|
| 6702 |
+
{"current_steps": 33425, "total_steps": 38160, "loss": 0.3415, "lr": 0.0013857330382367184, "epoch": 17.518343815513628, "percentage": 87.59, "elapsed_time": "1:25:59", "remaining_time": "0:12:10", "throughput": 4224.71, "total_tokens": 21797928}
|
| 6703 |
+
{"current_steps": 33430, "total_steps": 38160, "loss": 0.561, "lr": 0.0013828544118338847, "epoch": 17.520964360587, "percentage": 87.6, "elapsed_time": "1:26:00", "remaining_time": "0:12:10", "throughput": 4224.76, "total_tokens": 21801352}
|
| 6704 |
+
{"current_steps": 33435, "total_steps": 38160, "loss": 0.339, "lr": 0.001379978633982556, "epoch": 17.52358490566038, "percentage": 87.62, "elapsed_time": "1:26:01", "remaining_time": "0:12:09", "throughput": 4224.89, "total_tokens": 21805768}
|
| 6705 |
+
{"current_steps": 33440, "total_steps": 38160, "loss": 0.2803, "lr": 0.001377105705284307, "epoch": 17.52620545073375, "percentage": 87.63, "elapsed_time": "1:26:01", "remaining_time": "0:12:08", "throughput": 4224.89, "total_tokens": 21808648}
|
| 6706 |
+
{"current_steps": 33445, "total_steps": 38160, "loss": 0.3737, "lr": 0.001374235626340128, "epoch": 17.52882599580713, "percentage": 87.64, "elapsed_time": "1:26:02", "remaining_time": "0:12:07", "throughput": 4224.85, "total_tokens": 21811080}
|
| 6707 |
+
{"current_steps": 33450, "total_steps": 38160, "loss": 0.2897, "lr": 0.0013713683977504042, "epoch": 17.531446540880502, "percentage": 87.66, "elapsed_time": "1:26:03", "remaining_time": "0:12:07", "throughput": 4224.91, "total_tokens": 21814568}
|
| 6708 |
+
{"current_steps": 33455, "total_steps": 38160, "loss": 0.3355, "lr": 0.0013685040201149316, "epoch": 17.53406708595388, "percentage": 87.67, "elapsed_time": "1:26:03", "remaining_time": "0:12:06", "throughput": 4224.91, "total_tokens": 21817416}
|
| 6709 |
+
{"current_steps": 33460, "total_steps": 38160, "loss": 0.3509, "lr": 0.0013656424940328993, "epoch": 17.536687631027252, "percentage": 87.68, "elapsed_time": "1:26:04", "remaining_time": "0:12:05", "throughput": 4224.94, "total_tokens": 21820648}
|
| 6710 |
+
{"current_steps": 33465, "total_steps": 38160, "loss": 0.3862, "lr": 0.0013627838201029096, "epoch": 17.53930817610063, "percentage": 87.7, "elapsed_time": "1:26:05", "remaining_time": "0:12:04", "throughput": 4224.94, "total_tokens": 21823560}
|
| 6711 |
+
{"current_steps": 33470, "total_steps": 38160, "loss": 0.4364, "lr": 0.0013599279989229612, "epoch": 17.541928721174003, "percentage": 87.71, "elapsed_time": "1:26:06", "remaining_time": "0:12:03", "throughput": 4225.01, "total_tokens": 21827208}
|
| 6712 |
+
{"current_steps": 33475, "total_steps": 38160, "loss": 0.4299, "lr": 0.001357075031090461, "epoch": 17.54454926624738, "percentage": 87.72, "elapsed_time": "1:26:06", "remaining_time": "0:12:03", "throughput": 4225.01, "total_tokens": 21830120}
|
| 6713 |
+
{"current_steps": 33480, "total_steps": 38160, "loss": 0.3608, "lr": 0.001354224917202217, "epoch": 17.547169811320753, "percentage": 87.74, "elapsed_time": "1:26:07", "remaining_time": "0:12:02", "throughput": 4225.02, "total_tokens": 21833064}
|
| 6714 |
+
{"current_steps": 33485, "total_steps": 38160, "loss": 0.3748, "lr": 0.001351377657854445, "epoch": 17.54979035639413, "percentage": 87.75, "elapsed_time": "1:26:08", "remaining_time": "0:12:01", "throughput": 4224.99, "total_tokens": 21835752}
|
| 6715 |
+
{"current_steps": 33490, "total_steps": 38160, "loss": 0.4422, "lr": 0.001348533253642754, "epoch": 17.552410901467507, "percentage": 87.76, "elapsed_time": "1:26:08", "remaining_time": "0:12:00", "throughput": 4225.0, "total_tokens": 21838664}
|
| 6716 |
+
{"current_steps": 33495, "total_steps": 38160, "loss": 0.3835, "lr": 0.0013456917051621613, "epoch": 17.55503144654088, "percentage": 87.78, "elapsed_time": "1:26:09", "remaining_time": "0:12:00", "throughput": 4225.06, "total_tokens": 21842184}
|
| 6717 |
+
{"current_steps": 33500, "total_steps": 38160, "loss": 0.3616, "lr": 0.0013428530130070865, "epoch": 17.557651991614257, "percentage": 87.79, "elapsed_time": "1:26:10", "remaining_time": "0:11:59", "throughput": 4225.07, "total_tokens": 21845224}
|
| 6718 |
+
{"current_steps": 33505, "total_steps": 38160, "loss": 0.548, "lr": 0.001340017177771351, "epoch": 17.56027253668763, "percentage": 87.8, "elapsed_time": "1:26:11", "remaining_time": "0:11:58", "throughput": 4225.12, "total_tokens": 21848552}
|
| 6719 |
+
{"current_steps": 33510, "total_steps": 38160, "loss": 0.31, "lr": 0.0013371842000481854, "epoch": 17.562893081761008, "percentage": 87.81, "elapsed_time": "1:26:11", "remaining_time": "0:11:57", "throughput": 4225.07, "total_tokens": 21850856}
|
| 6720 |
+
{"current_steps": 33515, "total_steps": 38160, "loss": 0.2463, "lr": 0.0013343540804302073, "epoch": 17.56551362683438, "percentage": 87.83, "elapsed_time": "1:26:12", "remaining_time": "0:11:56", "throughput": 4225.25, "total_tokens": 21856104}
|
| 6721 |
+
{"current_steps": 33520, "total_steps": 38160, "loss": 0.2938, "lr": 0.0013315268195094531, "epoch": 17.568134171907758, "percentage": 87.84, "elapsed_time": "1:26:13", "remaining_time": "0:11:56", "throughput": 4225.2, "total_tokens": 21858440}
|
| 6722 |
+
{"current_steps": 33525, "total_steps": 38160, "loss": 0.5363, "lr": 0.0013287024178773464, "epoch": 17.57075471698113, "percentage": 87.85, "elapsed_time": "1:26:14", "remaining_time": "0:11:55", "throughput": 4225.17, "total_tokens": 21861064}
|
| 6723 |
+
{"current_steps": 33530, "total_steps": 38160, "loss": 0.2107, "lr": 0.0013258808761247248, "epoch": 17.57337526205451, "percentage": 87.87, "elapsed_time": "1:26:14", "remaining_time": "0:11:54", "throughput": 4225.16, "total_tokens": 21863848}
|
| 6724 |
+
{"current_steps": 33535, "total_steps": 38160, "loss": 0.3549, "lr": 0.0013230621948418224, "epoch": 17.57599580712788, "percentage": 87.88, "elapsed_time": "1:26:15", "remaining_time": "0:11:53", "throughput": 4225.2, "total_tokens": 21867208}
|
| 6725 |
+
{"current_steps": 33540, "total_steps": 38160, "loss": 0.333, "lr": 0.001320246374618273, "epoch": 17.57861635220126, "percentage": 87.89, "elapsed_time": "1:26:16", "remaining_time": "0:11:53", "throughput": 4225.36, "total_tokens": 21871944}
|
| 6726 |
+
{"current_steps": 33545, "total_steps": 38160, "loss": 0.1826, "lr": 0.001317433416043115, "epoch": 17.581236897274632, "percentage": 87.91, "elapsed_time": "1:26:17", "remaining_time": "0:11:52", "throughput": 4225.36, "total_tokens": 21874728}
|
| 6727 |
+
{"current_steps": 33550, "total_steps": 38160, "loss": 0.3017, "lr": 0.0013146233197047896, "epoch": 17.58385744234801, "percentage": 87.92, "elapsed_time": "1:26:17", "remaining_time": "0:11:51", "throughput": 4225.32, "total_tokens": 21877320}
|
| 6728 |
+
{"current_steps": 33555, "total_steps": 38160, "loss": 0.3312, "lr": 0.0013118160861911314, "epoch": 17.586477987421382, "percentage": 87.93, "elapsed_time": "1:26:18", "remaining_time": "0:11:50", "throughput": 4225.34, "total_tokens": 21880488}
|
| 6729 |
+
{"current_steps": 33560, "total_steps": 38160, "loss": 0.3926, "lr": 0.001309011716089386, "epoch": 17.58909853249476, "percentage": 87.95, "elapsed_time": "1:26:19", "remaining_time": "0:11:49", "throughput": 4225.31, "total_tokens": 21883080}
|
| 6730 |
+
{"current_steps": 33565, "total_steps": 38160, "loss": 0.4223, "lr": 0.0013062102099861955, "epoch": 17.591719077568133, "percentage": 87.96, "elapsed_time": "1:26:19", "remaining_time": "0:11:49", "throughput": 4225.37, "total_tokens": 21886536}
|
| 6731 |
+
{"current_steps": 33570, "total_steps": 38160, "loss": 0.3628, "lr": 0.001303411568467601, "epoch": 17.59433962264151, "percentage": 87.97, "elapsed_time": "1:26:20", "remaining_time": "0:11:48", "throughput": 4225.41, "total_tokens": 21889896}
|
| 6732 |
+
{"current_steps": 33575, "total_steps": 38160, "loss": 0.3385, "lr": 0.0013006157921190498, "epoch": 17.596960167714883, "percentage": 87.98, "elapsed_time": "1:26:21", "remaining_time": "0:11:47", "throughput": 4225.45, "total_tokens": 21893256}
|
| 6733 |
+
{"current_steps": 33580, "total_steps": 38160, "loss": 0.3295, "lr": 0.0012978228815253817, "epoch": 17.59958071278826, "percentage": 88.0, "elapsed_time": "1:26:21", "remaining_time": "0:11:46", "throughput": 4225.47, "total_tokens": 21896328}
|
| 6734 |
+
{"current_steps": 33585, "total_steps": 38160, "loss": 0.4199, "lr": 0.001295032837270843, "epoch": 17.602201257861637, "percentage": 88.01, "elapsed_time": "1:26:22", "remaining_time": "0:11:45", "throughput": 4225.44, "total_tokens": 21898856}
|
| 6735 |
+
{"current_steps": 33590, "total_steps": 38160, "loss": 0.3436, "lr": 0.0012922456599390834, "epoch": 17.60482180293501, "percentage": 88.02, "elapsed_time": "1:26:23", "remaining_time": "0:11:45", "throughput": 4225.46, "total_tokens": 21901992}
|
| 6736 |
+
{"current_steps": 33595, "total_steps": 38160, "loss": 0.2795, "lr": 0.0012894613501131435, "epoch": 17.607442348008387, "percentage": 88.04, "elapsed_time": "1:26:24", "remaining_time": "0:11:44", "throughput": 4225.46, "total_tokens": 21904904}
|
| 6737 |
+
{"current_steps": 33600, "total_steps": 38160, "loss": 0.3064, "lr": 0.0012866799083754703, "epoch": 17.61006289308176, "percentage": 88.05, "elapsed_time": "1:26:24", "remaining_time": "0:11:43", "throughput": 4225.55, "total_tokens": 21908808}
|
| 6738 |
+
{"current_steps": 33605, "total_steps": 38160, "loss": 0.335, "lr": 0.0012839013353079136, "epoch": 17.612683438155138, "percentage": 88.06, "elapsed_time": "1:26:25", "remaining_time": "0:11:42", "throughput": 4225.56, "total_tokens": 21911720}
|
| 6739 |
+
{"current_steps": 33610, "total_steps": 38160, "loss": 0.2681, "lr": 0.0012811256314917135, "epoch": 17.61530398322851, "percentage": 88.08, "elapsed_time": "1:26:26", "remaining_time": "0:11:42", "throughput": 4225.5, "total_tokens": 21914024}
|
| 6740 |
+
{"current_steps": 33615, "total_steps": 38160, "loss": 0.278, "lr": 0.0012783527975075203, "epoch": 17.617924528301888, "percentage": 88.09, "elapsed_time": "1:26:26", "remaining_time": "0:11:41", "throughput": 4225.48, "total_tokens": 21916648}
|
| 6741 |
+
{"current_steps": 33620, "total_steps": 38160, "loss": 0.359, "lr": 0.0012755828339353736, "epoch": 17.62054507337526, "percentage": 88.1, "elapsed_time": "1:26:27", "remaining_time": "0:11:40", "throughput": 4225.54, "total_tokens": 21920136}
|
| 6742 |
+
{"current_steps": 33625, "total_steps": 38160, "loss": 0.2832, "lr": 0.0012728157413547231, "epoch": 17.62316561844864, "percentage": 88.12, "elapsed_time": "1:26:28", "remaining_time": "0:11:39", "throughput": 4225.65, "total_tokens": 21924168}
|
| 6743 |
+
{"current_steps": 33630, "total_steps": 38160, "loss": 0.3578, "lr": 0.0012700515203444107, "epoch": 17.62578616352201, "percentage": 88.13, "elapsed_time": "1:26:29", "remaining_time": "0:11:39", "throughput": 4226.0, "total_tokens": 21931656}
|
| 6744 |
+
{"current_steps": 33635, "total_steps": 38160, "loss": 0.312, "lr": 0.0012672901714826839, "epoch": 17.62840670859539, "percentage": 88.14, "elapsed_time": "1:26:30", "remaining_time": "0:11:38", "throughput": 4225.96, "total_tokens": 21934184}
|
| 6745 |
+
{"current_steps": 33640, "total_steps": 38160, "loss": 0.4219, "lr": 0.001264531695347179, "epoch": 17.631027253668762, "percentage": 88.16, "elapsed_time": "1:26:31", "remaining_time": "0:11:37", "throughput": 4226.12, "total_tokens": 21938856}
|
| 6746 |
+
{"current_steps": 33645, "total_steps": 38160, "loss": 0.287, "lr": 0.0012617760925149428, "epoch": 17.63364779874214, "percentage": 88.17, "elapsed_time": "1:26:31", "remaining_time": "0:11:36", "throughput": 4226.11, "total_tokens": 21941736}
|
| 6747 |
+
{"current_steps": 33650, "total_steps": 38160, "loss": 0.4117, "lr": 0.0012590233635624126, "epoch": 17.636268343815512, "percentage": 88.18, "elapsed_time": "1:26:32", "remaining_time": "0:11:35", "throughput": 4226.17, "total_tokens": 21945256}
|
| 6748 |
+
{"current_steps": 33655, "total_steps": 38160, "loss": 0.3318, "lr": 0.0012562735090654292, "epoch": 17.63888888888889, "percentage": 88.19, "elapsed_time": "1:26:33", "remaining_time": "0:11:35", "throughput": 4226.1, "total_tokens": 21947432}
|
| 6749 |
+
{"current_steps": 33660, "total_steps": 38160, "loss": 0.4229, "lr": 0.0012535265295992308, "epoch": 17.641509433962263, "percentage": 88.21, "elapsed_time": "1:26:34", "remaining_time": "0:11:34", "throughput": 4226.13, "total_tokens": 21950664}
|
| 6750 |
+
{"current_steps": 33665, "total_steps": 38160, "loss": 0.3847, "lr": 0.0012507824257384581, "epoch": 17.64412997903564, "percentage": 88.22, "elapsed_time": "1:26:34", "remaining_time": "0:11:33", "throughput": 4226.13, "total_tokens": 21953704}
|
| 6751 |
+
{"current_steps": 33670, "total_steps": 38160, "loss": 0.2912, "lr": 0.0012480411980571415, "epoch": 17.646750524109013, "percentage": 88.23, "elapsed_time": "1:26:35", "remaining_time": "0:11:32", "throughput": 4226.17, "total_tokens": 21956936}
|
| 6752 |
+
{"current_steps": 33675, "total_steps": 38160, "loss": 0.3565, "lr": 0.0012453028471287124, "epoch": 17.64937106918239, "percentage": 88.25, "elapsed_time": "1:26:36", "remaining_time": "0:11:32", "throughput": 4226.2, "total_tokens": 21960200}
|
| 6753 |
+
{"current_steps": 33680, "total_steps": 38160, "loss": 0.3221, "lr": 0.0012425673735260061, "epoch": 17.651991614255767, "percentage": 88.26, "elapsed_time": "1:26:36", "remaining_time": "0:11:31", "throughput": 4226.18, "total_tokens": 21962824}
|
| 6754 |
+
{"current_steps": 33685, "total_steps": 38160, "loss": 0.3622, "lr": 0.0012398347778212531, "epoch": 17.65461215932914, "percentage": 88.27, "elapsed_time": "1:26:37", "remaining_time": "0:11:30", "throughput": 4226.25, "total_tokens": 21966504}
|
| 6755 |
+
{"current_steps": 33690, "total_steps": 38160, "loss": 0.347, "lr": 0.0012371050605860829, "epoch": 17.657232704402517, "percentage": 88.29, "elapsed_time": "1:26:38", "remaining_time": "0:11:29", "throughput": 4226.37, "total_tokens": 21970664}
|
| 6756 |
+
{"current_steps": 33695, "total_steps": 38160, "loss": 0.3037, "lr": 0.0012343782223915183, "epoch": 17.65985324947589, "percentage": 88.3, "elapsed_time": "1:26:39", "remaining_time": "0:11:28", "throughput": 4226.44, "total_tokens": 21974312}
|
| 6757 |
+
{"current_steps": 33700, "total_steps": 38160, "loss": 0.4432, "lr": 0.0012316542638079802, "epoch": 17.662473794549268, "percentage": 88.31, "elapsed_time": "1:26:39", "remaining_time": "0:11:28", "throughput": 4226.44, "total_tokens": 21977288}
|
| 6758 |
+
{"current_steps": 33705, "total_steps": 38160, "loss": 0.315, "lr": 0.0012289331854052927, "epoch": 17.66509433962264, "percentage": 88.33, "elapsed_time": "1:26:40", "remaining_time": "0:11:27", "throughput": 4226.4, "total_tokens": 21979720}
|
| 6759 |
+
{"current_steps": 33710, "total_steps": 38160, "loss": 0.3506, "lr": 0.0012262149877526735, "epoch": 17.667714884696018, "percentage": 88.34, "elapsed_time": "1:26:41", "remaining_time": "0:11:26", "throughput": 4226.4, "total_tokens": 21982600}
|
| 6760 |
+
{"current_steps": 33715, "total_steps": 38160, "loss": 0.3453, "lr": 0.0012234996714187395, "epoch": 17.67033542976939, "percentage": 88.35, "elapsed_time": "1:26:42", "remaining_time": "0:11:25", "throughput": 4226.46, "total_tokens": 21986184}
|
| 6761 |
+
{"current_steps": 33720, "total_steps": 38160, "loss": 0.4235, "lr": 0.0012207872369715067, "epoch": 17.67295597484277, "percentage": 88.36, "elapsed_time": "1:26:42", "remaining_time": "0:11:25", "throughput": 4226.48, "total_tokens": 21989352}
|
| 6762 |
+
{"current_steps": 33725, "total_steps": 38160, "loss": 0.3318, "lr": 0.0012180776849783825, "epoch": 17.67557651991614, "percentage": 88.38, "elapsed_time": "1:26:43", "remaining_time": "0:11:24", "throughput": 4226.47, "total_tokens": 21992072}
|
| 6763 |
+
{"current_steps": 33730, "total_steps": 38160, "loss": 0.3965, "lr": 0.0012153710160061704, "epoch": 17.67819706498952, "percentage": 88.39, "elapsed_time": "1:26:44", "remaining_time": "0:11:23", "throughput": 4226.57, "total_tokens": 21996200}
|
| 6764 |
+
{"current_steps": 33735, "total_steps": 38160, "loss": 0.3472, "lr": 0.0012126672306210788, "epoch": 17.680817610062892, "percentage": 88.4, "elapsed_time": "1:26:44", "remaining_time": "0:11:22", "throughput": 4226.54, "total_tokens": 21998728}
|
| 6765 |
+
{"current_steps": 33740, "total_steps": 38160, "loss": 0.3751, "lr": 0.001209966329388709, "epoch": 17.68343815513627, "percentage": 88.42, "elapsed_time": "1:26:45", "remaining_time": "0:11:21", "throughput": 4226.52, "total_tokens": 22001480}
|
| 6766 |
+
{"current_steps": 33745, "total_steps": 38160, "loss": 0.4695, "lr": 0.0012072683128740607, "epoch": 17.686058700209642, "percentage": 88.43, "elapsed_time": "1:26:46", "remaining_time": "0:11:21", "throughput": 4226.66, "total_tokens": 22005800}
|
| 6767 |
+
{"current_steps": 33750, "total_steps": 38160, "loss": 0.254, "lr": 0.0012045731816415256, "epoch": 17.68867924528302, "percentage": 88.44, "elapsed_time": "1:26:47", "remaining_time": "0:11:20", "throughput": 4226.7, "total_tokens": 22009096}
|
| 6768 |
+
{"current_steps": 33755, "total_steps": 38160, "loss": 0.3314, "lr": 0.001201880936254891, "epoch": 17.691299790356393, "percentage": 88.46, "elapsed_time": "1:26:47", "remaining_time": "0:11:19", "throughput": 4226.71, "total_tokens": 22012072}
|
| 6769 |
+
{"current_steps": 33760, "total_steps": 38160, "loss": 0.4468, "lr": 0.0011991915772773482, "epoch": 17.69392033542977, "percentage": 88.47, "elapsed_time": "1:26:48", "remaining_time": "0:11:18", "throughput": 4226.74, "total_tokens": 22015304}
|
| 6770 |
+
{"current_steps": 33765, "total_steps": 38160, "loss": 0.3097, "lr": 0.0011965051052714787, "epoch": 17.696540880503143, "percentage": 88.48, "elapsed_time": "1:26:49", "remaining_time": "0:11:18", "throughput": 4226.81, "total_tokens": 22019016}
|
| 6771 |
+
{"current_steps": 33770, "total_steps": 38160, "loss": 0.3999, "lr": 0.0011938215207992647, "epoch": 17.69916142557652, "percentage": 88.5, "elapsed_time": "1:26:50", "remaining_time": "0:11:17", "throughput": 4226.82, "total_tokens": 22021992}
|
| 6772 |
+
{"current_steps": 33775, "total_steps": 38160, "loss": 0.2875, "lr": 0.0011911408244220806, "epoch": 17.701781970649897, "percentage": 88.51, "elapsed_time": "1:26:50", "remaining_time": "0:11:16", "throughput": 4226.81, "total_tokens": 22024808}
|
| 6773 |
+
{"current_steps": 33780, "total_steps": 38160, "loss": 0.3619, "lr": 0.0011884630167006925, "epoch": 17.70440251572327, "percentage": 88.52, "elapsed_time": "1:26:51", "remaining_time": "0:11:15", "throughput": 4226.78, "total_tokens": 22027336}
|
| 6774 |
+
{"current_steps": 33785, "total_steps": 38160, "loss": 0.3339, "lr": 0.0011857880981952728, "epoch": 17.707023060796647, "percentage": 88.54, "elapsed_time": "1:26:52", "remaining_time": "0:11:14", "throughput": 4226.8, "total_tokens": 22030440}
|
| 6775 |
+
{"current_steps": 33790, "total_steps": 38160, "loss": 0.4687, "lr": 0.0011831160694653802, "epoch": 17.70964360587002, "percentage": 88.55, "elapsed_time": "1:26:52", "remaining_time": "0:11:14", "throughput": 4226.79, "total_tokens": 22033160}
|
| 6776 |
+
{"current_steps": 33795, "total_steps": 38160, "loss": 0.205, "lr": 0.001180446931069976, "epoch": 17.712264150943398, "percentage": 88.56, "elapsed_time": "1:26:53", "remaining_time": "0:11:13", "throughput": 4226.76, "total_tokens": 22035720}
|
| 6777 |
+
{"current_steps": 33800, "total_steps": 38160, "loss": 0.4249, "lr": 0.0011777806835674136, "epoch": 17.71488469601677, "percentage": 88.57, "elapsed_time": "1:26:54", "remaining_time": "0:11:12", "throughput": 4226.76, "total_tokens": 22038664}
|
| 6778 |
+
{"current_steps": 33805, "total_steps": 38160, "loss": 0.2835, "lr": 0.0011751173275154402, "epoch": 17.717505241090148, "percentage": 88.59, "elapsed_time": "1:26:54", "remaining_time": "0:11:11", "throughput": 4226.77, "total_tokens": 22041512}
|
| 6779 |
+
{"current_steps": 33810, "total_steps": 38160, "loss": 0.356, "lr": 0.0011724568634711968, "epoch": 17.72012578616352, "percentage": 88.6, "elapsed_time": "1:26:55", "remaining_time": "0:11:11", "throughput": 4226.85, "total_tokens": 22045320}
|
| 6780 |
+
{"current_steps": 33815, "total_steps": 38160, "loss": 0.2648, "lr": 0.0011697992919912248, "epoch": 17.7227463312369, "percentage": 88.61, "elapsed_time": "1:26:56", "remaining_time": "0:11:10", "throughput": 4226.85, "total_tokens": 22048232}
|
| 6781 |
+
{"current_steps": 33820, "total_steps": 38160, "loss": 0.3178, "lr": 0.0011671446136314561, "epoch": 17.72536687631027, "percentage": 88.63, "elapsed_time": "1:26:56", "remaining_time": "0:11:09", "throughput": 4226.89, "total_tokens": 22051528}
|
| 6782 |
+
{"current_steps": 33825, "total_steps": 38160, "loss": 0.3741, "lr": 0.001164492828947225, "epoch": 17.72798742138365, "percentage": 88.64, "elapsed_time": "1:26:57", "remaining_time": "0:11:08", "throughput": 4226.87, "total_tokens": 22054248}
|
| 6783 |
+
{"current_steps": 33830, "total_steps": 38160, "loss": 0.2488, "lr": 0.0011618439384932476, "epoch": 17.730607966457022, "percentage": 88.65, "elapsed_time": "1:26:58", "remaining_time": "0:11:07", "throughput": 4227.0, "total_tokens": 22058600}
|
| 6784 |
+
{"current_steps": 33835, "total_steps": 38160, "loss": 0.3176, "lr": 0.001159197942823646, "epoch": 17.7332285115304, "percentage": 88.67, "elapsed_time": "1:26:59", "remaining_time": "0:11:07", "throughput": 4227.04, "total_tokens": 22061896}
|
| 6785 |
+
{"current_steps": 33840, "total_steps": 38160, "loss": 0.4769, "lr": 0.0011565548424919286, "epoch": 17.735849056603772, "percentage": 88.68, "elapsed_time": "1:26:59", "remaining_time": "0:11:06", "throughput": 4227.08, "total_tokens": 22065192}
|
| 6786 |
+
{"current_steps": 33845, "total_steps": 38160, "loss": 0.3963, "lr": 0.0011539146380510033, "epoch": 17.73846960167715, "percentage": 88.69, "elapsed_time": "1:27:00", "remaining_time": "0:11:05", "throughput": 4227.1, "total_tokens": 22068296}
|
| 6787 |
+
{"current_steps": 33850, "total_steps": 38160, "loss": 0.4096, "lr": 0.0011512773300531736, "epoch": 17.741090146750523, "percentage": 88.71, "elapsed_time": "1:27:01", "remaining_time": "0:11:04", "throughput": 4227.18, "total_tokens": 22071976}
|
| 6788 |
+
{"current_steps": 33855, "total_steps": 38160, "loss": 0.4253, "lr": 0.0011486429190501295, "epoch": 17.7437106918239, "percentage": 88.72, "elapsed_time": "1:27:02", "remaining_time": "0:11:04", "throughput": 4227.25, "total_tokens": 22075656}
|
| 6789 |
+
{"current_steps": 33860, "total_steps": 38160, "loss": 0.306, "lr": 0.0011460114055929654, "epoch": 17.746331236897273, "percentage": 88.73, "elapsed_time": "1:27:03", "remaining_time": "0:11:03", "throughput": 4227.36, "total_tokens": 22079816}
|
| 6790 |
+
{"current_steps": 33865, "total_steps": 38160, "loss": 0.3417, "lr": 0.0011433827902321574, "epoch": 17.74895178197065, "percentage": 88.74, "elapsed_time": "1:27:03", "remaining_time": "0:11:02", "throughput": 4227.34, "total_tokens": 22082472}
|
| 6791 |
+
{"current_steps": 33870, "total_steps": 38160, "loss": 0.3528, "lr": 0.001140757073517586, "epoch": 17.751572327044027, "percentage": 88.76, "elapsed_time": "1:27:04", "remaining_time": "0:11:01", "throughput": 4227.4, "total_tokens": 22086024}
|
| 6792 |
+
{"current_steps": 33875, "total_steps": 38160, "loss": 0.3983, "lr": 0.0011381342559985198, "epoch": 17.7541928721174, "percentage": 88.77, "elapsed_time": "1:27:05", "remaining_time": "0:11:00", "throughput": 4227.49, "total_tokens": 22090024}
|
| 6793 |
+
{"current_steps": 33880, "total_steps": 38160, "loss": 0.2659, "lr": 0.0011355143382236232, "epoch": 17.756813417190777, "percentage": 88.78, "elapsed_time": "1:27:05", "remaining_time": "0:11:00", "throughput": 4227.47, "total_tokens": 22092680}
|
| 6794 |
+
{"current_steps": 33885, "total_steps": 38160, "loss": 0.4122, "lr": 0.0011328973207409532, "epoch": 17.75943396226415, "percentage": 88.8, "elapsed_time": "1:27:06", "remaining_time": "0:10:59", "throughput": 4227.43, "total_tokens": 22095112}
|
| 6795 |
+
{"current_steps": 33890, "total_steps": 38160, "loss": 0.3993, "lr": 0.0011302832040979598, "epoch": 17.762054507337528, "percentage": 88.81, "elapsed_time": "1:27:07", "remaining_time": "0:10:58", "throughput": 4227.49, "total_tokens": 22098696}
|
| 6796 |
+
{"current_steps": 33895, "total_steps": 38160, "loss": 0.3849, "lr": 0.0011276719888414844, "epoch": 17.7646750524109, "percentage": 88.82, "elapsed_time": "1:27:08", "remaining_time": "0:10:57", "throughput": 4227.53, "total_tokens": 22102024}
|
| 6797 |
+
{"current_steps": 33900, "total_steps": 38160, "loss": 0.3704, "lr": 0.0011250636755177645, "epoch": 17.767295597484278, "percentage": 88.84, "elapsed_time": "1:27:08", "remaining_time": "0:10:57", "throughput": 4227.54, "total_tokens": 22105064}
|
| 6798 |
+
{"current_steps": 33905, "total_steps": 38160, "loss": 0.4462, "lr": 0.001122458264672434, "epoch": 17.76991614255765, "percentage": 88.85, "elapsed_time": "1:27:09", "remaining_time": "0:10:56", "throughput": 4227.62, "total_tokens": 22108968}
|
| 6799 |
+
{"current_steps": 33910, "total_steps": 38160, "loss": 0.33, "lr": 0.0011198557568505069, "epoch": 17.77253668763103, "percentage": 88.86, "elapsed_time": "1:27:10", "remaining_time": "0:10:55", "throughput": 4227.75, "total_tokens": 22113288}
|
| 6800 |
+
{"current_steps": 33915, "total_steps": 38160, "loss": 0.4518, "lr": 0.0011172561525964026, "epoch": 17.7751572327044, "percentage": 88.88, "elapsed_time": "1:27:11", "remaining_time": "0:10:54", "throughput": 4227.89, "total_tokens": 22117768}
|
| 6801 |
+
{"current_steps": 33920, "total_steps": 38160, "loss": 0.3208, "lr": 0.001114659452453931, "epoch": 17.77777777777778, "percentage": 88.89, "elapsed_time": "1:27:12", "remaining_time": "0:10:54", "throughput": 4227.91, "total_tokens": 22120936}
|
| 6802 |
+
{"current_steps": 33925, "total_steps": 38160, "loss": 0.276, "lr": 0.0011120656569662878, "epoch": 17.780398322851152, "percentage": 88.9, "elapsed_time": "1:27:12", "remaining_time": "0:10:53", "throughput": 4227.99, "total_tokens": 22124616}
|
| 6803 |
+
{"current_steps": 33930, "total_steps": 38160, "loss": 0.3226, "lr": 0.0011094747666760706, "epoch": 17.78301886792453, "percentage": 88.92, "elapsed_time": "1:27:13", "remaining_time": "0:10:52", "throughput": 4228.03, "total_tokens": 22128072}
|
| 6804 |
+
{"current_steps": 33935, "total_steps": 38160, "loss": 0.3419, "lr": 0.001106886782125256, "epoch": 17.785639412997902, "percentage": 88.93, "elapsed_time": "1:27:14", "remaining_time": "0:10:51", "throughput": 4228.07, "total_tokens": 22131272}
|
| 6805 |
+
{"current_steps": 33940, "total_steps": 38160, "loss": 0.2731, "lr": 0.0011043017038552272, "epoch": 17.78825995807128, "percentage": 88.94, "elapsed_time": "1:27:15", "remaining_time": "0:10:50", "throughput": 4228.13, "total_tokens": 22134888}
|
| 6806 |
+
{"current_steps": 33945, "total_steps": 38160, "loss": 0.2779, "lr": 0.0011017195324067536, "epoch": 17.790880503144653, "percentage": 88.95, "elapsed_time": "1:27:15", "remaining_time": "0:10:50", "throughput": 4228.1, "total_tokens": 22137480}
|
| 6807 |
+
{"current_steps": 33950, "total_steps": 38160, "loss": 0.3466, "lr": 0.0010991402683199912, "epoch": 17.79350104821803, "percentage": 88.97, "elapsed_time": "1:27:16", "remaining_time": "0:10:49", "throughput": 4228.15, "total_tokens": 22141000}
|
| 6808 |
+
{"current_steps": 33955, "total_steps": 38160, "loss": 0.2441, "lr": 0.0010965639121344956, "epoch": 17.796121593291403, "percentage": 88.98, "elapsed_time": "1:27:17", "remaining_time": "0:10:48", "throughput": 4228.14, "total_tokens": 22143752}
|
| 6809 |
+
{"current_steps": 33960, "total_steps": 38160, "loss": 0.336, "lr": 0.0010939904643892117, "epoch": 17.79874213836478, "percentage": 88.99, "elapsed_time": "1:27:17", "remaining_time": "0:10:47", "throughput": 4228.2, "total_tokens": 22147272}
|
| 6810 |
+
{"current_steps": 33965, "total_steps": 38160, "loss": 0.2258, "lr": 0.0010914199256224742, "epoch": 17.801362683438157, "percentage": 89.01, "elapsed_time": "1:27:18", "remaining_time": "0:10:47", "throughput": 4228.28, "total_tokens": 22151080}
|
| 6811 |
+
{"current_steps": 33970, "total_steps": 38160, "loss": 0.2787, "lr": 0.0010888522963720098, "epoch": 17.80398322851153, "percentage": 89.02, "elapsed_time": "1:27:19", "remaining_time": "0:10:46", "throughput": 4228.34, "total_tokens": 22154664}
|
| 6812 |
+
{"current_steps": 33975, "total_steps": 38160, "loss": 0.2967, "lr": 0.0010862875771749402, "epoch": 17.806603773584907, "percentage": 89.03, "elapsed_time": "1:27:20", "remaining_time": "0:10:45", "throughput": 4228.34, "total_tokens": 22157608}
|
| 6813 |
+
{"current_steps": 33980, "total_steps": 38160, "loss": 0.3866, "lr": 0.0010837257685677715, "epoch": 17.80922431865828, "percentage": 89.05, "elapsed_time": "1:27:20", "remaining_time": "0:10:44", "throughput": 4228.31, "total_tokens": 22160136}
|
| 6814 |
+
{"current_steps": 33985, "total_steps": 38160, "loss": 0.3009, "lr": 0.0010811668710864098, "epoch": 17.811844863731658, "percentage": 89.06, "elapsed_time": "1:27:21", "remaining_time": "0:10:43", "throughput": 4228.25, "total_tokens": 22162376}
|
| 6815 |
+
{"current_steps": 33990, "total_steps": 38160, "loss": 0.5536, "lr": 0.0010786108852661419, "epoch": 17.81446540880503, "percentage": 89.07, "elapsed_time": "1:27:22", "remaining_time": "0:10:43", "throughput": 4228.34, "total_tokens": 22166376}
|
| 6816 |
+
{"current_steps": 33995, "total_steps": 38160, "loss": 0.3097, "lr": 0.001076057811641652, "epoch": 17.817085953878408, "percentage": 89.09, "elapsed_time": "1:27:22", "remaining_time": "0:10:42", "throughput": 4228.33, "total_tokens": 22169128}
|
| 6817 |
+
{"current_steps": 34000, "total_steps": 38160, "loss": 0.42, "lr": 0.001073507650747016, "epoch": 17.81970649895178, "percentage": 89.1, "elapsed_time": "1:27:23", "remaining_time": "0:10:41", "throughput": 4228.47, "total_tokens": 22173544}
|
| 6818 |
+
{"current_steps": 34005, "total_steps": 38160, "loss": 0.325, "lr": 0.0010709604031156987, "epoch": 17.822327044025158, "percentage": 89.11, "elapsed_time": "1:27:24", "remaining_time": "0:10:40", "throughput": 4228.49, "total_tokens": 22176712}
|
| 6819 |
+
{"current_steps": 34010, "total_steps": 38160, "loss": 0.3171, "lr": 0.001068416069280556, "epoch": 17.82494758909853, "percentage": 89.12, "elapsed_time": "1:27:25", "remaining_time": "0:10:40", "throughput": 4228.51, "total_tokens": 22179816}
|
| 6820 |
+
{"current_steps": 34015, "total_steps": 38160, "loss": 0.3094, "lr": 0.0010658746497738286, "epoch": 17.82756813417191, "percentage": 89.14, "elapsed_time": "1:27:26", "remaining_time": "0:10:39", "throughput": 4228.53, "total_tokens": 22182888}
|
| 6821 |
+
{"current_steps": 34020, "total_steps": 38160, "loss": 0.3926, "lr": 0.0010633361451271562, "epoch": 17.830188679245282, "percentage": 89.15, "elapsed_time": "1:27:26", "remaining_time": "0:10:38", "throughput": 4228.6, "total_tokens": 22186472}
|
| 6822 |
+
{"current_steps": 34025, "total_steps": 38160, "loss": 0.2988, "lr": 0.001060800555871562, "epoch": 17.83280922431866, "percentage": 89.16, "elapsed_time": "1:27:27", "remaining_time": "0:10:37", "throughput": 4228.59, "total_tokens": 22189320}
|
| 6823 |
+
{"current_steps": 34030, "total_steps": 38160, "loss": 0.3648, "lr": 0.0010582678825374675, "epoch": 17.835429769392032, "percentage": 89.18, "elapsed_time": "1:27:28", "remaining_time": "0:10:36", "throughput": 4228.59, "total_tokens": 22192200}
|
| 6824 |
+
{"current_steps": 34035, "total_steps": 38160, "loss": 0.2771, "lr": 0.0010557381256546766, "epoch": 17.83805031446541, "percentage": 89.19, "elapsed_time": "1:27:28", "remaining_time": "0:10:36", "throughput": 4228.55, "total_tokens": 22194696}
|
| 6825 |
+
{"current_steps": 34040, "total_steps": 38160, "loss": 0.3027, "lr": 0.0010532112857523878, "epoch": 17.840670859538783, "percentage": 89.2, "elapsed_time": "1:27:29", "remaining_time": "0:10:35", "throughput": 4228.52, "total_tokens": 22197192}
|
| 6826 |
+
{"current_steps": 34045, "total_steps": 38160, "loss": 0.32, "lr": 0.00105068736335918, "epoch": 17.84329140461216, "percentage": 89.22, "elapsed_time": "1:27:30", "remaining_time": "0:10:34", "throughput": 4228.51, "total_tokens": 22199976}
|
| 6827 |
+
{"current_steps": 34050, "total_steps": 38160, "loss": 0.3616, "lr": 0.0010481663590030342, "epoch": 17.845911949685533, "percentage": 89.23, "elapsed_time": "1:27:30", "remaining_time": "0:10:33", "throughput": 4228.54, "total_tokens": 22203176}
|
| 6828 |
+
{"current_steps": 34055, "total_steps": 38160, "loss": 0.2587, "lr": 0.0010456482732113153, "epoch": 17.84853249475891, "percentage": 89.24, "elapsed_time": "1:27:31", "remaining_time": "0:10:33", "throughput": 4228.65, "total_tokens": 22207368}
|
| 6829 |
+
{"current_steps": 34060, "total_steps": 38160, "loss": 0.3802, "lr": 0.0010431331065107801, "epoch": 17.851153039832283, "percentage": 89.26, "elapsed_time": "1:27:32", "remaining_time": "0:10:32", "throughput": 4228.67, "total_tokens": 22210376}
|
| 6830 |
+
{"current_steps": 34065, "total_steps": 38160, "loss": 0.3327, "lr": 0.0010406208594275711, "epoch": 17.85377358490566, "percentage": 89.27, "elapsed_time": "1:27:33", "remaining_time": "0:10:31", "throughput": 4228.8, "total_tokens": 22214856}
|
| 6831 |
+
{"current_steps": 34070, "total_steps": 38160, "loss": 0.3129, "lr": 0.0010381115324872181, "epoch": 17.856394129979037, "percentage": 89.28, "elapsed_time": "1:27:33", "remaining_time": "0:10:30", "throughput": 4228.88, "total_tokens": 22218504}
|
| 6832 |
+
{"current_steps": 34075, "total_steps": 38160, "loss": 0.3389, "lr": 0.0010356051262146477, "epoch": 17.85901467505241, "percentage": 89.3, "elapsed_time": "1:27:34", "remaining_time": "0:10:29", "throughput": 4228.87, "total_tokens": 22221384}
|
| 6833 |
+
{"current_steps": 34080, "total_steps": 38160, "loss": 0.4304, "lr": 0.0010331016411341703, "epoch": 17.861635220125788, "percentage": 89.31, "elapsed_time": "1:27:35", "remaining_time": "0:10:29", "throughput": 4228.97, "total_tokens": 22225480}
|
| 6834 |
+
{"current_steps": 34085, "total_steps": 38160, "loss": 0.3826, "lr": 0.0010306010777694906, "epoch": 17.86425576519916, "percentage": 89.32, "elapsed_time": "1:27:36", "remaining_time": "0:10:28", "throughput": 4228.98, "total_tokens": 22228488}
|
| 6835 |
+
{"current_steps": 34090, "total_steps": 38160, "loss": 0.3676, "lr": 0.001028103436643692, "epoch": 17.866876310272538, "percentage": 89.33, "elapsed_time": "1:27:36", "remaining_time": "0:10:27", "throughput": 4229.02, "total_tokens": 22231912}
|
| 6836 |
+
{"current_steps": 34095, "total_steps": 38160, "loss": 0.3717, "lr": 0.0010256087182792528, "epoch": 17.86949685534591, "percentage": 89.35, "elapsed_time": "1:27:37", "remaining_time": "0:10:26", "throughput": 4229.06, "total_tokens": 22235144}
|
| 6837 |
+
{"current_steps": 34100, "total_steps": 38160, "loss": 0.4452, "lr": 0.0010231169231980424, "epoch": 17.872117400419288, "percentage": 89.36, "elapsed_time": "1:27:38", "remaining_time": "0:10:26", "throughput": 4229.07, "total_tokens": 22238088}
|
| 6838 |
+
{"current_steps": 34105, "total_steps": 38160, "loss": 0.4332, "lr": 0.001020628051921314, "epoch": 17.87473794549266, "percentage": 89.37, "elapsed_time": "1:27:39", "remaining_time": "0:10:25", "throughput": 4229.15, "total_tokens": 22241800}
|
| 6839 |
+
{"current_steps": 34110, "total_steps": 38160, "loss": 0.405, "lr": 0.0010181421049697125, "epoch": 17.87735849056604, "percentage": 89.39, "elapsed_time": "1:27:39", "remaining_time": "0:10:24", "throughput": 4229.22, "total_tokens": 22245512}
|
| 6840 |
+
{"current_steps": 34115, "total_steps": 38160, "loss": 0.4048, "lr": 0.0010156590828632704, "epoch": 17.879979035639412, "percentage": 89.4, "elapsed_time": "1:27:40", "remaining_time": "0:10:23", "throughput": 4229.21, "total_tokens": 22248328}
|
| 6841 |
+
{"current_steps": 34120, "total_steps": 38160, "loss": 0.4386, "lr": 0.0010131789861214074, "epoch": 17.88259958071279, "percentage": 89.41, "elapsed_time": "1:27:41", "remaining_time": "0:10:22", "throughput": 4229.35, "total_tokens": 22252744}
|
| 6842 |
+
{"current_steps": 34125, "total_steps": 38160, "loss": 0.2906, "lr": 0.0010107018152629283, "epoch": 17.885220125786162, "percentage": 89.43, "elapsed_time": "1:27:42", "remaining_time": "0:10:22", "throughput": 4229.51, "total_tokens": 22257544}
|
| 6843 |
+
{"current_steps": 34130, "total_steps": 38160, "loss": 0.3536, "lr": 0.0010082275708060318, "epoch": 17.88784067085954, "percentage": 89.44, "elapsed_time": "1:27:43", "remaining_time": "0:10:21", "throughput": 4229.51, "total_tokens": 22260456}
|
| 6844 |
+
{"current_steps": 34135, "total_steps": 38160, "loss": 0.3871, "lr": 0.001005756253268299, "epoch": 17.890461215932913, "percentage": 89.45, "elapsed_time": "1:27:43", "remaining_time": "0:10:20", "throughput": 4229.59, "total_tokens": 22264328}
|
| 6845 |
+
{"current_steps": 34140, "total_steps": 38160, "loss": 0.2535, "lr": 0.0010032878631667064, "epoch": 17.89308176100629, "percentage": 89.47, "elapsed_time": "1:27:44", "remaining_time": "0:10:19", "throughput": 4229.59, "total_tokens": 22267080}
|
| 6846 |
+
{"current_steps": 34145, "total_steps": 38160, "loss": 0.2634, "lr": 0.0010008224010176108, "epoch": 17.895702306079663, "percentage": 89.48, "elapsed_time": "1:27:45", "remaining_time": "0:10:19", "throughput": 4229.61, "total_tokens": 22270248}
|
| 6847 |
+
{"current_steps": 34150, "total_steps": 38160, "loss": 0.3422, "lr": 0.0009983598673367533, "epoch": 17.89832285115304, "percentage": 89.49, "elapsed_time": "1:27:46", "remaining_time": "0:10:18", "throughput": 4229.68, "total_tokens": 22273960}
|
| 6848 |
+
{"current_steps": 34155, "total_steps": 38160, "loss": 0.326, "lr": 0.0009959002626392748, "epoch": 17.900943396226417, "percentage": 89.5, "elapsed_time": "1:27:46", "remaining_time": "0:10:17", "throughput": 4229.71, "total_tokens": 22277256}
|
| 6849 |
+
{"current_steps": 34160, "total_steps": 38160, "loss": 0.3905, "lr": 0.0009934435874396925, "epoch": 17.90356394129979, "percentage": 89.52, "elapsed_time": "1:27:47", "remaining_time": "0:10:16", "throughput": 4229.74, "total_tokens": 22280424}
|
| 6850 |
+
{"current_steps": 34165, "total_steps": 38160, "loss": 0.3043, "lr": 0.0009909898422519197, "epoch": 17.906184486373167, "percentage": 89.53, "elapsed_time": "1:27:48", "remaining_time": "0:10:16", "throughput": 4229.85, "total_tokens": 22284808}
|
| 6851 |
+
{"current_steps": 34170, "total_steps": 38160, "loss": 0.3094, "lr": 0.0009885390275892463, "epoch": 17.90880503144654, "percentage": 89.54, "elapsed_time": "1:27:49", "remaining_time": "0:10:15", "throughput": 4229.87, "total_tokens": 22287912}
|
| 6852 |
+
{"current_steps": 34175, "total_steps": 38160, "loss": 0.402, "lr": 0.0009860911439643588, "epoch": 17.911425576519918, "percentage": 89.56, "elapsed_time": "1:27:49", "remaining_time": "0:10:14", "throughput": 4229.91, "total_tokens": 22291112}
|
| 6853 |
+
{"current_steps": 34180, "total_steps": 38160, "loss": 0.4163, "lr": 0.0009836461918893225, "epoch": 17.91404612159329, "percentage": 89.57, "elapsed_time": "1:27:50", "remaining_time": "0:10:13", "throughput": 4229.93, "total_tokens": 22294280}
|
| 6854 |
+
{"current_steps": 34185, "total_steps": 38160, "loss": 0.2493, "lr": 0.0009812041718755965, "epoch": 17.916666666666668, "percentage": 89.58, "elapsed_time": "1:27:51", "remaining_time": "0:10:12", "throughput": 4229.96, "total_tokens": 22297640}
|
| 6855 |
+
{"current_steps": 34190, "total_steps": 38160, "loss": 0.3122, "lr": 0.0009787650844340222, "epoch": 17.91928721174004, "percentage": 89.6, "elapsed_time": "1:27:52", "remaining_time": "0:10:12", "throughput": 4229.98, "total_tokens": 22300712}
|
| 6856 |
+
{"current_steps": 34195, "total_steps": 38160, "loss": 0.3131, "lr": 0.0009763289300748334, "epoch": 17.921907756813418, "percentage": 89.61, "elapsed_time": "1:27:52", "remaining_time": "0:10:11", "throughput": 4230.02, "total_tokens": 22304072}
|
| 6857 |
+
{"current_steps": 34200, "total_steps": 38160, "loss": 0.4873, "lr": 0.0009738957093076389, "epoch": 17.92452830188679, "percentage": 89.62, "elapsed_time": "1:27:53", "remaining_time": "0:10:10", "throughput": 4230.05, "total_tokens": 22307336}
|
| 6858 |
+
{"current_steps": 34205, "total_steps": 38160, "loss": 0.3474, "lr": 0.0009714654226414449, "epoch": 17.92714884696017, "percentage": 89.64, "elapsed_time": "1:27:54", "remaining_time": "0:10:09", "throughput": 4230.05, "total_tokens": 22310216}
|
| 6859 |
+
{"current_steps": 34210, "total_steps": 38160, "loss": 0.3276, "lr": 0.0009690380705846363, "epoch": 17.929769392033542, "percentage": 89.65, "elapsed_time": "1:27:55", "remaining_time": "0:10:09", "throughput": 4230.16, "total_tokens": 22314216}
|
| 6860 |
+
{"current_steps": 34215, "total_steps": 38160, "loss": 0.2742, "lr": 0.0009666136536449904, "epoch": 17.93238993710692, "percentage": 89.66, "elapsed_time": "1:27:55", "remaining_time": "0:10:08", "throughput": 4230.18, "total_tokens": 22317416}
|
| 6861 |
+
{"current_steps": 34220, "total_steps": 38160, "loss": 0.3216, "lr": 0.0009641921723296697, "epoch": 17.935010482180292, "percentage": 89.68, "elapsed_time": "1:27:56", "remaining_time": "0:10:07", "throughput": 4230.2, "total_tokens": 22320456}
|
| 6862 |
+
{"current_steps": 34225, "total_steps": 38160, "loss": 0.2737, "lr": 0.0009617736271452143, "epoch": 17.93763102725367, "percentage": 89.69, "elapsed_time": "1:27:57", "remaining_time": "0:10:06", "throughput": 4230.18, "total_tokens": 22323048}
|
| 6863 |
+
{"current_steps": 34230, "total_steps": 38160, "loss": 0.2651, "lr": 0.0009593580185975625, "epoch": 17.940251572327043, "percentage": 89.7, "elapsed_time": "1:27:57", "remaining_time": "0:10:05", "throughput": 4230.24, "total_tokens": 22326568}
|
| 6864 |
+
{"current_steps": 34235, "total_steps": 38160, "loss": 0.3905, "lr": 0.0009569453471920269, "epoch": 17.94287211740042, "percentage": 89.71, "elapsed_time": "1:27:58", "remaining_time": "0:10:05", "throughput": 4230.28, "total_tokens": 22329864}
|
| 6865 |
+
{"current_steps": 34240, "total_steps": 38160, "loss": 0.5082, "lr": 0.0009545356134333121, "epoch": 17.945492662473793, "percentage": 89.73, "elapsed_time": "1:27:59", "remaining_time": "0:10:04", "throughput": 4230.31, "total_tokens": 22333000}
|
| 6866 |
+
{"current_steps": 34245, "total_steps": 38160, "loss": 0.308, "lr": 0.0009521288178255099, "epoch": 17.94811320754717, "percentage": 89.74, "elapsed_time": "1:28:00", "remaining_time": "0:10:03", "throughput": 4230.35, "total_tokens": 22336328}
|
| 6867 |
+
{"current_steps": 34250, "total_steps": 38160, "loss": 0.2763, "lr": 0.0009497249608720909, "epoch": 17.950733752620543, "percentage": 89.75, "elapsed_time": "1:28:00", "remaining_time": "0:10:02", "throughput": 4230.32, "total_tokens": 22338920}
|
| 6868 |
+
{"current_steps": 34255, "total_steps": 38160, "loss": 0.5491, "lr": 0.000947324043075916, "epoch": 17.95335429769392, "percentage": 89.77, "elapsed_time": "1:28:01", "remaining_time": "0:10:02", "throughput": 4230.38, "total_tokens": 22342568}
|
| 6869 |
+
{"current_steps": 34260, "total_steps": 38160, "loss": 0.3798, "lr": 0.0009449260649392338, "epoch": 17.955974842767297, "percentage": 89.78, "elapsed_time": "1:28:02", "remaining_time": "0:10:01", "throughput": 4230.38, "total_tokens": 22345512}
|
| 6870 |
+
{"current_steps": 34265, "total_steps": 38160, "loss": 0.3943, "lr": 0.000942531026963666, "epoch": 17.95859538784067, "percentage": 89.79, "elapsed_time": "1:28:02", "remaining_time": "0:10:00", "throughput": 4230.39, "total_tokens": 22348488}
|
| 6871 |
+
{"current_steps": 34270, "total_steps": 38160, "loss": 0.3295, "lr": 0.0009401389296502305, "epoch": 17.961215932914047, "percentage": 89.81, "elapsed_time": "1:28:03", "remaining_time": "0:09:59", "throughput": 4230.44, "total_tokens": 22351848}
|
| 6872 |
+
{"current_steps": 34275, "total_steps": 38160, "loss": 0.2755, "lr": 0.0009377497734993317, "epoch": 17.96383647798742, "percentage": 89.82, "elapsed_time": "1:28:04", "remaining_time": "0:09:58", "throughput": 4230.45, "total_tokens": 22354856}
|
| 6873 |
+
{"current_steps": 34280, "total_steps": 38160, "loss": 0.228, "lr": 0.0009353635590107467, "epoch": 17.966457023060798, "percentage": 89.83, "elapsed_time": "1:28:05", "remaining_time": "0:09:58", "throughput": 4230.49, "total_tokens": 22358152}
|
| 6874 |
+
{"current_steps": 34285, "total_steps": 38160, "loss": 0.4043, "lr": 0.0009329802866836461, "epoch": 17.96907756813417, "percentage": 89.85, "elapsed_time": "1:28:05", "remaining_time": "0:09:57", "throughput": 4230.55, "total_tokens": 22361704}
|
| 6875 |
+
{"current_steps": 34290, "total_steps": 38160, "loss": 0.4321, "lr": 0.0009305999570165879, "epoch": 17.971698113207548, "percentage": 89.86, "elapsed_time": "1:28:06", "remaining_time": "0:09:56", "throughput": 4230.56, "total_tokens": 22364680}
|
| 6876 |
+
{"current_steps": 34295, "total_steps": 38160, "loss": 0.2864, "lr": 0.0009282225705075037, "epoch": 17.97431865828092, "percentage": 89.87, "elapsed_time": "1:28:07", "remaining_time": "0:09:55", "throughput": 4230.58, "total_tokens": 22367752}
|
| 6877 |
+
{"current_steps": 34300, "total_steps": 38160, "loss": 0.3297, "lr": 0.0009258481276537222, "epoch": 17.9769392033543, "percentage": 89.88, "elapsed_time": "1:28:07", "remaining_time": "0:09:55", "throughput": 4230.59, "total_tokens": 22370632}
|
| 6878 |
+
{"current_steps": 34305, "total_steps": 38160, "loss": 0.3379, "lr": 0.0009234766289519413, "epoch": 17.979559748427672, "percentage": 89.9, "elapsed_time": "1:28:08", "remaining_time": "0:09:54", "throughput": 4230.57, "total_tokens": 22373224}
|
| 6879 |
+
{"current_steps": 34310, "total_steps": 38160, "loss": 0.3462, "lr": 0.0009211080748982575, "epoch": 17.98218029350105, "percentage": 89.91, "elapsed_time": "1:28:09", "remaining_time": "0:09:53", "throughput": 4230.84, "total_tokens": 22379816}
|
| 6880 |
+
{"current_steps": 34315, "total_steps": 38160, "loss": 0.2748, "lr": 0.000918742465988146, "epoch": 17.984800838574422, "percentage": 89.92, "elapsed_time": "1:28:10", "remaining_time": "0:09:52", "throughput": 4230.89, "total_tokens": 22383112}
|
| 6881 |
+
{"current_steps": 34320, "total_steps": 38160, "loss": 0.4234, "lr": 0.0009163798027164609, "epoch": 17.9874213836478, "percentage": 89.94, "elapsed_time": "1:28:11", "remaining_time": "0:09:52", "throughput": 4230.92, "total_tokens": 22386472}
|
| 6882 |
+
{"current_steps": 34325, "total_steps": 38160, "loss": 0.3726, "lr": 0.0009140200855774489, "epoch": 17.990041928721173, "percentage": 89.95, "elapsed_time": "1:28:11", "remaining_time": "0:09:51", "throughput": 4230.89, "total_tokens": 22388968}
|
| 6883 |
+
{"current_steps": 34330, "total_steps": 38160, "loss": 0.2874, "lr": 0.0009116633150647329, "epoch": 17.99266247379455, "percentage": 89.96, "elapsed_time": "1:28:12", "remaining_time": "0:09:50", "throughput": 4230.91, "total_tokens": 22392008}
|
| 6884 |
+
{"current_steps": 34335, "total_steps": 38160, "loss": 0.2973, "lr": 0.0009093094916713207, "epoch": 17.995283018867923, "percentage": 89.98, "elapsed_time": "1:28:13", "remaining_time": "0:09:49", "throughput": 4230.91, "total_tokens": 22394984}
|
| 6885 |
+
{"current_steps": 34340, "total_steps": 38160, "loss": 0.3645, "lr": 0.0009069586158896097, "epoch": 17.9979035639413, "percentage": 89.99, "elapsed_time": "1:28:13", "remaining_time": "0:09:48", "throughput": 4230.96, "total_tokens": 22398376}
|
| 6886 |
+
{"current_steps": 34344, "total_steps": 38160, "eval_loss": 0.5047969222068787, "epoch": 18.0, "percentage": 90.0, "elapsed_time": "1:28:28", "remaining_time": "0:09:49", "throughput": 4219.86, "total_tokens": 22400368}
|
| 6887 |
+
{"current_steps": 34345, "total_steps": 38160, "loss": 0.4825, "lr": 0.0009046106882113752, "epoch": 18.000524109014677, "percentage": 90.0, "elapsed_time": "1:28:30", "remaining_time": "0:09:49", "throughput": 4218.61, "total_tokens": 22400816}
|
| 6888 |
+
{"current_steps": 34350, "total_steps": 38160, "loss": 0.359, "lr": 0.0009022657091277742, "epoch": 18.00314465408805, "percentage": 90.02, "elapsed_time": "1:28:30", "remaining_time": "0:09:49", "throughput": 4218.63, "total_tokens": 22403888}
|
| 6889 |
+
{"current_steps": 34355, "total_steps": 38160, "loss": 0.417, "lr": 0.000899923679129354, "epoch": 18.005765199161427, "percentage": 90.03, "elapsed_time": "1:28:31", "remaining_time": "0:09:48", "throughput": 4218.76, "total_tokens": 22408208}
|
| 6890 |
+
{"current_steps": 34360, "total_steps": 38160, "loss": 0.3425, "lr": 0.0008975845987060348, "epoch": 18.0083857442348, "percentage": 90.04, "elapsed_time": "1:28:32", "remaining_time": "0:09:47", "throughput": 4218.82, "total_tokens": 22411856}
|