Training in progress, step 36252
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +383 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1074144
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46e3a67e96ecb2c86395c3436af4d6e9f0ba6659e14de486fe46d3fa9fa11127
|
| 3 |
size 1074144
|
trainer_log.jsonl
CHANGED
|
@@ -6889,3 +6889,386 @@
|
|
| 6889 |
{"current_steps": 34355, "total_steps": 38160, "loss": 0.4264, "lr": 1.4998727985489235e-06, "epoch": 18.005765199161427, "percentage": 90.03, "elapsed_time": "1:41:24", "remaining_time": "0:11:13", "throughput": 3694.27, "total_tokens": 22478424}
|
| 6890 |
{"current_steps": 34360, "total_steps": 38160, "loss": 0.3787, "lr": 1.4959743311767249e-06, "epoch": 18.0083857442348, "percentage": 90.04, "elapsed_time": "1:41:25", "remaining_time": "0:11:13", "throughput": 3694.3, "total_tokens": 22481496}
|
| 6891 |
{"current_steps": 34365, "total_steps": 38160, "loss": 0.5309, "lr": 1.4920807805785502e-06, "epoch": 18.011006289308177, "percentage": 90.06, "elapsed_time": "1:41:27", "remaining_time": "0:11:12", "throughput": 3694.43, "total_tokens": 22488472}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6889 |
{"current_steps": 34355, "total_steps": 38160, "loss": 0.4264, "lr": 1.4998727985489235e-06, "epoch": 18.005765199161427, "percentage": 90.03, "elapsed_time": "1:41:24", "remaining_time": "0:11:13", "throughput": 3694.27, "total_tokens": 22478424}
|
| 6890 |
{"current_steps": 34360, "total_steps": 38160, "loss": 0.3787, "lr": 1.4959743311767249e-06, "epoch": 18.0083857442348, "percentage": 90.04, "elapsed_time": "1:41:25", "remaining_time": "0:11:13", "throughput": 3694.3, "total_tokens": 22481496}
|
| 6891 |
{"current_steps": 34365, "total_steps": 38160, "loss": 0.5309, "lr": 1.4920807805785502e-06, "epoch": 18.011006289308177, "percentage": 90.06, "elapsed_time": "1:41:27", "remaining_time": "0:11:12", "throughput": 3694.43, "total_tokens": 22488472}
|
| 6892 |
+
{"current_steps": 34370, "total_steps": 38160, "loss": 0.6181, "lr": 1.4881921475688815e-06, "epoch": 18.01362683438155, "percentage": 90.07, "elapsed_time": "1:41:28", "remaining_time": "0:11:11", "throughput": 3694.36, "total_tokens": 22491384}
|
| 6893 |
+
{"current_steps": 34375, "total_steps": 38160, "loss": 0.3399, "lr": 1.4843084329611844e-06, "epoch": 18.016247379454928, "percentage": 90.08, "elapsed_time": "1:41:28", "remaining_time": "0:11:10", "throughput": 3694.32, "total_tokens": 22494104}
|
| 6894 |
+
{"current_steps": 34380, "total_steps": 38160, "loss": 0.5158, "lr": 1.480429637567879e-06, "epoch": 18.0188679245283, "percentage": 90.09, "elapsed_time": "1:41:29", "remaining_time": "0:11:09", "throughput": 3694.38, "total_tokens": 22497656}
|
| 6895 |
+
{"current_steps": 34385, "total_steps": 38160, "loss": 0.4147, "lr": 1.4765557622003684e-06, "epoch": 18.021488469601678, "percentage": 90.11, "elapsed_time": "1:41:30", "remaining_time": "0:11:08", "throughput": 3694.42, "total_tokens": 22500920}
|
| 6896 |
+
{"current_steps": 34390, "total_steps": 38160, "loss": 0.3791, "lr": 1.4726868076690186e-06, "epoch": 18.02410901467505, "percentage": 90.12, "elapsed_time": "1:41:31", "remaining_time": "0:11:07", "throughput": 3694.44, "total_tokens": 22504408}
|
| 6897 |
+
{"current_steps": 34395, "total_steps": 38160, "loss": 0.4067, "lr": 1.4688227747831767e-06, "epoch": 18.02672955974843, "percentage": 90.13, "elapsed_time": "1:41:32", "remaining_time": "0:11:06", "throughput": 3694.37, "total_tokens": 22506904}
|
| 6898 |
+
{"current_steps": 34400, "total_steps": 38160, "loss": 0.5196, "lr": 1.4649636643511573e-06, "epoch": 18.029350104821802, "percentage": 90.15, "elapsed_time": "1:41:33", "remaining_time": "0:11:05", "throughput": 3694.41, "total_tokens": 22510200}
|
| 6899 |
+
{"current_steps": 34405, "total_steps": 38160, "loss": 0.3482, "lr": 1.4611094771802364e-06, "epoch": 18.03197064989518, "percentage": 90.16, "elapsed_time": "1:41:33", "remaining_time": "0:11:05", "throughput": 3694.38, "total_tokens": 22513112}
|
| 6900 |
+
{"current_steps": 34410, "total_steps": 38160, "loss": 0.4377, "lr": 1.457260214076664e-06, "epoch": 18.034591194968552, "percentage": 90.17, "elapsed_time": "1:41:34", "remaining_time": "0:11:04", "throughput": 3694.42, "total_tokens": 22516824}
|
| 6901 |
+
{"current_steps": 34415, "total_steps": 38160, "loss": 0.398, "lr": 1.4534158758456623e-06, "epoch": 18.03721174004193, "percentage": 90.19, "elapsed_time": "1:41:35", "remaining_time": "0:11:03", "throughput": 3694.4, "total_tokens": 22520184}
|
| 6902 |
+
{"current_steps": 34420, "total_steps": 38160, "loss": 0.3777, "lr": 1.4495764632914271e-06, "epoch": 18.039832285115303, "percentage": 90.2, "elapsed_time": "1:41:36", "remaining_time": "0:11:02", "throughput": 3694.35, "total_tokens": 22523000}
|
| 6903 |
+
{"current_steps": 34425, "total_steps": 38160, "loss": 0.3423, "lr": 1.4457419772171215e-06, "epoch": 18.04245283018868, "percentage": 90.21, "elapsed_time": "1:41:37", "remaining_time": "0:11:01", "throughput": 3694.25, "total_tokens": 22525400}
|
| 6904 |
+
{"current_steps": 34430, "total_steps": 38160, "loss": 0.4499, "lr": 1.4419124184248733e-06, "epoch": 18.045073375262053, "percentage": 90.23, "elapsed_time": "1:41:38", "remaining_time": "0:11:00", "throughput": 3694.24, "total_tokens": 22528344}
|
| 6905 |
+
{"current_steps": 34435, "total_steps": 38160, "loss": 0.458, "lr": 1.4380877877157834e-06, "epoch": 18.04769392033543, "percentage": 90.24, "elapsed_time": "1:41:39", "remaining_time": "0:10:59", "throughput": 3694.17, "total_tokens": 22530776}
|
| 6906 |
+
{"current_steps": 34440, "total_steps": 38160, "loss": 0.3877, "lr": 1.4342680858899143e-06, "epoch": 18.050314465408807, "percentage": 90.25, "elapsed_time": "1:41:40", "remaining_time": "0:10:58", "throughput": 3694.17, "total_tokens": 22534584}
|
| 6907 |
+
{"current_steps": 34445, "total_steps": 38160, "loss": 0.4328, "lr": 1.4304533137463134e-06, "epoch": 18.05293501048218, "percentage": 90.26, "elapsed_time": "1:41:41", "remaining_time": "0:10:58", "throughput": 3694.31, "total_tokens": 22539192}
|
| 6908 |
+
{"current_steps": 34450, "total_steps": 38160, "loss": 0.3348, "lr": 1.4266434720829808e-06, "epoch": 18.055555555555557, "percentage": 90.28, "elapsed_time": "1:41:41", "remaining_time": "0:10:57", "throughput": 3694.23, "total_tokens": 22541624}
|
| 6909 |
+
{"current_steps": 34455, "total_steps": 38160, "loss": 0.4318, "lr": 1.4228385616968987e-06, "epoch": 18.05817610062893, "percentage": 90.29, "elapsed_time": "1:41:42", "remaining_time": "0:10:56", "throughput": 3694.27, "total_tokens": 22544888}
|
| 6910 |
+
{"current_steps": 34460, "total_steps": 38160, "loss": 0.4076, "lr": 1.419038583384008e-06, "epoch": 18.060796645702307, "percentage": 90.3, "elapsed_time": "1:41:43", "remaining_time": "0:10:55", "throughput": 3694.23, "total_tokens": 22547704}
|
| 6911 |
+
{"current_steps": 34465, "total_steps": 38160, "loss": 0.3685, "lr": 1.4152435379392148e-06, "epoch": 18.06341719077568, "percentage": 90.32, "elapsed_time": "1:41:44", "remaining_time": "0:10:54", "throughput": 3694.3, "total_tokens": 22551352}
|
| 6912 |
+
{"current_steps": 34470, "total_steps": 38160, "loss": 0.4936, "lr": 1.411453426156406e-06, "epoch": 18.066037735849058, "percentage": 90.33, "elapsed_time": "1:41:45", "remaining_time": "0:10:53", "throughput": 3694.29, "total_tokens": 22554520}
|
| 6913 |
+
{"current_steps": 34475, "total_steps": 38160, "loss": 0.4288, "lr": 1.4076682488284282e-06, "epoch": 18.06865828092243, "percentage": 90.34, "elapsed_time": "1:41:46", "remaining_time": "0:10:52", "throughput": 3694.29, "total_tokens": 22557528}
|
| 6914 |
+
{"current_steps": 34480, "total_steps": 38160, "loss": 0.3678, "lr": 1.4038880067470978e-06, "epoch": 18.071278825995808, "percentage": 90.36, "elapsed_time": "1:41:46", "remaining_time": "0:10:51", "throughput": 3694.26, "total_tokens": 22560632}
|
| 6915 |
+
{"current_steps": 34485, "total_steps": 38160, "loss": 0.4504, "lr": 1.400112700703196e-06, "epoch": 18.07389937106918, "percentage": 90.37, "elapsed_time": "1:41:47", "remaining_time": "0:10:50", "throughput": 3694.24, "total_tokens": 22563896}
|
| 6916 |
+
{"current_steps": 34490, "total_steps": 38160, "loss": 0.4463, "lr": 1.3963423314864805e-06, "epoch": 18.07651991614256, "percentage": 90.38, "elapsed_time": "1:41:48", "remaining_time": "0:10:50", "throughput": 3694.15, "total_tokens": 22566136}
|
| 6917 |
+
{"current_steps": 34495, "total_steps": 38160, "loss": 0.3429, "lr": 1.3925768998856587e-06, "epoch": 18.079140461215932, "percentage": 90.4, "elapsed_time": "1:41:49", "remaining_time": "0:10:49", "throughput": 3694.12, "total_tokens": 22568824}
|
| 6918 |
+
{"current_steps": 34500, "total_steps": 38160, "loss": 0.5611, "lr": 1.388816406688423e-06, "epoch": 18.08176100628931, "percentage": 90.41, "elapsed_time": "1:41:50", "remaining_time": "0:10:48", "throughput": 3694.28, "total_tokens": 22573304}
|
| 6919 |
+
{"current_steps": 34505, "total_steps": 38160, "loss": 0.6834, "lr": 1.3850608526814246e-06, "epoch": 18.084381551362682, "percentage": 90.42, "elapsed_time": "1:41:51", "remaining_time": "0:10:47", "throughput": 3694.41, "total_tokens": 22577304}
|
| 6920 |
+
{"current_steps": 34510, "total_steps": 38160, "loss": 0.5134, "lr": 1.3813102386502853e-06, "epoch": 18.08700209643606, "percentage": 90.44, "elapsed_time": "1:41:51", "remaining_time": "0:10:46", "throughput": 3694.47, "total_tokens": 22580568}
|
| 6921 |
+
{"current_steps": 34515, "total_steps": 38160, "loss": 0.4965, "lr": 1.3775645653795881e-06, "epoch": 18.089622641509433, "percentage": 90.45, "elapsed_time": "1:41:52", "remaining_time": "0:10:45", "throughput": 3694.45, "total_tokens": 22583416}
|
| 6922 |
+
{"current_steps": 34520, "total_steps": 38160, "loss": 0.329, "lr": 1.373823833652882e-06, "epoch": 18.09224318658281, "percentage": 90.46, "elapsed_time": "1:41:53", "remaining_time": "0:10:44", "throughput": 3694.55, "total_tokens": 22587544}
|
| 6923 |
+
{"current_steps": 34525, "total_steps": 38160, "loss": 0.5345, "lr": 1.3700880442526876e-06, "epoch": 18.094863731656183, "percentage": 90.47, "elapsed_time": "1:41:54", "remaining_time": "0:10:43", "throughput": 3694.57, "total_tokens": 22590712}
|
| 6924 |
+
{"current_steps": 34530, "total_steps": 38160, "loss": 0.4029, "lr": 1.3663571979604884e-06, "epoch": 18.09748427672956, "percentage": 90.49, "elapsed_time": "1:41:55", "remaining_time": "0:10:42", "throughput": 3694.55, "total_tokens": 22593528}
|
| 6925 |
+
{"current_steps": 34535, "total_steps": 38160, "loss": 0.5333, "lr": 1.3626312955567406e-06, "epoch": 18.100104821802937, "percentage": 90.5, "elapsed_time": "1:41:56", "remaining_time": "0:10:42", "throughput": 3694.65, "total_tokens": 22597496}
|
| 6926 |
+
{"current_steps": 34540, "total_steps": 38160, "loss": 0.49, "lr": 1.358910337820854e-06, "epoch": 18.10272536687631, "percentage": 90.51, "elapsed_time": "1:41:57", "remaining_time": "0:10:41", "throughput": 3694.57, "total_tokens": 22599960}
|
| 6927 |
+
{"current_steps": 34545, "total_steps": 38160, "loss": 0.6337, "lr": 1.3551943255312172e-06, "epoch": 18.105345911949687, "percentage": 90.53, "elapsed_time": "1:41:57", "remaining_time": "0:10:40", "throughput": 3694.6, "total_tokens": 22602872}
|
| 6928 |
+
{"current_steps": 34550, "total_steps": 38160, "loss": 0.4911, "lr": 1.3514832594651694e-06, "epoch": 18.10796645702306, "percentage": 90.54, "elapsed_time": "1:41:58", "remaining_time": "0:10:39", "throughput": 3694.57, "total_tokens": 22605592}
|
| 6929 |
+
{"current_steps": 34555, "total_steps": 38160, "loss": 0.5924, "lr": 1.347777140399026e-06, "epoch": 18.110587002096437, "percentage": 90.55, "elapsed_time": "1:41:59", "remaining_time": "0:10:38", "throughput": 3694.61, "total_tokens": 22609112}
|
| 6930 |
+
{"current_steps": 34560, "total_steps": 38160, "loss": 0.4816, "lr": 1.344075969108069e-06, "epoch": 18.11320754716981, "percentage": 90.57, "elapsed_time": "1:42:00", "remaining_time": "0:10:37", "throughput": 3694.56, "total_tokens": 22611736}
|
| 6931 |
+
{"current_steps": 34565, "total_steps": 38160, "loss": 0.3603, "lr": 1.3403797463665352e-06, "epoch": 18.115828092243188, "percentage": 90.58, "elapsed_time": "1:42:01", "remaining_time": "0:10:36", "throughput": 3694.62, "total_tokens": 22615896}
|
| 6932 |
+
{"current_steps": 34570, "total_steps": 38160, "loss": 0.3549, "lr": 1.3366884729476365e-06, "epoch": 18.11844863731656, "percentage": 90.59, "elapsed_time": "1:42:02", "remaining_time": "0:10:35", "throughput": 3694.56, "total_tokens": 22618296}
|
| 6933 |
+
{"current_steps": 34575, "total_steps": 38160, "loss": 0.4978, "lr": 1.3330021496235467e-06, "epoch": 18.121069182389938, "percentage": 90.61, "elapsed_time": "1:42:02", "remaining_time": "0:10:34", "throughput": 3694.66, "total_tokens": 22621976}
|
| 6934 |
+
{"current_steps": 34580, "total_steps": 38160, "loss": 0.3193, "lr": 1.3293207771653965e-06, "epoch": 18.12368972746331, "percentage": 90.62, "elapsed_time": "1:42:03", "remaining_time": "0:10:33", "throughput": 3694.69, "total_tokens": 22625720}
|
| 6935 |
+
{"current_steps": 34585, "total_steps": 38160, "loss": 0.4398, "lr": 1.3256443563432891e-06, "epoch": 18.12631027253669, "percentage": 90.63, "elapsed_time": "1:42:04", "remaining_time": "0:10:33", "throughput": 3694.75, "total_tokens": 22629496}
|
| 6936 |
+
{"current_steps": 34590, "total_steps": 38160, "loss": 0.5061, "lr": 1.3219728879262956e-06, "epoch": 18.128930817610062, "percentage": 90.64, "elapsed_time": "1:42:05", "remaining_time": "0:10:32", "throughput": 3694.82, "total_tokens": 22632920}
|
| 6937 |
+
{"current_steps": 34595, "total_steps": 38160, "loss": 0.3753, "lr": 1.3183063726824375e-06, "epoch": 18.13155136268344, "percentage": 90.66, "elapsed_time": "1:42:06", "remaining_time": "0:10:31", "throughput": 3694.9, "total_tokens": 22636536}
|
| 6938 |
+
{"current_steps": 34600, "total_steps": 38160, "loss": 0.6382, "lr": 1.3146448113787151e-06, "epoch": 18.134171907756812, "percentage": 90.67, "elapsed_time": "1:42:07", "remaining_time": "0:10:30", "throughput": 3694.93, "total_tokens": 22639800}
|
| 6939 |
+
{"current_steps": 34605, "total_steps": 38160, "loss": 0.5246, "lr": 1.3109882047810773e-06, "epoch": 18.13679245283019, "percentage": 90.68, "elapsed_time": "1:42:08", "remaining_time": "0:10:29", "throughput": 3694.88, "total_tokens": 22642520}
|
| 6940 |
+
{"current_steps": 34610, "total_steps": 38160, "loss": 0.3765, "lr": 1.307336553654448e-06, "epoch": 18.139412997903563, "percentage": 90.7, "elapsed_time": "1:42:08", "remaining_time": "0:10:28", "throughput": 3694.92, "total_tokens": 22645784}
|
| 6941 |
+
{"current_steps": 34615, "total_steps": 38160, "loss": 0.3768, "lr": 1.303689858762716e-06, "epoch": 18.14203354297694, "percentage": 90.71, "elapsed_time": "1:42:09", "remaining_time": "0:10:27", "throughput": 3694.86, "total_tokens": 22648664}
|
| 6942 |
+
{"current_steps": 34620, "total_steps": 38160, "loss": 0.5112, "lr": 1.3000481208687188e-06, "epoch": 18.144654088050313, "percentage": 90.72, "elapsed_time": "1:42:10", "remaining_time": "0:10:26", "throughput": 3694.9, "total_tokens": 22651832}
|
| 6943 |
+
{"current_steps": 34625, "total_steps": 38160, "loss": 0.4592, "lr": 1.2964113407342743e-06, "epoch": 18.14727463312369, "percentage": 90.74, "elapsed_time": "1:42:11", "remaining_time": "0:10:25", "throughput": 3694.9, "total_tokens": 22654808}
|
| 6944 |
+
{"current_steps": 34630, "total_steps": 38160, "loss": 0.4465, "lr": 1.292779519120152e-06, "epoch": 18.149895178197063, "percentage": 90.75, "elapsed_time": "1:42:12", "remaining_time": "0:10:25", "throughput": 3694.91, "total_tokens": 22657624}
|
| 6945 |
+
{"current_steps": 34635, "total_steps": 38160, "loss": 0.4212, "lr": 1.2891526567860856e-06, "epoch": 18.15251572327044, "percentage": 90.76, "elapsed_time": "1:42:12", "remaining_time": "0:10:24", "throughput": 3694.83, "total_tokens": 22659960}
|
| 6946 |
+
{"current_steps": 34640, "total_steps": 38160, "loss": 0.4654, "lr": 1.2855307544907825e-06, "epoch": 18.155136268343817, "percentage": 90.78, "elapsed_time": "1:42:14", "remaining_time": "0:10:23", "throughput": 3694.98, "total_tokens": 22665080}
|
| 6947 |
+
{"current_steps": 34645, "total_steps": 38160, "loss": 0.5964, "lr": 1.281913812991889e-06, "epoch": 18.15775681341719, "percentage": 90.79, "elapsed_time": "1:42:14", "remaining_time": "0:10:22", "throughput": 3694.91, "total_tokens": 22667544}
|
| 6948 |
+
{"current_steps": 34650, "total_steps": 38160, "loss": 0.3906, "lr": 1.2783018330460389e-06, "epoch": 18.160377358490567, "percentage": 90.8, "elapsed_time": "1:42:15", "remaining_time": "0:10:21", "throughput": 3694.96, "total_tokens": 22671032}
|
| 6949 |
+
{"current_steps": 34655, "total_steps": 38160, "loss": 0.3558, "lr": 1.2746948154088111e-06, "epoch": 18.16299790356394, "percentage": 90.81, "elapsed_time": "1:42:16", "remaining_time": "0:10:20", "throughput": 3695.08, "total_tokens": 22675416}
|
| 6950 |
+
{"current_steps": 34660, "total_steps": 38160, "loss": 0.4692, "lr": 1.2710927608347605e-06, "epoch": 18.165618448637318, "percentage": 90.83, "elapsed_time": "1:42:17", "remaining_time": "0:10:19", "throughput": 3695.02, "total_tokens": 22678008}
|
| 6951 |
+
{"current_steps": 34665, "total_steps": 38160, "loss": 0.3522, "lr": 1.2674956700773872e-06, "epoch": 18.16823899371069, "percentage": 90.84, "elapsed_time": "1:42:18", "remaining_time": "0:10:18", "throughput": 3694.96, "total_tokens": 22680408}
|
| 6952 |
+
{"current_steps": 34670, "total_steps": 38160, "loss": 0.4688, "lr": 1.2639035438891666e-06, "epoch": 18.170859538784068, "percentage": 90.85, "elapsed_time": "1:42:19", "remaining_time": "0:10:17", "throughput": 3695.14, "total_tokens": 22685176}
|
| 6953 |
+
{"current_steps": 34675, "total_steps": 38160, "loss": 0.2967, "lr": 1.260316383021526e-06, "epoch": 18.17348008385744, "percentage": 90.87, "elapsed_time": "1:42:20", "remaining_time": "0:10:17", "throughput": 3695.07, "total_tokens": 22687960}
|
| 6954 |
+
{"current_steps": 34680, "total_steps": 38160, "loss": 0.2888, "lr": 1.2567341882248645e-06, "epoch": 18.17610062893082, "percentage": 90.88, "elapsed_time": "1:42:20", "remaining_time": "0:10:16", "throughput": 3695.04, "total_tokens": 22690840}
|
| 6955 |
+
{"current_steps": 34685, "total_steps": 38160, "loss": 0.3996, "lr": 1.2531569602485305e-06, "epoch": 18.178721174004192, "percentage": 90.89, "elapsed_time": "1:42:21", "remaining_time": "0:10:15", "throughput": 3695.11, "total_tokens": 22694424}
|
| 6956 |
+
{"current_steps": 34690, "total_steps": 38160, "loss": 0.4265, "lr": 1.2495846998408445e-06, "epoch": 18.18134171907757, "percentage": 90.91, "elapsed_time": "1:42:22", "remaining_time": "0:10:14", "throughput": 3695.15, "total_tokens": 22697976}
|
| 6957 |
+
{"current_steps": 34695, "total_steps": 38160, "loss": 0.5046, "lr": 1.246017407749084e-06, "epoch": 18.183962264150942, "percentage": 90.92, "elapsed_time": "1:42:23", "remaining_time": "0:10:13", "throughput": 3695.18, "total_tokens": 22701176}
|
| 6958 |
+
{"current_steps": 34700, "total_steps": 38160, "loss": 0.4394, "lr": 1.2424550847194739e-06, "epoch": 18.18658280922432, "percentage": 90.93, "elapsed_time": "1:42:24", "remaining_time": "0:10:12", "throughput": 3695.16, "total_tokens": 22703864}
|
| 6959 |
+
{"current_steps": 34705, "total_steps": 38160, "loss": 0.3485, "lr": 1.2388977314972238e-06, "epoch": 18.189203354297693, "percentage": 90.95, "elapsed_time": "1:42:25", "remaining_time": "0:10:11", "throughput": 3695.1, "total_tokens": 22706616}
|
| 6960 |
+
{"current_steps": 34710, "total_steps": 38160, "loss": 0.3958, "lr": 1.235345348826486e-06, "epoch": 18.19182389937107, "percentage": 90.96, "elapsed_time": "1:42:25", "remaining_time": "0:10:10", "throughput": 3695.05, "total_tokens": 22709272}
|
| 6961 |
+
{"current_steps": 34715, "total_steps": 38160, "loss": 0.365, "lr": 1.2317979374503825e-06, "epoch": 18.194444444444443, "percentage": 90.97, "elapsed_time": "1:42:26", "remaining_time": "0:10:09", "throughput": 3695.1, "total_tokens": 22712568}
|
| 6962 |
+
{"current_steps": 34720, "total_steps": 38160, "loss": 0.4084, "lr": 1.228255498110989e-06, "epoch": 18.19706498951782, "percentage": 90.99, "elapsed_time": "1:42:27", "remaining_time": "0:10:09", "throughput": 3695.08, "total_tokens": 22715576}
|
| 6963 |
+
{"current_steps": 34725, "total_steps": 38160, "loss": 0.4953, "lr": 1.224718031549338e-06, "epoch": 18.199685534591197, "percentage": 91.0, "elapsed_time": "1:42:28", "remaining_time": "0:10:08", "throughput": 3695.08, "total_tokens": 22718584}
|
| 6964 |
+
{"current_steps": 34730, "total_steps": 38160, "loss": 0.3806, "lr": 1.2211855385054344e-06, "epoch": 18.20230607966457, "percentage": 91.01, "elapsed_time": "1:42:29", "remaining_time": "0:10:07", "throughput": 3695.19, "total_tokens": 22722456}
|
| 6965 |
+
{"current_steps": 34735, "total_steps": 38160, "loss": 0.4921, "lr": 1.217658019718232e-06, "epoch": 18.204926624737947, "percentage": 91.02, "elapsed_time": "1:42:30", "remaining_time": "0:10:06", "throughput": 3695.27, "total_tokens": 22726392}
|
| 6966 |
+
{"current_steps": 34740, "total_steps": 38160, "loss": 0.446, "lr": 1.2141354759256484e-06, "epoch": 18.20754716981132, "percentage": 91.04, "elapsed_time": "1:42:30", "remaining_time": "0:10:05", "throughput": 3695.31, "total_tokens": 22729560}
|
| 6967 |
+
{"current_steps": 34745, "total_steps": 38160, "loss": 0.4449, "lr": 1.2106179078645641e-06, "epoch": 18.210167714884697, "percentage": 91.05, "elapsed_time": "1:42:31", "remaining_time": "0:10:04", "throughput": 3695.38, "total_tokens": 22733496}
|
| 6968 |
+
{"current_steps": 34750, "total_steps": 38160, "loss": 0.3099, "lr": 1.2071053162708096e-06, "epoch": 18.21278825995807, "percentage": 91.06, "elapsed_time": "1:42:32", "remaining_time": "0:10:03", "throughput": 3695.27, "total_tokens": 22736120}
|
| 6969 |
+
{"current_steps": 34755, "total_steps": 38160, "loss": 0.4647, "lr": 1.203597701879175e-06, "epoch": 18.215408805031448, "percentage": 91.08, "elapsed_time": "1:42:33", "remaining_time": "0:10:02", "throughput": 3695.26, "total_tokens": 22738872}
|
| 6970 |
+
{"current_steps": 34760, "total_steps": 38160, "loss": 0.5077, "lr": 1.2000950654234177e-06, "epoch": 18.21802935010482, "percentage": 91.09, "elapsed_time": "1:42:34", "remaining_time": "0:10:01", "throughput": 3695.21, "total_tokens": 22741336}
|
| 6971 |
+
{"current_steps": 34765, "total_steps": 38160, "loss": 0.418, "lr": 1.1965974076362491e-06, "epoch": 18.220649895178198, "percentage": 91.1, "elapsed_time": "1:42:35", "remaining_time": "0:10:01", "throughput": 3695.29, "total_tokens": 22744632}
|
| 6972 |
+
{"current_steps": 34770, "total_steps": 38160, "loss": 0.5372, "lr": 1.193104729249342e-06, "epoch": 18.22327044025157, "percentage": 91.12, "elapsed_time": "1:42:36", "remaining_time": "0:10:00", "throughput": 3695.4, "total_tokens": 22749048}
|
| 6973 |
+
{"current_steps": 34775, "total_steps": 38160, "loss": 0.4029, "lr": 1.189617030993323e-06, "epoch": 18.22589098532495, "percentage": 91.13, "elapsed_time": "1:42:36", "remaining_time": "0:09:59", "throughput": 3695.52, "total_tokens": 22753048}
|
| 6974 |
+
{"current_steps": 34780, "total_steps": 38160, "loss": 0.4733, "lr": 1.1861343135977753e-06, "epoch": 18.228511530398322, "percentage": 91.14, "elapsed_time": "1:42:37", "remaining_time": "0:09:58", "throughput": 3695.57, "total_tokens": 22756440}
|
| 6975 |
+
{"current_steps": 34785, "total_steps": 38160, "loss": 0.4738, "lr": 1.182656577791244e-06, "epoch": 18.2311320754717, "percentage": 91.16, "elapsed_time": "1:42:38", "remaining_time": "0:09:57", "throughput": 3695.49, "total_tokens": 22759096}
|
| 6976 |
+
{"current_steps": 34790, "total_steps": 38160, "loss": 0.3594, "lr": 1.1791838243012359e-06, "epoch": 18.233752620545072, "percentage": 91.17, "elapsed_time": "1:42:39", "remaining_time": "0:09:56", "throughput": 3695.48, "total_tokens": 22762424}
|
| 6977 |
+
{"current_steps": 34795, "total_steps": 38160, "loss": 0.3997, "lr": 1.1757160538542117e-06, "epoch": 18.23637316561845, "percentage": 91.18, "elapsed_time": "1:42:40", "remaining_time": "0:09:55", "throughput": 3695.38, "total_tokens": 22764600}
|
| 6978 |
+
{"current_steps": 34800, "total_steps": 38160, "loss": 0.4721, "lr": 1.1722532671755882e-06, "epoch": 18.238993710691823, "percentage": 91.19, "elapsed_time": "1:42:41", "remaining_time": "0:09:54", "throughput": 3695.41, "total_tokens": 22767672}
|
| 6979 |
+
{"current_steps": 34805, "total_steps": 38160, "loss": 0.4799, "lr": 1.168795464989736e-06, "epoch": 18.2416142557652, "percentage": 91.21, "elapsed_time": "1:42:41", "remaining_time": "0:09:53", "throughput": 3695.46, "total_tokens": 22770776}
|
| 6980 |
+
{"current_steps": 34810, "total_steps": 38160, "loss": 0.2839, "lr": 1.165342648019993e-06, "epoch": 18.244234800838573, "percentage": 91.22, "elapsed_time": "1:42:42", "remaining_time": "0:09:53", "throughput": 3695.58, "total_tokens": 22774968}
|
| 6981 |
+
{"current_steps": 34815, "total_steps": 38160, "loss": 0.449, "lr": 1.1618948169886457e-06, "epoch": 18.24685534591195, "percentage": 91.23, "elapsed_time": "1:42:43", "remaining_time": "0:09:52", "throughput": 3695.56, "total_tokens": 22778040}
|
| 6982 |
+
{"current_steps": 34820, "total_steps": 38160, "loss": 0.5861, "lr": 1.1584519726169446e-06, "epoch": 18.249475890985323, "percentage": 91.25, "elapsed_time": "1:42:44", "remaining_time": "0:09:51", "throughput": 3695.53, "total_tokens": 22780760}
|
| 6983 |
+
{"current_steps": 34825, "total_steps": 38160, "loss": 0.4031, "lr": 1.1550141156250942e-06, "epoch": 18.2520964360587, "percentage": 91.26, "elapsed_time": "1:42:45", "remaining_time": "0:09:50", "throughput": 3695.5, "total_tokens": 22783480}
|
| 6984 |
+
{"current_steps": 34830, "total_steps": 38160, "loss": 0.4763, "lr": 1.1515812467322556e-06, "epoch": 18.254716981132077, "percentage": 91.27, "elapsed_time": "1:42:46", "remaining_time": "0:09:49", "throughput": 3695.59, "total_tokens": 22787704}
|
| 6985 |
+
{"current_steps": 34835, "total_steps": 38160, "loss": 0.3693, "lr": 1.1481533666565375e-06, "epoch": 18.25733752620545, "percentage": 91.29, "elapsed_time": "1:42:47", "remaining_time": "0:09:48", "throughput": 3695.67, "total_tokens": 22791384}
|
| 6986 |
+
{"current_steps": 34840, "total_steps": 38160, "loss": 0.4622, "lr": 1.144730476115019e-06, "epoch": 18.259958071278827, "percentage": 91.3, "elapsed_time": "1:42:47", "remaining_time": "0:09:47", "throughput": 3695.77, "total_tokens": 22795000}
|
| 6987 |
+
{"current_steps": 34845, "total_steps": 38160, "loss": 0.4056, "lr": 1.1413125758237304e-06, "epoch": 18.2625786163522, "percentage": 91.31, "elapsed_time": "1:42:48", "remaining_time": "0:09:46", "throughput": 3695.8, "total_tokens": 22798072}
|
| 6988 |
+
{"current_steps": 34850, "total_steps": 38160, "loss": 0.432, "lr": 1.1378996664976577e-06, "epoch": 18.265199161425578, "percentage": 91.33, "elapsed_time": "1:42:49", "remaining_time": "0:09:45", "throughput": 3695.84, "total_tokens": 22801240}
|
| 6989 |
+
{"current_steps": 34855, "total_steps": 38160, "loss": 0.4295, "lr": 1.134491748850741e-06, "epoch": 18.26781970649895, "percentage": 91.34, "elapsed_time": "1:42:50", "remaining_time": "0:09:45", "throughput": 3695.84, "total_tokens": 22804056}
|
| 6990 |
+
{"current_steps": 34860, "total_steps": 38160, "loss": 0.4523, "lr": 1.13108882359588e-06, "epoch": 18.270440251572328, "percentage": 91.35, "elapsed_time": "1:42:51", "remaining_time": "0:09:44", "throughput": 3695.88, "total_tokens": 22807480}
|
| 6991 |
+
{"current_steps": 34865, "total_steps": 38160, "loss": 0.4964, "lr": 1.127690891444924e-06, "epoch": 18.2730607966457, "percentage": 91.37, "elapsed_time": "1:42:51", "remaining_time": "0:09:43", "throughput": 3695.9, "total_tokens": 22810648}
|
| 6992 |
+
{"current_steps": 34870, "total_steps": 38160, "loss": 0.4444, "lr": 1.1242979531086829e-06, "epoch": 18.27568134171908, "percentage": 91.38, "elapsed_time": "1:42:52", "remaining_time": "0:09:42", "throughput": 3695.97, "total_tokens": 22814520}
|
| 6993 |
+
{"current_steps": 34875, "total_steps": 38160, "loss": 0.4878, "lr": 1.1209100092969244e-06, "epoch": 18.278301886792452, "percentage": 91.39, "elapsed_time": "1:42:53", "remaining_time": "0:09:41", "throughput": 3696.05, "total_tokens": 22818264}
|
| 6994 |
+
{"current_steps": 34880, "total_steps": 38160, "loss": 0.4227, "lr": 1.1175270607183625e-06, "epoch": 18.28092243186583, "percentage": 91.4, "elapsed_time": "1:42:54", "remaining_time": "0:09:40", "throughput": 3696.03, "total_tokens": 22821208}
|
| 6995 |
+
{"current_steps": 34885, "total_steps": 38160, "loss": 0.4104, "lr": 1.114149108080678e-06, "epoch": 18.283542976939202, "percentage": 91.42, "elapsed_time": "1:42:55", "remaining_time": "0:09:39", "throughput": 3696.01, "total_tokens": 22824696}
|
| 6996 |
+
{"current_steps": 34890, "total_steps": 38160, "loss": 0.3701, "lr": 1.1107761520904925e-06, "epoch": 18.28616352201258, "percentage": 91.43, "elapsed_time": "1:42:56", "remaining_time": "0:09:38", "throughput": 3696.03, "total_tokens": 22828024}
|
| 6997 |
+
{"current_steps": 34895, "total_steps": 38160, "loss": 0.4299, "lr": 1.1074081934533908e-06, "epoch": 18.288784067085953, "percentage": 91.44, "elapsed_time": "1:42:57", "remaining_time": "0:09:37", "throughput": 3695.95, "total_tokens": 22830456}
|
| 6998 |
+
{"current_steps": 34900, "total_steps": 38160, "loss": 0.3751, "lr": 1.1040452328739153e-06, "epoch": 18.29140461215933, "percentage": 91.46, "elapsed_time": "1:42:57", "remaining_time": "0:09:37", "throughput": 3695.98, "total_tokens": 22833592}
|
| 6999 |
+
{"current_steps": 34905, "total_steps": 38160, "loss": 0.4885, "lr": 1.1006872710555615e-06, "epoch": 18.294025157232703, "percentage": 91.47, "elapsed_time": "1:42:58", "remaining_time": "0:09:36", "throughput": 3695.98, "total_tokens": 22836696}
|
| 7000 |
+
{"current_steps": 34910, "total_steps": 38160, "loss": 0.3641, "lr": 1.0973343087007703e-06, "epoch": 18.29664570230608, "percentage": 91.48, "elapsed_time": "1:42:59", "remaining_time": "0:09:35", "throughput": 3696.03, "total_tokens": 22839960}
|
| 7001 |
+
{"current_steps": 34915, "total_steps": 38160, "loss": 0.4526, "lr": 1.0939863465109473e-06, "epoch": 18.299266247379457, "percentage": 91.5, "elapsed_time": "1:43:00", "remaining_time": "0:09:34", "throughput": 3696.04, "total_tokens": 22843000}
|
| 7002 |
+
{"current_steps": 34920, "total_steps": 38160, "loss": 0.3516, "lr": 1.0906433851864433e-06, "epoch": 18.30188679245283, "percentage": 91.51, "elapsed_time": "1:43:01", "remaining_time": "0:09:33", "throughput": 3696.12, "total_tokens": 22847032}
|
| 7003 |
+
{"current_steps": 34925, "total_steps": 38160, "loss": 0.4094, "lr": 1.0873054254265713e-06, "epoch": 18.304507337526207, "percentage": 91.52, "elapsed_time": "1:43:02", "remaining_time": "0:09:32", "throughput": 3696.14, "total_tokens": 22849976}
|
| 7004 |
+
{"current_steps": 34930, "total_steps": 38160, "loss": 0.4162, "lr": 1.0839724679295976e-06, "epoch": 18.30712788259958, "percentage": 91.54, "elapsed_time": "1:43:02", "remaining_time": "0:09:31", "throughput": 3696.1, "total_tokens": 22852888}
|
| 7005 |
+
{"current_steps": 34935, "total_steps": 38160, "loss": 0.3957, "lr": 1.0806445133927311e-06, "epoch": 18.309748427672957, "percentage": 91.55, "elapsed_time": "1:43:03", "remaining_time": "0:09:30", "throughput": 3696.04, "total_tokens": 22855672}
|
| 7006 |
+
{"current_steps": 34940, "total_steps": 38160, "loss": 0.4416, "lr": 1.0773215625121457e-06, "epoch": 18.31236897274633, "percentage": 91.56, "elapsed_time": "1:43:04", "remaining_time": "0:09:29", "throughput": 3696.07, "total_tokens": 22859032}
|
| 7007 |
+
{"current_steps": 34945, "total_steps": 38160, "loss": 0.3566, "lr": 1.0740036159829715e-06, "epoch": 18.314989517819708, "percentage": 91.57, "elapsed_time": "1:43:05", "remaining_time": "0:09:29", "throughput": 3696.15, "total_tokens": 22863160}
|
| 7008 |
+
{"current_steps": 34950, "total_steps": 38160, "loss": 0.4337, "lr": 1.0706906744992724e-06, "epoch": 18.31761006289308, "percentage": 91.59, "elapsed_time": "1:43:06", "remaining_time": "0:09:28", "throughput": 3696.15, "total_tokens": 22866104}
|
| 7009 |
+
{"current_steps": 34955, "total_steps": 38160, "loss": 0.4532, "lr": 1.0673827387540857e-06, "epoch": 18.320230607966458, "percentage": 91.6, "elapsed_time": "1:43:07", "remaining_time": "0:09:27", "throughput": 3696.15, "total_tokens": 22869112}
|
| 7010 |
+
{"current_steps": 34960, "total_steps": 38160, "loss": 0.3681, "lr": 1.0640798094393916e-06, "epoch": 18.32285115303983, "percentage": 91.61, "elapsed_time": "1:43:08", "remaining_time": "0:09:26", "throughput": 3696.17, "total_tokens": 22872184}
|
| 7011 |
+
{"current_steps": 34965, "total_steps": 38160, "loss": 0.3927, "lr": 1.060781887246126e-06, "epoch": 18.32547169811321, "percentage": 91.63, "elapsed_time": "1:43:08", "remaining_time": "0:09:25", "throughput": 3696.32, "total_tokens": 22876376}
|
| 7012 |
+
{"current_steps": 34970, "total_steps": 38160, "loss": 0.3884, "lr": 1.057488972864179e-06, "epoch": 18.328092243186582, "percentage": 91.64, "elapsed_time": "1:43:09", "remaining_time": "0:09:24", "throughput": 3696.38, "total_tokens": 22879640}
|
| 7013 |
+
{"current_steps": 34975, "total_steps": 38160, "loss": 0.5015, "lr": 1.0542010669823855e-06, "epoch": 18.33071278825996, "percentage": 91.65, "elapsed_time": "1:43:10", "remaining_time": "0:09:23", "throughput": 3696.43, "total_tokens": 22883832}
|
| 7014 |
+
{"current_steps": 34980, "total_steps": 38160, "loss": 0.4023, "lr": 1.0509181702885395e-06, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "1:43:11", "remaining_time": "0:09:22", "throughput": 3696.39, "total_tokens": 22886328}
|
| 7015 |
+
{"current_steps": 34985, "total_steps": 38160, "loss": 0.4594, "lr": 1.047640283469392e-06, "epoch": 18.33595387840671, "percentage": 91.68, "elapsed_time": "1:43:12", "remaining_time": "0:09:21", "throughput": 3696.37, "total_tokens": 22889080}
|
| 7016 |
+
{"current_steps": 34990, "total_steps": 38160, "loss": 0.4589, "lr": 1.0443674072106308e-06, "epoch": 18.338574423480082, "percentage": 91.69, "elapsed_time": "1:43:13", "remaining_time": "0:09:21", "throughput": 3696.31, "total_tokens": 22891576}
|
| 7017 |
+
{"current_steps": 34995, "total_steps": 38160, "loss": 0.6458, "lr": 1.041099542196905e-06, "epoch": 18.34119496855346, "percentage": 91.71, "elapsed_time": "1:43:13", "remaining_time": "0:09:20", "throughput": 3696.36, "total_tokens": 22894808}
|
| 7018 |
+
{"current_steps": 35000, "total_steps": 38160, "loss": 0.4294, "lr": 1.0378366891118234e-06, "epoch": 18.343815513626833, "percentage": 91.72, "elapsed_time": "1:43:14", "remaining_time": "0:09:19", "throughput": 3696.38, "total_tokens": 22897912}
|
| 7019 |
+
{"current_steps": 35005, "total_steps": 38160, "loss": 0.4709, "lr": 1.0345788486379265e-06, "epoch": 18.34643605870021, "percentage": 91.73, "elapsed_time": "1:43:15", "remaining_time": "0:09:18", "throughput": 3696.4, "total_tokens": 22900824}
|
| 7020 |
+
{"current_steps": 35010, "total_steps": 38160, "loss": 0.4792, "lr": 1.0313260214567272e-06, "epoch": 18.349056603773583, "percentage": 91.75, "elapsed_time": "1:43:16", "remaining_time": "0:09:17", "throughput": 3696.44, "total_tokens": 22903864}
|
| 7021 |
+
{"current_steps": 35015, "total_steps": 38160, "loss": 0.3539, "lr": 1.0280782082486729e-06, "epoch": 18.35167714884696, "percentage": 91.76, "elapsed_time": "1:43:16", "remaining_time": "0:09:16", "throughput": 3696.43, "total_tokens": 22906744}
|
| 7022 |
+
{"current_steps": 35020, "total_steps": 38160, "loss": 0.4429, "lr": 1.02483540969317e-06, "epoch": 18.354297693920337, "percentage": 91.77, "elapsed_time": "1:43:17", "remaining_time": "0:09:15", "throughput": 3696.53, "total_tokens": 22910904}
|
| 7023 |
+
{"current_steps": 35025, "total_steps": 38160, "loss": 0.443, "lr": 1.0215976264685763e-06, "epoch": 18.35691823899371, "percentage": 91.78, "elapsed_time": "1:43:18", "remaining_time": "0:09:14", "throughput": 3696.53, "total_tokens": 22914072}
|
| 7024 |
+
{"current_steps": 35030, "total_steps": 38160, "loss": 0.3926, "lr": 1.0183648592522021e-06, "epoch": 18.359538784067087, "percentage": 91.8, "elapsed_time": "1:43:19", "remaining_time": "0:09:13", "throughput": 3696.51, "total_tokens": 22916856}
|
| 7025 |
+
{"current_steps": 35035, "total_steps": 38160, "loss": 0.3706, "lr": 1.0151371087203015e-06, "epoch": 18.36215932914046, "percentage": 91.81, "elapsed_time": "1:43:20", "remaining_time": "0:09:13", "throughput": 3696.62, "total_tokens": 22922008}
|
| 7026 |
+
{"current_steps": 35040, "total_steps": 38160, "loss": 0.4488, "lr": 1.0119143755480815e-06, "epoch": 18.364779874213838, "percentage": 91.82, "elapsed_time": "1:43:21", "remaining_time": "0:09:12", "throughput": 3696.59, "total_tokens": 22924760}
|
| 7027 |
+
{"current_steps": 35045, "total_steps": 38160, "loss": 0.4954, "lr": 1.0086966604097026e-06, "epoch": 18.36740041928721, "percentage": 91.84, "elapsed_time": "1:43:22", "remaining_time": "0:09:11", "throughput": 3696.58, "total_tokens": 22927416}
|
| 7028 |
+
{"current_steps": 35050, "total_steps": 38160, "loss": 0.5084, "lr": 1.0054839639782766e-06, "epoch": 18.370020964360588, "percentage": 91.85, "elapsed_time": "1:43:23", "remaining_time": "0:09:10", "throughput": 3696.56, "total_tokens": 22930200}
|
| 7029 |
+
{"current_steps": 35055, "total_steps": 38160, "loss": 0.4999, "lr": 1.00227628692586e-06, "epoch": 18.37264150943396, "percentage": 91.86, "elapsed_time": "1:43:23", "remaining_time": "0:09:09", "throughput": 3696.6, "total_tokens": 22933592}
|
| 7030 |
+
{"current_steps": 35060, "total_steps": 38160, "loss": 0.3657, "lr": 9.990736299234692e-07, "epoch": 18.37526205450734, "percentage": 91.88, "elapsed_time": "1:43:24", "remaining_time": "0:09:08", "throughput": 3696.58, "total_tokens": 22936312}
|
| 7031 |
+
{"current_steps": 35065, "total_steps": 38160, "loss": 0.4355, "lr": 9.958759936410573e-07, "epoch": 18.377882599580712, "percentage": 91.89, "elapsed_time": "1:43:25", "remaining_time": "0:09:07", "throughput": 3696.6, "total_tokens": 22939352}
|
| 7032 |
+
{"current_steps": 35070, "total_steps": 38160, "loss": 0.5305, "lr": 9.926833787475304e-07, "epoch": 18.38050314465409, "percentage": 91.9, "elapsed_time": "1:43:26", "remaining_time": "0:09:06", "throughput": 3696.65, "total_tokens": 22942552}
|
| 7033 |
+
{"current_steps": 35075, "total_steps": 38160, "loss": 0.3761, "lr": 9.89495785910749e-07, "epoch": 18.383123689727462, "percentage": 91.92, "elapsed_time": "1:43:27", "remaining_time": "0:09:05", "throughput": 3696.67, "total_tokens": 22945592}
|
| 7034 |
+
{"current_steps": 35080, "total_steps": 38160, "loss": 0.401, "lr": 9.86313215797524e-07, "epoch": 18.38574423480084, "percentage": 91.93, "elapsed_time": "1:43:27", "remaining_time": "0:09:05", "throughput": 3696.64, "total_tokens": 22948280}
|
| 7035 |
+
{"current_steps": 35085, "total_steps": 38160, "loss": 0.4577, "lr": 9.831356690736149e-07, "epoch": 18.388364779874212, "percentage": 91.94, "elapsed_time": "1:43:28", "remaining_time": "0:09:04", "throughput": 3696.67, "total_tokens": 22951512}
|
| 7036 |
+
{"current_steps": 35090, "total_steps": 38160, "loss": 0.5174, "lr": 9.799631464037256e-07, "epoch": 18.39098532494759, "percentage": 91.95, "elapsed_time": "1:43:29", "remaining_time": "0:09:03", "throughput": 3696.72, "total_tokens": 22954904}
|
| 7037 |
+
{"current_steps": 35095, "total_steps": 38160, "loss": 0.3794, "lr": 9.76795648451509e-07, "epoch": 18.393605870020963, "percentage": 91.97, "elapsed_time": "1:43:30", "remaining_time": "0:09:02", "throughput": 3696.78, "total_tokens": 22958136}
|
| 7038 |
+
{"current_steps": 35100, "total_steps": 38160, "loss": 0.3579, "lr": 9.73633175879568e-07, "epoch": 18.39622641509434, "percentage": 91.98, "elapsed_time": "1:43:31", "remaining_time": "0:09:01", "throughput": 3696.83, "total_tokens": 22961720}
|
| 7039 |
+
{"current_steps": 35105, "total_steps": 38160, "loss": 0.4002, "lr": 9.704757293494627e-07, "epoch": 18.398846960167713, "percentage": 91.99, "elapsed_time": "1:43:32", "remaining_time": "0:09:00", "throughput": 3696.83, "total_tokens": 22964856}
|
| 7040 |
+
{"current_steps": 35110, "total_steps": 38160, "loss": 0.3884, "lr": 9.673233095216922e-07, "epoch": 18.40146750524109, "percentage": 92.01, "elapsed_time": "1:43:32", "remaining_time": "0:08:59", "throughput": 3696.86, "total_tokens": 22967864}
|
| 7041 |
+
{"current_steps": 35115, "total_steps": 38160, "loss": 0.4312, "lr": 9.641759170557068e-07, "epoch": 18.404088050314467, "percentage": 92.02, "elapsed_time": "1:43:33", "remaining_time": "0:08:58", "throughput": 3696.95, "total_tokens": 22971704}
|
| 7042 |
+
{"current_steps": 35120, "total_steps": 38160, "loss": 0.4685, "lr": 9.610335526099024e-07, "epoch": 18.40670859538784, "percentage": 92.03, "elapsed_time": "1:43:34", "remaining_time": "0:08:57", "throughput": 3697.01, "total_tokens": 22975384}
|
| 7043 |
+
{"current_steps": 35125, "total_steps": 38160, "loss": 0.3963, "lr": 9.578962168416278e-07, "epoch": 18.409329140461217, "percentage": 92.05, "elapsed_time": "1:43:35", "remaining_time": "0:08:57", "throughput": 3697.02, "total_tokens": 22978488}
|
| 7044 |
+
{"current_steps": 35130, "total_steps": 38160, "loss": 0.5307, "lr": 9.547639104071775e-07, "epoch": 18.41194968553459, "percentage": 92.06, "elapsed_time": "1:43:36", "remaining_time": "0:08:56", "throughput": 3697.08, "total_tokens": 22982136}
|
| 7045 |
+
{"current_steps": 35135, "total_steps": 38160, "loss": 0.5078, "lr": 9.516366339617944e-07, "epoch": 18.414570230607968, "percentage": 92.07, "elapsed_time": "1:43:37", "remaining_time": "0:08:55", "throughput": 3697.0, "total_tokens": 22984440}
|
| 7046 |
+
{"current_steps": 35140, "total_steps": 38160, "loss": 0.414, "lr": 9.485143881596714e-07, "epoch": 18.41719077568134, "percentage": 92.09, "elapsed_time": "1:43:37", "remaining_time": "0:08:54", "throughput": 3697.06, "total_tokens": 22987800}
|
| 7047 |
+
{"current_steps": 35145, "total_steps": 38160, "loss": 0.5192, "lr": 9.453971736539447e-07, "epoch": 18.419811320754718, "percentage": 92.1, "elapsed_time": "1:43:38", "remaining_time": "0:08:53", "throughput": 3697.03, "total_tokens": 22990584}
|
| 7048 |
+
{"current_steps": 35150, "total_steps": 38160, "loss": 0.4008, "lr": 9.42284991096698e-07, "epoch": 18.42243186582809, "percentage": 92.11, "elapsed_time": "1:43:39", "remaining_time": "0:08:52", "throughput": 3697.02, "total_tokens": 22994008}
|
| 7049 |
+
{"current_steps": 35155, "total_steps": 38160, "loss": 0.4817, "lr": 9.391778411389634e-07, "epoch": 18.42505241090147, "percentage": 92.13, "elapsed_time": "1:43:40", "remaining_time": "0:08:51", "throughput": 3697.09, "total_tokens": 22998136}
|
| 7050 |
+
{"current_steps": 35160, "total_steps": 38160, "loss": 0.366, "lr": 9.360757244307267e-07, "epoch": 18.427672955974842, "percentage": 92.14, "elapsed_time": "1:43:41", "remaining_time": "0:08:50", "throughput": 3697.02, "total_tokens": 23000568}
|
| 7051 |
+
{"current_steps": 35165, "total_steps": 38160, "loss": 0.3925, "lr": 9.329786416209157e-07, "epoch": 18.43029350104822, "percentage": 92.15, "elapsed_time": "1:43:42", "remaining_time": "0:08:49", "throughput": 3697.06, "total_tokens": 23004056}
|
| 7052 |
+
{"current_steps": 35170, "total_steps": 38160, "loss": 0.5492, "lr": 9.298865933574014e-07, "epoch": 18.432914046121592, "percentage": 92.16, "elapsed_time": "1:43:43", "remaining_time": "0:08:49", "throughput": 3697.15, "total_tokens": 23007640}
|
| 7053 |
+
{"current_steps": 35175, "total_steps": 38160, "loss": 0.3992, "lr": 9.267995802870022e-07, "epoch": 18.43553459119497, "percentage": 92.18, "elapsed_time": "1:43:43", "remaining_time": "0:08:48", "throughput": 3697.14, "total_tokens": 23010488}
|
| 7054 |
+
{"current_steps": 35180, "total_steps": 38160, "loss": 0.4114, "lr": 9.237176030554878e-07, "epoch": 18.438155136268342, "percentage": 92.19, "elapsed_time": "1:43:44", "remaining_time": "0:08:47", "throughput": 3697.26, "total_tokens": 23014552}
|
| 7055 |
+
{"current_steps": 35185, "total_steps": 38160, "loss": 0.5349, "lr": 9.206406623075758e-07, "epoch": 18.44077568134172, "percentage": 92.2, "elapsed_time": "1:43:45", "remaining_time": "0:08:46", "throughput": 3697.39, "total_tokens": 23018840}
|
| 7056 |
+
{"current_steps": 35190, "total_steps": 38160, "loss": 0.6375, "lr": 9.175687586869292e-07, "epoch": 18.443396226415093, "percentage": 92.22, "elapsed_time": "1:43:46", "remaining_time": "0:08:45", "throughput": 3697.45, "total_tokens": 23022520}
|
| 7057 |
+
{"current_steps": 35195, "total_steps": 38160, "loss": 0.575, "lr": 9.145018928361476e-07, "epoch": 18.44601677148847, "percentage": 92.23, "elapsed_time": "1:43:47", "remaining_time": "0:08:44", "throughput": 3697.5, "total_tokens": 23026104}
|
| 7058 |
+
{"current_steps": 35200, "total_steps": 38160, "loss": 0.5943, "lr": 9.114400653967903e-07, "epoch": 18.448637316561843, "percentage": 92.24, "elapsed_time": "1:43:48", "remaining_time": "0:08:43", "throughput": 3697.62, "total_tokens": 23030296}
|
| 7059 |
+
{"current_steps": 35205, "total_steps": 38160, "loss": 0.4562, "lr": 9.083832770093531e-07, "epoch": 18.45125786163522, "percentage": 92.26, "elapsed_time": "1:43:49", "remaining_time": "0:08:42", "throughput": 3697.7, "total_tokens": 23033944}
|
| 7060 |
+
{"current_steps": 35210, "total_steps": 38160, "loss": 0.5731, "lr": 9.053315283132802e-07, "epoch": 18.453878406708597, "percentage": 92.27, "elapsed_time": "1:43:50", "remaining_time": "0:08:41", "throughput": 3697.82, "total_tokens": 23037912}
|
| 7061 |
+
{"current_steps": 35215, "total_steps": 38160, "loss": 0.6219, "lr": 9.022848199469663e-07, "epoch": 18.45649895178197, "percentage": 92.28, "elapsed_time": "1:43:50", "remaining_time": "0:08:41", "throughput": 3697.83, "total_tokens": 23041048}
|
| 7062 |
+
{"current_steps": 35220, "total_steps": 38160, "loss": 0.4171, "lr": 8.992431525477518e-07, "epoch": 18.459119496855347, "percentage": 92.3, "elapsed_time": "1:43:51", "remaining_time": "0:08:40", "throughput": 3697.84, "total_tokens": 23043992}
|
| 7063 |
+
{"current_steps": 35225, "total_steps": 38160, "loss": 0.5147, "lr": 8.962065267519082e-07, "epoch": 18.46174004192872, "percentage": 92.31, "elapsed_time": "1:43:52", "remaining_time": "0:08:39", "throughput": 3697.88, "total_tokens": 23047352}
|
| 7064 |
+
{"current_steps": 35230, "total_steps": 38160, "loss": 0.5947, "lr": 8.931749431946718e-07, "epoch": 18.464360587002098, "percentage": 92.32, "elapsed_time": "1:43:53", "remaining_time": "0:08:38", "throughput": 3697.92, "total_tokens": 23050488}
|
| 7065 |
+
{"current_steps": 35235, "total_steps": 38160, "loss": 0.5069, "lr": 8.901484025102103e-07, "epoch": 18.46698113207547, "percentage": 92.33, "elapsed_time": "1:43:54", "remaining_time": "0:08:37", "throughput": 3697.93, "total_tokens": 23053496}
|
| 7066 |
+
{"current_steps": 35240, "total_steps": 38160, "loss": 0.4267, "lr": 8.871269053316422e-07, "epoch": 18.469601677148848, "percentage": 92.35, "elapsed_time": "1:43:55", "remaining_time": "0:08:36", "throughput": 3697.96, "total_tokens": 23056760}
|
| 7067 |
+
{"current_steps": 35245, "total_steps": 38160, "loss": 0.4775, "lr": 8.841104522910343e-07, "epoch": 18.47222222222222, "percentage": 92.36, "elapsed_time": "1:43:55", "remaining_time": "0:08:35", "throughput": 3697.98, "total_tokens": 23060152}
|
| 7068 |
+
{"current_steps": 35250, "total_steps": 38160, "loss": 0.4349, "lr": 8.8109904401939e-07, "epoch": 18.4748427672956, "percentage": 92.37, "elapsed_time": "1:43:56", "remaining_time": "0:08:34", "throughput": 3697.99, "total_tokens": 23063448}
|
| 7069 |
+
{"current_steps": 35255, "total_steps": 38160, "loss": 0.4615, "lr": 8.780926811466639e-07, "epoch": 18.47746331236897, "percentage": 92.39, "elapsed_time": "1:43:57", "remaining_time": "0:08:33", "throughput": 3698.01, "total_tokens": 23066776}
|
| 7070 |
+
{"current_steps": 35260, "total_steps": 38160, "loss": 0.538, "lr": 8.7509136430175e-07, "epoch": 18.48008385744235, "percentage": 92.4, "elapsed_time": "1:43:58", "remaining_time": "0:08:33", "throughput": 3698.04, "total_tokens": 23069784}
|
| 7071 |
+
{"current_steps": 35265, "total_steps": 38160, "loss": 0.4854, "lr": 8.720950941124877e-07, "epoch": 18.482704402515722, "percentage": 92.41, "elapsed_time": "1:43:59", "remaining_time": "0:08:32", "throughput": 3698.08, "total_tokens": 23072920}
|
| 7072 |
+
{"current_steps": 35270, "total_steps": 38160, "loss": 0.4727, "lr": 8.691038712056732e-07, "epoch": 18.4853249475891, "percentage": 92.43, "elapsed_time": "1:44:00", "remaining_time": "0:08:31", "throughput": 3698.12, "total_tokens": 23076408}
|
| 7073 |
+
{"current_steps": 35275, "total_steps": 38160, "loss": 0.4933, "lr": 8.66117696207025e-07, "epoch": 18.487945492662472, "percentage": 92.44, "elapsed_time": "1:44:00", "remaining_time": "0:08:30", "throughput": 3698.12, "total_tokens": 23079256}
|
| 7074 |
+
{"current_steps": 35280, "total_steps": 38160, "loss": 0.3896, "lr": 8.631365697412186e-07, "epoch": 18.49056603773585, "percentage": 92.45, "elapsed_time": "1:44:01", "remaining_time": "0:08:29", "throughput": 3698.12, "total_tokens": 23082488}
|
| 7075 |
+
{"current_steps": 35285, "total_steps": 38160, "loss": 0.4044, "lr": 8.601604924318802e-07, "epoch": 18.493186582809223, "percentage": 92.47, "elapsed_time": "1:44:03", "remaining_time": "0:08:28", "throughput": 3698.27, "total_tokens": 23089176}
|
| 7076 |
+
{"current_steps": 35290, "total_steps": 38160, "loss": 0.4179, "lr": 8.571894649015588e-07, "epoch": 18.4958071278826, "percentage": 92.48, "elapsed_time": "1:44:04", "remaining_time": "0:08:27", "throughput": 3698.26, "total_tokens": 23091928}
|
| 7077 |
+
{"current_steps": 35295, "total_steps": 38160, "loss": 0.3955, "lr": 8.542234877717686e-07, "epoch": 18.498427672955973, "percentage": 92.49, "elapsed_time": "1:44:04", "remaining_time": "0:08:26", "throughput": 3698.3, "total_tokens": 23095064}
|
| 7078 |
+
{"current_steps": 35300, "total_steps": 38160, "loss": 0.4077, "lr": 8.512625616629549e-07, "epoch": 18.50104821802935, "percentage": 92.51, "elapsed_time": "1:44:05", "remaining_time": "0:08:26", "throughput": 3698.37, "total_tokens": 23098616}
|
| 7079 |
+
{"current_steps": 35305, "total_steps": 38160, "loss": 0.439, "lr": 8.483066871945083e-07, "epoch": 18.503668763102727, "percentage": 92.52, "elapsed_time": "1:44:06", "remaining_time": "0:08:25", "throughput": 3698.39, "total_tokens": 23101784}
|
| 7080 |
+
{"current_steps": 35310, "total_steps": 38160, "loss": 0.3218, "lr": 8.453558649847676e-07, "epoch": 18.5062893081761, "percentage": 92.53, "elapsed_time": "1:44:07", "remaining_time": "0:08:24", "throughput": 3698.48, "total_tokens": 23105816}
|
| 7081 |
+
{"current_steps": 35315, "total_steps": 38160, "loss": 0.2714, "lr": 8.424100956510112e-07, "epoch": 18.508909853249477, "percentage": 92.54, "elapsed_time": "1:44:08", "remaining_time": "0:08:23", "throughput": 3698.57, "total_tokens": 23110232}
|
| 7082 |
+
{"current_steps": 35320, "total_steps": 38160, "loss": 0.3658, "lr": 8.394693798094544e-07, "epoch": 18.51153039832285, "percentage": 92.56, "elapsed_time": "1:44:09", "remaining_time": "0:08:22", "throughput": 3698.63, "total_tokens": 23113624}
|
| 7083 |
+
{"current_steps": 35325, "total_steps": 38160, "loss": 0.4104, "lr": 8.365337180752691e-07, "epoch": 18.514150943396228, "percentage": 92.57, "elapsed_time": "1:44:10", "remaining_time": "0:08:21", "throughput": 3698.7, "total_tokens": 23117176}
|
| 7084 |
+
{"current_steps": 35330, "total_steps": 38160, "loss": 0.4219, "lr": 8.336031110625558e-07, "epoch": 18.5167714884696, "percentage": 92.58, "elapsed_time": "1:44:10", "remaining_time": "0:08:20", "throughput": 3698.69, "total_tokens": 23120472}
|
| 7085 |
+
{"current_steps": 35335, "total_steps": 38160, "loss": 0.3823, "lr": 8.306775593843657e-07, "epoch": 18.519392033542978, "percentage": 92.6, "elapsed_time": "1:44:11", "remaining_time": "0:08:19", "throughput": 3698.75, "total_tokens": 23124344}
|
| 7086 |
+
{"current_steps": 35340, "total_steps": 38160, "loss": 0.4937, "lr": 8.277570636526955e-07, "epoch": 18.52201257861635, "percentage": 92.61, "elapsed_time": "1:44:12", "remaining_time": "0:08:18", "throughput": 3698.78, "total_tokens": 23127608}
|
| 7087 |
+
{"current_steps": 35345, "total_steps": 38160, "loss": 0.4468, "lr": 8.24841624478473e-07, "epoch": 18.52463312368973, "percentage": 92.62, "elapsed_time": "1:44:13", "remaining_time": "0:08:18", "throughput": 3698.77, "total_tokens": 23130552}
|
| 7088 |
+
{"current_steps": 35350, "total_steps": 38160, "loss": 0.5063, "lr": 8.219312424715825e-07, "epoch": 18.5272536687631, "percentage": 92.64, "elapsed_time": "1:44:14", "remaining_time": "0:08:17", "throughput": 3698.82, "total_tokens": 23133784}
|
| 7089 |
+
{"current_steps": 35355, "total_steps": 38160, "loss": 0.4776, "lr": 8.190259182408345e-07, "epoch": 18.52987421383648, "percentage": 92.65, "elapsed_time": "1:44:15", "remaining_time": "0:08:16", "throughput": 3698.95, "total_tokens": 23138072}
|
| 7090 |
+
{"current_steps": 35360, "total_steps": 38160, "loss": 0.4245, "lr": 8.161256523939925e-07, "epoch": 18.532494758909852, "percentage": 92.66, "elapsed_time": "1:44:16", "remaining_time": "0:08:15", "throughput": 3698.97, "total_tokens": 23141080}
|
| 7091 |
+
{"current_steps": 35365, "total_steps": 38160, "loss": 0.5679, "lr": 8.132304455377576e-07, "epoch": 18.53511530398323, "percentage": 92.68, "elapsed_time": "1:44:16", "remaining_time": "0:08:14", "throughput": 3698.97, "total_tokens": 23143960}
|
| 7092 |
+
{"current_steps": 35370, "total_steps": 38160, "loss": 0.5119, "lr": 8.103402982777813e-07, "epoch": 18.537735849056602, "percentage": 92.69, "elapsed_time": "1:44:17", "remaining_time": "0:08:13", "throughput": 3699.04, "total_tokens": 23147608}
|
| 7093 |
+
{"current_steps": 35375, "total_steps": 38160, "loss": 0.425, "lr": 8.07455211218644e-07, "epoch": 18.54035639412998, "percentage": 92.7, "elapsed_time": "1:44:18", "remaining_time": "0:08:12", "throughput": 3699.02, "total_tokens": 23150872}
|
| 7094 |
+
{"current_steps": 35380, "total_steps": 38160, "loss": 0.4668, "lr": 8.045751849638738e-07, "epoch": 18.542976939203353, "percentage": 92.71, "elapsed_time": "1:44:19", "remaining_time": "0:08:11", "throughput": 3699.01, "total_tokens": 23153656}
|
| 7095 |
+
{"current_steps": 35385, "total_steps": 38160, "loss": 0.343, "lr": 8.017002201159363e-07, "epoch": 18.54559748427673, "percentage": 92.73, "elapsed_time": "1:44:20", "remaining_time": "0:08:10", "throughput": 3699.08, "total_tokens": 23157176}
|
| 7096 |
+
{"current_steps": 35390, "total_steps": 38160, "loss": 0.3739, "lr": 7.988303172762418e-07, "epoch": 18.548218029350103, "percentage": 92.74, "elapsed_time": "1:44:21", "remaining_time": "0:08:10", "throughput": 3699.1, "total_tokens": 23160280}
|
| 7097 |
+
{"current_steps": 35395, "total_steps": 38160, "loss": 0.2931, "lr": 7.959654770451463e-07, "epoch": 18.55083857442348, "percentage": 92.75, "elapsed_time": "1:44:22", "remaining_time": "0:08:09", "throughput": 3699.27, "total_tokens": 23166552}
|
| 7098 |
+
{"current_steps": 35400, "total_steps": 38160, "loss": 0.3773, "lr": 7.9310570002194e-07, "epoch": 18.553459119496857, "percentage": 92.77, "elapsed_time": "1:44:23", "remaining_time": "0:08:08", "throughput": 3699.31, "total_tokens": 23170008}
|
| 7099 |
+
{"current_steps": 35405, "total_steps": 38160, "loss": 0.4668, "lr": 7.902509868048552e-07, "epoch": 18.55607966457023, "percentage": 92.78, "elapsed_time": "1:44:24", "remaining_time": "0:08:07", "throughput": 3699.36, "total_tokens": 23173336}
|
| 7100 |
+
{"current_steps": 35410, "total_steps": 38160, "loss": 0.5145, "lr": 7.874013379910616e-07, "epoch": 18.558700209643607, "percentage": 92.79, "elapsed_time": "1:44:25", "remaining_time": "0:08:06", "throughput": 3699.4, "total_tokens": 23177336}
|
| 7101 |
+
{"current_steps": 35415, "total_steps": 38160, "loss": 0.476, "lr": 7.845567541766769e-07, "epoch": 18.56132075471698, "percentage": 92.81, "elapsed_time": "1:44:26", "remaining_time": "0:08:05", "throughput": 3699.43, "total_tokens": 23180632}
|
| 7102 |
+
{"current_steps": 35420, "total_steps": 38160, "loss": 0.4971, "lr": 7.817172359567526e-07, "epoch": 18.563941299790358, "percentage": 92.82, "elapsed_time": "1:44:26", "remaining_time": "0:08:04", "throughput": 3699.4, "total_tokens": 23183192}
|
| 7103 |
+
{"current_steps": 35425, "total_steps": 38160, "loss": 0.3775, "lr": 7.788827839252888e-07, "epoch": 18.56656184486373, "percentage": 92.83, "elapsed_time": "1:44:27", "remaining_time": "0:08:03", "throughput": 3699.33, "total_tokens": 23185912}
|
| 7104 |
+
{"current_steps": 35430, "total_steps": 38160, "loss": 0.6211, "lr": 7.760533986752222e-07, "epoch": 18.569182389937108, "percentage": 92.85, "elapsed_time": "1:44:28", "remaining_time": "0:08:03", "throughput": 3699.31, "total_tokens": 23189368}
|
| 7105 |
+
{"current_steps": 35435, "total_steps": 38160, "loss": 0.4336, "lr": 7.732290807984155e-07, "epoch": 18.57180293501048, "percentage": 92.86, "elapsed_time": "1:44:29", "remaining_time": "0:08:02", "throughput": 3699.36, "total_tokens": 23192728}
|
| 7106 |
+
{"current_steps": 35440, "total_steps": 38160, "loss": 0.5132, "lr": 7.704098308856933e-07, "epoch": 18.57442348008386, "percentage": 92.87, "elapsed_time": "1:44:30", "remaining_time": "0:08:01", "throughput": 3699.34, "total_tokens": 23195640}
|
| 7107 |
+
{"current_steps": 35445, "total_steps": 38160, "loss": 0.5984, "lr": 7.675956495268088e-07, "epoch": 18.57704402515723, "percentage": 92.89, "elapsed_time": "1:44:31", "remaining_time": "0:08:00", "throughput": 3699.26, "total_tokens": 23198200}
|
| 7108 |
+
{"current_steps": 35450, "total_steps": 38160, "loss": 0.3863, "lr": 7.647865373104551e-07, "epoch": 18.57966457023061, "percentage": 92.9, "elapsed_time": "1:44:31", "remaining_time": "0:07:59", "throughput": 3699.18, "total_tokens": 23200760}
|
| 7109 |
+
{"current_steps": 35455, "total_steps": 38160, "loss": 0.4081, "lr": 7.619824948242732e-07, "epoch": 18.582285115303982, "percentage": 92.91, "elapsed_time": "1:44:32", "remaining_time": "0:07:58", "throughput": 3699.26, "total_tokens": 23204280}
|
| 7110 |
+
{"current_steps": 35460, "total_steps": 38160, "loss": 0.5586, "lr": 7.5918352265483e-07, "epoch": 18.58490566037736, "percentage": 92.92, "elapsed_time": "1:44:33", "remaining_time": "0:07:57", "throughput": 3699.34, "total_tokens": 23207800}
|
| 7111 |
+
{"current_steps": 35465, "total_steps": 38160, "loss": 0.3935, "lr": 7.563896213876348e-07, "epoch": 18.587526205450732, "percentage": 92.94, "elapsed_time": "1:44:34", "remaining_time": "0:07:56", "throughput": 3699.33, "total_tokens": 23210648}
|
| 7112 |
+
{"current_steps": 35470, "total_steps": 38160, "loss": 0.3137, "lr": 7.53600791607148e-07, "epoch": 18.59014675052411, "percentage": 92.95, "elapsed_time": "1:44:35", "remaining_time": "0:07:55", "throughput": 3699.34, "total_tokens": 23213752}
|
| 7113 |
+
{"current_steps": 35475, "total_steps": 38160, "loss": 0.3457, "lr": 7.508170338967585e-07, "epoch": 18.592767295597483, "percentage": 92.96, "elapsed_time": "1:44:35", "remaining_time": "0:07:55", "throughput": 3699.33, "total_tokens": 23216408}
|
| 7114 |
+
{"current_steps": 35480, "total_steps": 38160, "loss": 0.4056, "lr": 7.480383488387976e-07, "epoch": 18.59538784067086, "percentage": 92.98, "elapsed_time": "1:44:36", "remaining_time": "0:07:54", "throughput": 3699.37, "total_tokens": 23219480}
|
| 7115 |
+
{"current_steps": 35485, "total_steps": 38160, "loss": 0.5387, "lr": 7.452647370145338e-07, "epoch": 18.598008385744233, "percentage": 92.99, "elapsed_time": "1:44:37", "remaining_time": "0:07:53", "throughput": 3699.38, "total_tokens": 23222872}
|
| 7116 |
+
{"current_steps": 35490, "total_steps": 38160, "loss": 0.5807, "lr": 7.424961990041696e-07, "epoch": 18.60062893081761, "percentage": 93.0, "elapsed_time": "1:44:38", "remaining_time": "0:07:52", "throughput": 3699.48, "total_tokens": 23226584}
|
| 7117 |
+
{"current_steps": 35495, "total_steps": 38160, "loss": 0.445, "lr": 7.397327353868583e-07, "epoch": 18.603249475890987, "percentage": 93.02, "elapsed_time": "1:44:39", "remaining_time": "0:07:51", "throughput": 3699.44, "total_tokens": 23229112}
|
| 7118 |
+
{"current_steps": 35500, "total_steps": 38160, "loss": 0.3234, "lr": 7.369743467406848e-07, "epoch": 18.60587002096436, "percentage": 93.03, "elapsed_time": "1:44:39", "remaining_time": "0:07:50", "throughput": 3699.39, "total_tokens": 23231800}
|
| 7119 |
+
{"current_steps": 35505, "total_steps": 38160, "loss": 0.3985, "lr": 7.34221033642668e-07, "epoch": 18.608490566037737, "percentage": 93.04, "elapsed_time": "1:44:40", "remaining_time": "0:07:49", "throughput": 3699.44, "total_tokens": 23235192}
|
| 7120 |
+
{"current_steps": 35510, "total_steps": 38160, "loss": 0.4381, "lr": 7.314727966687751e-07, "epoch": 18.61111111111111, "percentage": 93.06, "elapsed_time": "1:44:41", "remaining_time": "0:07:48", "throughput": 3699.45, "total_tokens": 23238008}
|
| 7121 |
+
{"current_steps": 35515, "total_steps": 38160, "loss": 0.4998, "lr": 7.287296363939045e-07, "epoch": 18.613731656184488, "percentage": 93.07, "elapsed_time": "1:44:42", "remaining_time": "0:07:47", "throughput": 3699.41, "total_tokens": 23240536}
|
| 7122 |
+
{"current_steps": 35520, "total_steps": 38160, "loss": 0.5748, "lr": 7.259915533918888e-07, "epoch": 18.61635220125786, "percentage": 93.08, "elapsed_time": "1:44:42", "remaining_time": "0:07:46", "throughput": 3699.44, "total_tokens": 23243576}
|
| 7123 |
+
{"current_steps": 35525, "total_steps": 38160, "loss": 0.4372, "lr": 7.232585482355086e-07, "epoch": 18.618972746331238, "percentage": 93.09, "elapsed_time": "1:44:43", "remaining_time": "0:07:46", "throughput": 3699.51, "total_tokens": 23247256}
|
| 7124 |
+
{"current_steps": 35530, "total_steps": 38160, "loss": 0.3352, "lr": 7.205306214964736e-07, "epoch": 18.62159329140461, "percentage": 93.11, "elapsed_time": "1:44:44", "remaining_time": "0:07:45", "throughput": 3699.55, "total_tokens": 23250456}
|
| 7125 |
+
{"current_steps": 35535, "total_steps": 38160, "loss": 0.4192, "lr": 7.178077737454436e-07, "epoch": 18.62421383647799, "percentage": 93.12, "elapsed_time": "1:44:45", "remaining_time": "0:07:44", "throughput": 3699.55, "total_tokens": 23253272}
|
| 7126 |
+
{"current_steps": 35540, "total_steps": 38160, "loss": 0.47, "lr": 7.150900055519993e-07, "epoch": 18.62683438155136, "percentage": 93.13, "elapsed_time": "1:44:46", "remaining_time": "0:07:43", "throughput": 3699.62, "total_tokens": 23256824}
|
| 7127 |
+
{"current_steps": 35545, "total_steps": 38160, "loss": 0.5624, "lr": 7.123773174846693e-07, "epoch": 18.62945492662474, "percentage": 93.15, "elapsed_time": "1:44:47", "remaining_time": "0:07:42", "throughput": 3699.74, "total_tokens": 23261048}
|
| 7128 |
+
{"current_steps": 35550, "total_steps": 38160, "loss": 0.4842, "lr": 7.096697101109134e-07, "epoch": 18.632075471698112, "percentage": 93.16, "elapsed_time": "1:44:48", "remaining_time": "0:07:41", "throughput": 3699.77, "total_tokens": 23264216}
|
| 7129 |
+
{"current_steps": 35555, "total_steps": 38160, "loss": 0.4323, "lr": 7.069671839971371e-07, "epoch": 18.63469601677149, "percentage": 93.17, "elapsed_time": "1:44:48", "remaining_time": "0:07:40", "throughput": 3699.81, "total_tokens": 23267448}
|
| 7130 |
+
{"current_steps": 35560, "total_steps": 38160, "loss": 0.4396, "lr": 7.042697397086795e-07, "epoch": 18.637316561844862, "percentage": 93.19, "elapsed_time": "1:44:49", "remaining_time": "0:07:39", "throughput": 3699.91, "total_tokens": 23271448}
|
| 7131 |
+
{"current_steps": 35565, "total_steps": 38160, "loss": 0.501, "lr": 7.015773778098089e-07, "epoch": 18.63993710691824, "percentage": 93.2, "elapsed_time": "1:44:50", "remaining_time": "0:07:38", "throughput": 3699.92, "total_tokens": 23274296}
|
| 7132 |
+
{"current_steps": 35570, "total_steps": 38160, "loss": 0.3999, "lr": 6.988900988637415e-07, "epoch": 18.642557651991613, "percentage": 93.21, "elapsed_time": "1:44:51", "remaining_time": "0:07:38", "throughput": 3699.84, "total_tokens": 23276664}
|
| 7133 |
+
{"current_steps": 35575, "total_steps": 38160, "loss": 0.4041, "lr": 6.962079034326218e-07, "epoch": 18.64517819706499, "percentage": 93.23, "elapsed_time": "1:44:52", "remaining_time": "0:07:37", "throughput": 3699.94, "total_tokens": 23280632}
|
| 7134 |
+
{"current_steps": 35580, "total_steps": 38160, "loss": 0.307, "lr": 6.935307920775346e-07, "epoch": 18.647798742138363, "percentage": 93.24, "elapsed_time": "1:44:53", "remaining_time": "0:07:36", "throughput": 3700.03, "total_tokens": 23285624}
|
| 7135 |
+
{"current_steps": 35585, "total_steps": 38160, "loss": 0.3964, "lr": 6.90858765358507e-07, "epoch": 18.65041928721174, "percentage": 93.25, "elapsed_time": "1:44:54", "remaining_time": "0:07:35", "throughput": 3700.03, "total_tokens": 23288632}
|
| 7136 |
+
{"current_steps": 35590, "total_steps": 38160, "loss": 0.4267, "lr": 6.881918238344915e-07, "epoch": 18.653039832285117, "percentage": 93.27, "elapsed_time": "1:44:54", "remaining_time": "0:07:34", "throughput": 3700.06, "total_tokens": 23291800}
|
| 7137 |
+
{"current_steps": 35595, "total_steps": 38160, "loss": 0.4329, "lr": 6.855299680633808e-07, "epoch": 18.65566037735849, "percentage": 93.28, "elapsed_time": "1:44:56", "remaining_time": "0:07:33", "throughput": 3700.11, "total_tokens": 23296088}
|
| 7138 |
+
{"current_steps": 35600, "total_steps": 38160, "loss": 0.3788, "lr": 6.828731986020104e-07, "epoch": 18.658280922431867, "percentage": 93.29, "elapsed_time": "1:44:56", "remaining_time": "0:07:32", "throughput": 3700.14, "total_tokens": 23299096}
|
| 7139 |
+
{"current_steps": 35605, "total_steps": 38160, "loss": 0.4889, "lr": 6.802215160061381e-07, "epoch": 18.66090146750524, "percentage": 93.3, "elapsed_time": "1:44:57", "remaining_time": "0:07:31", "throughput": 3700.08, "total_tokens": 23301624}
|
| 7140 |
+
{"current_steps": 35610, "total_steps": 38160, "loss": 0.5239, "lr": 6.775749208304704e-07, "epoch": 18.663522012578618, "percentage": 93.32, "elapsed_time": "1:44:58", "remaining_time": "0:07:31", "throughput": 3700.13, "total_tokens": 23305240}
|
| 7141 |
+
{"current_steps": 35615, "total_steps": 38160, "loss": 0.4099, "lr": 6.749334136286478e-07, "epoch": 18.66614255765199, "percentage": 93.33, "elapsed_time": "1:44:59", "remaining_time": "0:07:30", "throughput": 3700.12, "total_tokens": 23308344}
|
| 7142 |
+
{"current_steps": 35620, "total_steps": 38160, "loss": 0.4972, "lr": 6.722969949532365e-07, "epoch": 18.668763102725368, "percentage": 93.34, "elapsed_time": "1:45:00", "remaining_time": "0:07:29", "throughput": 3700.24, "total_tokens": 23313272}
|
| 7143 |
+
{"current_steps": 35625, "total_steps": 38160, "loss": 0.5377, "lr": 6.69665665355751e-07, "epoch": 18.67138364779874, "percentage": 93.36, "elapsed_time": "1:45:01", "remaining_time": "0:07:28", "throughput": 3700.24, "total_tokens": 23316088}
|
| 7144 |
+
{"current_steps": 35630, "total_steps": 38160, "loss": 0.4086, "lr": 6.670394253866314e-07, "epoch": 18.67400419287212, "percentage": 93.37, "elapsed_time": "1:45:02", "remaining_time": "0:07:27", "throughput": 3700.38, "total_tokens": 23320312}
|
| 7145 |
+
{"current_steps": 35635, "total_steps": 38160, "loss": 0.4022, "lr": 6.644182755952605e-07, "epoch": 18.67662473794549, "percentage": 93.38, "elapsed_time": "1:45:02", "remaining_time": "0:07:26", "throughput": 3700.36, "total_tokens": 23322936}
|
| 7146 |
+
{"current_steps": 35640, "total_steps": 38160, "loss": 0.5261, "lr": 6.618022165299526e-07, "epoch": 18.67924528301887, "percentage": 93.4, "elapsed_time": "1:45:03", "remaining_time": "0:07:25", "throughput": 3700.44, "total_tokens": 23326968}
|
| 7147 |
+
{"current_steps": 35645, "total_steps": 38160, "loss": 0.4589, "lr": 6.591912487379531e-07, "epoch": 18.681865828092242, "percentage": 93.41, "elapsed_time": "1:45:04", "remaining_time": "0:07:24", "throughput": 3700.5, "total_tokens": 23330264}
|
| 7148 |
+
{"current_steps": 35650, "total_steps": 38160, "loss": 0.3765, "lr": 6.565853727654503e-07, "epoch": 18.68448637316562, "percentage": 93.42, "elapsed_time": "1:45:05", "remaining_time": "0:07:23", "throughput": 3700.56, "total_tokens": 23333912}
|
| 7149 |
+
{"current_steps": 35655, "total_steps": 38160, "loss": 0.3713, "lr": 6.539845891575636e-07, "epoch": 18.687106918238992, "percentage": 93.44, "elapsed_time": "1:45:06", "remaining_time": "0:07:23", "throughput": 3700.55, "total_tokens": 23336920}
|
| 7150 |
+
{"current_steps": 35660, "total_steps": 38160, "loss": 0.4963, "lr": 6.513888984583466e-07, "epoch": 18.68972746331237, "percentage": 93.45, "elapsed_time": "1:45:07", "remaining_time": "0:07:22", "throughput": 3700.49, "total_tokens": 23339800}
|
| 7151 |
+
{"current_steps": 35665, "total_steps": 38160, "loss": 0.4064, "lr": 6.487983012107873e-07, "epoch": 18.692348008385743, "percentage": 93.46, "elapsed_time": "1:45:08", "remaining_time": "0:07:21", "throughput": 3700.52, "total_tokens": 23343608}
|
| 7152 |
+
{"current_steps": 35670, "total_steps": 38160, "loss": 0.488, "lr": 6.462127979568133e-07, "epoch": 18.69496855345912, "percentage": 93.47, "elapsed_time": "1:45:08", "remaining_time": "0:07:20", "throughput": 3700.52, "total_tokens": 23346456}
|
| 7153 |
+
{"current_steps": 35675, "total_steps": 38160, "loss": 0.4639, "lr": 6.436323892372781e-07, "epoch": 18.697589098532493, "percentage": 93.49, "elapsed_time": "1:45:09", "remaining_time": "0:07:19", "throughput": 3700.51, "total_tokens": 23349336}
|
| 7154 |
+
{"current_steps": 35680, "total_steps": 38160, "loss": 0.3661, "lr": 6.410570755919721e-07, "epoch": 18.70020964360587, "percentage": 93.5, "elapsed_time": "1:45:10", "remaining_time": "0:07:18", "throughput": 3700.52, "total_tokens": 23352344}
|
| 7155 |
+
{"current_steps": 35685, "total_steps": 38160, "loss": 0.3513, "lr": 6.384868575596281e-07, "epoch": 18.702830188679247, "percentage": 93.51, "elapsed_time": "1:45:11", "remaining_time": "0:07:17", "throughput": 3700.44, "total_tokens": 23355064}
|
| 7156 |
+
{"current_steps": 35690, "total_steps": 38160, "loss": 0.4394, "lr": 6.359217356779024e-07, "epoch": 18.70545073375262, "percentage": 93.53, "elapsed_time": "1:45:12", "remaining_time": "0:07:16", "throughput": 3700.52, "total_tokens": 23358680}
|
| 7157 |
+
{"current_steps": 35695, "total_steps": 38160, "loss": 0.3622, "lr": 6.333617104833878e-07, "epoch": 18.708071278825997, "percentage": 93.54, "elapsed_time": "1:45:13", "remaining_time": "0:07:15", "throughput": 3700.47, "total_tokens": 23361496}
|
| 7158 |
+
{"current_steps": 35700, "total_steps": 38160, "loss": 0.5159, "lr": 6.308067825116142e-07, "epoch": 18.71069182389937, "percentage": 93.55, "elapsed_time": "1:45:14", "remaining_time": "0:07:15", "throughput": 3700.62, "total_tokens": 23366264}
|
| 7159 |
+
{"current_steps": 35705, "total_steps": 38160, "loss": 0.4328, "lr": 6.282569522970433e-07, "epoch": 18.713312368972748, "percentage": 93.57, "elapsed_time": "1:45:14", "remaining_time": "0:07:14", "throughput": 3700.56, "total_tokens": 23368888}
|
| 7160 |
+
{"current_steps": 35710, "total_steps": 38160, "loss": 0.3771, "lr": 6.257122203730731e-07, "epoch": 18.71593291404612, "percentage": 93.58, "elapsed_time": "1:45:15", "remaining_time": "0:07:13", "throughput": 3700.6, "total_tokens": 23372216}
|
| 7161 |
+
{"current_steps": 35715, "total_steps": 38160, "loss": 0.4463, "lr": 6.231725872720251e-07, "epoch": 18.718553459119498, "percentage": 93.59, "elapsed_time": "1:45:16", "remaining_time": "0:07:12", "throughput": 3700.7, "total_tokens": 23376248}
|
| 7162 |
+
{"current_steps": 35720, "total_steps": 38160, "loss": 0.4255, "lr": 6.206380535251716e-07, "epoch": 18.72117400419287, "percentage": 93.61, "elapsed_time": "1:45:17", "remaining_time": "0:07:11", "throughput": 3700.71, "total_tokens": 23379544}
|
| 7163 |
+
{"current_steps": 35725, "total_steps": 38160, "loss": 0.5164, "lr": 6.181086196626995e-07, "epoch": 18.72379454926625, "percentage": 93.62, "elapsed_time": "1:45:18", "remaining_time": "0:07:10", "throughput": 3700.82, "total_tokens": 23383576}
|
| 7164 |
+
{"current_steps": 35730, "total_steps": 38160, "loss": 0.5373, "lr": 6.155842862137384e-07, "epoch": 18.72641509433962, "percentage": 93.63, "elapsed_time": "1:45:19", "remaining_time": "0:07:09", "throughput": 3700.92, "total_tokens": 23387352}
|
| 7165 |
+
{"current_steps": 35735, "total_steps": 38160, "loss": 0.2946, "lr": 6.130650537063548e-07, "epoch": 18.729035639413, "percentage": 93.65, "elapsed_time": "1:45:20", "remaining_time": "0:07:08", "throughput": 3700.98, "total_tokens": 23390808}
|
| 7166 |
+
{"current_steps": 35740, "total_steps": 38160, "loss": 0.392, "lr": 6.105509226675438e-07, "epoch": 18.731656184486372, "percentage": 93.66, "elapsed_time": "1:45:20", "remaining_time": "0:07:08", "throughput": 3700.93, "total_tokens": 23393432}
|
| 7167 |
+
{"current_steps": 35745, "total_steps": 38160, "loss": 0.3136, "lr": 6.08041893623229e-07, "epoch": 18.73427672955975, "percentage": 93.67, "elapsed_time": "1:45:21", "remaining_time": "0:07:07", "throughput": 3700.93, "total_tokens": 23396536}
|
| 7168 |
+
{"current_steps": 35750, "total_steps": 38160, "loss": 0.2998, "lr": 6.055379670982713e-07, "epoch": 18.736897274633122, "percentage": 93.68, "elapsed_time": "1:45:22", "remaining_time": "0:07:06", "throughput": 3700.92, "total_tokens": 23399416}
|
| 7169 |
+
{"current_steps": 35755, "total_steps": 38160, "loss": 0.3901, "lr": 6.030391436164628e-07, "epoch": 18.7395178197065, "percentage": 93.7, "elapsed_time": "1:45:23", "remaining_time": "0:07:05", "throughput": 3700.92, "total_tokens": 23402392}
|
| 7170 |
+
{"current_steps": 35760, "total_steps": 38160, "loss": 0.369, "lr": 6.005454237005298e-07, "epoch": 18.742138364779873, "percentage": 93.71, "elapsed_time": "1:45:24", "remaining_time": "0:07:04", "throughput": 3700.86, "total_tokens": 23404792}
|
| 7171 |
+
{"current_steps": 35765, "total_steps": 38160, "loss": 0.3559, "lr": 5.980568078721327e-07, "epoch": 18.74475890985325, "percentage": 93.72, "elapsed_time": "1:45:25", "remaining_time": "0:07:03", "throughput": 3700.91, "total_tokens": 23408472}
|
| 7172 |
+
{"current_steps": 35770, "total_steps": 38160, "loss": 0.4075, "lr": 5.955732966518606e-07, "epoch": 18.747379454926623, "percentage": 93.74, "elapsed_time": "1:45:25", "remaining_time": "0:07:02", "throughput": 3700.95, "total_tokens": 23411768}
|
| 7173 |
+
{"current_steps": 35775, "total_steps": 38160, "loss": 0.4719, "lr": 5.930948905592343e-07, "epoch": 18.75, "percentage": 93.75, "elapsed_time": "1:45:26", "remaining_time": "0:07:01", "throughput": 3700.99, "total_tokens": 23415128}
|
| 7174 |
+
{"current_steps": 35780, "total_steps": 38160, "loss": 0.418, "lr": 5.906215901127054e-07, "epoch": 18.752620545073377, "percentage": 93.76, "elapsed_time": "1:45:27", "remaining_time": "0:07:00", "throughput": 3700.97, "total_tokens": 23418008}
|
| 7175 |
+
{"current_steps": 35785, "total_steps": 38160, "loss": 0.4349, "lr": 5.881533958296631e-07, "epoch": 18.75524109014675, "percentage": 93.78, "elapsed_time": "1:45:28", "remaining_time": "0:07:00", "throughput": 3700.93, "total_tokens": 23420888}
|
| 7176 |
+
{"current_steps": 35790, "total_steps": 38160, "loss": 0.4763, "lr": 5.856903082264248e-07, "epoch": 18.757861635220127, "percentage": 93.79, "elapsed_time": "1:45:29", "remaining_time": "0:06:59", "throughput": 3700.82, "total_tokens": 23423128}
|
| 7177 |
+
{"current_steps": 35795, "total_steps": 38160, "loss": 0.4603, "lr": 5.832323278182422e-07, "epoch": 18.7604821802935, "percentage": 93.8, "elapsed_time": "1:45:30", "remaining_time": "0:06:58", "throughput": 3700.8, "total_tokens": 23426136}
|
| 7178 |
+
{"current_steps": 35800, "total_steps": 38160, "loss": 0.4457, "lr": 5.807794551192986e-07, "epoch": 18.763102725366878, "percentage": 93.82, "elapsed_time": "1:45:30", "remaining_time": "0:06:57", "throughput": 3700.83, "total_tokens": 23429144}
|
| 7179 |
+
{"current_steps": 35805, "total_steps": 38160, "loss": 0.4195, "lr": 5.783316906427e-07, "epoch": 18.76572327044025, "percentage": 93.83, "elapsed_time": "1:45:31", "remaining_time": "0:06:56", "throughput": 3700.85, "total_tokens": 23432216}
|
| 7180 |
+
{"current_steps": 35810, "total_steps": 38160, "loss": 0.3945, "lr": 5.758890349004925e-07, "epoch": 18.768343815513628, "percentage": 93.84, "elapsed_time": "1:45:32", "remaining_time": "0:06:55", "throughput": 3700.86, "total_tokens": 23435000}
|
| 7181 |
+
{"current_steps": 35815, "total_steps": 38160, "loss": 0.3441, "lr": 5.734514884036563e-07, "epoch": 18.770964360587, "percentage": 93.85, "elapsed_time": "1:45:33", "remaining_time": "0:06:54", "throughput": 3700.83, "total_tokens": 23438008}
|
| 7182 |
+
{"current_steps": 35820, "total_steps": 38160, "loss": 0.5925, "lr": 5.710190516620945e-07, "epoch": 18.77358490566038, "percentage": 93.87, "elapsed_time": "1:45:33", "remaining_time": "0:06:53", "throughput": 3700.75, "total_tokens": 23440504}
|
| 7183 |
+
{"current_steps": 35825, "total_steps": 38160, "loss": 0.4645, "lr": 5.685917251846501e-07, "epoch": 18.77620545073375, "percentage": 93.88, "elapsed_time": "1:45:34", "remaining_time": "0:06:52", "throughput": 3700.7, "total_tokens": 23443096}
|
| 7184 |
+
{"current_steps": 35830, "total_steps": 38160, "loss": 0.3967, "lr": 5.661695094790892e-07, "epoch": 18.77882599580713, "percentage": 93.89, "elapsed_time": "1:45:35", "remaining_time": "0:06:51", "throughput": 3700.78, "total_tokens": 23446392}
|
| 7185 |
+
{"current_steps": 35835, "total_steps": 38160, "loss": 0.3512, "lr": 5.637524050521064e-07, "epoch": 18.781446540880502, "percentage": 93.91, "elapsed_time": "1:45:36", "remaining_time": "0:06:51", "throughput": 3700.74, "total_tokens": 23449208}
|
| 7186 |
+
{"current_steps": 35840, "total_steps": 38160, "loss": 0.4228, "lr": 5.613404124093391e-07, "epoch": 18.78406708595388, "percentage": 93.92, "elapsed_time": "1:45:37", "remaining_time": "0:06:50", "throughput": 3700.7, "total_tokens": 23452376}
|
| 7187 |
+
{"current_steps": 35845, "total_steps": 38160, "loss": 0.3403, "lr": 5.589335320553446e-07, "epoch": 18.786687631027252, "percentage": 93.93, "elapsed_time": "1:45:38", "remaining_time": "0:06:49", "throughput": 3700.77, "total_tokens": 23455768}
|
| 7188 |
+
{"current_steps": 35850, "total_steps": 38160, "loss": 0.4138, "lr": 5.565317644936202e-07, "epoch": 18.78930817610063, "percentage": 93.95, "elapsed_time": "1:45:38", "remaining_time": "0:06:48", "throughput": 3700.75, "total_tokens": 23458680}
|
| 7189 |
+
{"current_steps": 35855, "total_steps": 38160, "loss": 0.4506, "lr": 5.541351102265863e-07, "epoch": 18.791928721174003, "percentage": 93.96, "elapsed_time": "1:45:39", "remaining_time": "0:06:47", "throughput": 3700.83, "total_tokens": 23462456}
|
| 7190 |
+
{"current_steps": 35860, "total_steps": 38160, "loss": 0.5138, "lr": 5.51743569755589e-07, "epoch": 18.79454926624738, "percentage": 93.97, "elapsed_time": "1:45:40", "remaining_time": "0:06:46", "throughput": 3700.88, "total_tokens": 23465656}
|
| 7191 |
+
{"current_steps": 35865, "total_steps": 38160, "loss": 0.5377, "lr": 5.493571435809197e-07, "epoch": 18.797169811320753, "percentage": 93.99, "elapsed_time": "1:45:41", "remaining_time": "0:06:45", "throughput": 3700.85, "total_tokens": 23468472}
|
| 7192 |
+
{"current_steps": 35870, "total_steps": 38160, "loss": 0.4005, "lr": 5.469758322017849e-07, "epoch": 18.79979035639413, "percentage": 94.0, "elapsed_time": "1:45:42", "remaining_time": "0:06:44", "throughput": 3700.89, "total_tokens": 23472248}
|
| 7193 |
+
{"current_steps": 35875, "total_steps": 38160, "loss": 0.3645, "lr": 5.445996361163358e-07, "epoch": 18.802410901467507, "percentage": 94.01, "elapsed_time": "1:45:43", "remaining_time": "0:06:44", "throughput": 3700.82, "total_tokens": 23474680}
|
| 7194 |
+
{"current_steps": 35880, "total_steps": 38160, "loss": 0.523, "lr": 5.422285558216389e-07, "epoch": 18.80503144654088, "percentage": 94.03, "elapsed_time": "1:45:43", "remaining_time": "0:06:43", "throughput": 3700.78, "total_tokens": 23477368}
|
| 7195 |
+
{"current_steps": 35885, "total_steps": 38160, "loss": 0.4943, "lr": 5.398625918137001e-07, "epoch": 18.807651991614257, "percentage": 94.04, "elapsed_time": "1:45:44", "remaining_time": "0:06:42", "throughput": 3700.85, "total_tokens": 23481112}
|
| 7196 |
+
{"current_steps": 35890, "total_steps": 38160, "loss": 0.4554, "lr": 5.375017445874486e-07, "epoch": 18.81027253668763, "percentage": 94.05, "elapsed_time": "1:45:45", "remaining_time": "0:06:41", "throughput": 3700.82, "total_tokens": 23484088}
|
| 7197 |
+
{"current_steps": 35895, "total_steps": 38160, "loss": 0.4117, "lr": 5.351460146367504e-07, "epoch": 18.812893081761008, "percentage": 94.06, "elapsed_time": "1:45:46", "remaining_time": "0:06:40", "throughput": 3700.85, "total_tokens": 23487288}
|
| 7198 |
+
{"current_steps": 35900, "total_steps": 38160, "loss": 0.4713, "lr": 5.327954024543946e-07, "epoch": 18.81551362683438, "percentage": 94.08, "elapsed_time": "1:45:47", "remaining_time": "0:06:39", "throughput": 3700.89, "total_tokens": 23490680}
|
| 7199 |
+
{"current_steps": 35905, "total_steps": 38160, "loss": 0.4636, "lr": 5.304499085321074e-07, "epoch": 18.818134171907758, "percentage": 94.09, "elapsed_time": "1:45:48", "remaining_time": "0:06:38", "throughput": 3700.82, "total_tokens": 23493144}
|
| 7200 |
+
{"current_steps": 35910, "total_steps": 38160, "loss": 0.4938, "lr": 5.281095333605352e-07, "epoch": 18.82075471698113, "percentage": 94.1, "elapsed_time": "1:45:48", "remaining_time": "0:06:37", "throughput": 3700.77, "total_tokens": 23495608}
|
| 7201 |
+
{"current_steps": 35915, "total_steps": 38160, "loss": 0.3774, "lr": 5.257742774292556e-07, "epoch": 18.82337526205451, "percentage": 94.12, "elapsed_time": "1:45:49", "remaining_time": "0:06:36", "throughput": 3700.83, "total_tokens": 23499544}
|
| 7202 |
+
{"current_steps": 35920, "total_steps": 38160, "loss": 0.4009, "lr": 5.234441412267837e-07, "epoch": 18.82599580712788, "percentage": 94.13, "elapsed_time": "1:45:50", "remaining_time": "0:06:36", "throughput": 3700.8, "total_tokens": 23502168}
|
| 7203 |
+
{"current_steps": 35925, "total_steps": 38160, "loss": 0.4489, "lr": 5.211191252405517e-07, "epoch": 18.82861635220126, "percentage": 94.14, "elapsed_time": "1:45:51", "remaining_time": "0:06:35", "throughput": 3700.88, "total_tokens": 23505816}
|
| 7204 |
+
{"current_steps": 35930, "total_steps": 38160, "loss": 0.4625, "lr": 5.187992299569316e-07, "epoch": 18.831236897274632, "percentage": 94.16, "elapsed_time": "1:45:52", "remaining_time": "0:06:34", "throughput": 3700.93, "total_tokens": 23509176}
|
| 7205 |
+
{"current_steps": 35935, "total_steps": 38160, "loss": 0.4875, "lr": 5.164844558612131e-07, "epoch": 18.83385744234801, "percentage": 94.17, "elapsed_time": "1:45:53", "remaining_time": "0:06:33", "throughput": 3700.92, "total_tokens": 23512024}
|
| 7206 |
+
{"current_steps": 35940, "total_steps": 38160, "loss": 0.5067, "lr": 5.141748034376254e-07, "epoch": 18.836477987421382, "percentage": 94.18, "elapsed_time": "1:45:53", "remaining_time": "0:06:32", "throughput": 3700.88, "total_tokens": 23514552}
|
| 7207 |
+
{"current_steps": 35945, "total_steps": 38160, "loss": 0.5094, "lr": 5.118702731693209e-07, "epoch": 18.83909853249476, "percentage": 94.2, "elapsed_time": "1:45:54", "remaining_time": "0:06:31", "throughput": 3700.89, "total_tokens": 23517528}
|
| 7208 |
+
{"current_steps": 35950, "total_steps": 38160, "loss": 0.4128, "lr": 5.095708655383751e-07, "epoch": 18.841719077568133, "percentage": 94.21, "elapsed_time": "1:45:55", "remaining_time": "0:06:30", "throughput": 3700.96, "total_tokens": 23521272}
|
| 7209 |
+
{"current_steps": 35955, "total_steps": 38160, "loss": 0.4743, "lr": 5.07276581025809e-07, "epoch": 18.84433962264151, "percentage": 94.22, "elapsed_time": "1:45:56", "remaining_time": "0:06:29", "throughput": 3701.01, "total_tokens": 23524632}
|
| 7210 |
+
{"current_steps": 35960, "total_steps": 38160, "loss": 0.4021, "lr": 5.049874201115495e-07, "epoch": 18.846960167714883, "percentage": 94.23, "elapsed_time": "1:45:57", "remaining_time": "0:06:28", "throughput": 3701.16, "total_tokens": 23531160}
|
| 7211 |
+
{"current_steps": 35965, "total_steps": 38160, "loss": 0.5249, "lr": 5.027033832744693e-07, "epoch": 18.84958071278826, "percentage": 94.25, "elapsed_time": "1:45:58", "remaining_time": "0:06:28", "throughput": 3701.17, "total_tokens": 23534104}
|
| 7212 |
+
{"current_steps": 35970, "total_steps": 38160, "loss": 0.4564, "lr": 5.004244709923639e-07, "epoch": 18.852201257861637, "percentage": 94.26, "elapsed_time": "1:45:59", "remaining_time": "0:06:27", "throughput": 3701.13, "total_tokens": 23536856}
|
| 7213 |
+
{"current_steps": 35975, "total_steps": 38160, "loss": 0.5829, "lr": 4.981506837419519e-07, "epoch": 18.85482180293501, "percentage": 94.27, "elapsed_time": "1:46:00", "remaining_time": "0:06:26", "throughput": 3701.17, "total_tokens": 23540152}
|
| 7214 |
+
{"current_steps": 35980, "total_steps": 38160, "loss": 0.4906, "lr": 4.958820219988836e-07, "epoch": 18.857442348008387, "percentage": 94.29, "elapsed_time": "1:46:01", "remaining_time": "0:06:25", "throughput": 3701.22, "total_tokens": 23543640}
|
| 7215 |
+
{"current_steps": 35985, "total_steps": 38160, "loss": 0.4443, "lr": 4.936184862377402e-07, "epoch": 18.86006289308176, "percentage": 94.3, "elapsed_time": "1:46:01", "remaining_time": "0:06:24", "throughput": 3701.27, "total_tokens": 23547064}
|
| 7216 |
+
{"current_steps": 35990, "total_steps": 38160, "loss": 0.5028, "lr": 4.913600769320237e-07, "epoch": 18.862683438155138, "percentage": 94.31, "elapsed_time": "1:46:02", "remaining_time": "0:06:23", "throughput": 3701.28, "total_tokens": 23549944}
|
| 7217 |
+
{"current_steps": 35995, "total_steps": 38160, "loss": 0.4863, "lr": 4.891067945541728e-07, "epoch": 18.86530398322851, "percentage": 94.33, "elapsed_time": "1:46:03", "remaining_time": "0:06:22", "throughput": 3701.35, "total_tokens": 23553560}
|
| 7218 |
+
{"current_steps": 36000, "total_steps": 38160, "loss": 0.4641, "lr": 4.868586395755437e-07, "epoch": 18.867924528301888, "percentage": 94.34, "elapsed_time": "1:46:04", "remaining_time": "0:06:21", "throughput": 3701.37, "total_tokens": 23556504}
|
| 7219 |
+
{"current_steps": 36005, "total_steps": 38160, "loss": 0.5846, "lr": 4.846156124664242e-07, "epoch": 18.87054507337526, "percentage": 94.35, "elapsed_time": "1:46:05", "remaining_time": "0:06:20", "throughput": 3701.45, "total_tokens": 23560376}
|
| 7220 |
+
{"current_steps": 36010, "total_steps": 38160, "loss": 0.3293, "lr": 4.823777136960362e-07, "epoch": 18.87316561844864, "percentage": 94.37, "elapsed_time": "1:46:05", "remaining_time": "0:06:20", "throughput": 3701.42, "total_tokens": 23563064}
|
| 7221 |
+
{"current_steps": 36015, "total_steps": 38160, "loss": 0.3912, "lr": 4.801449437325162e-07, "epoch": 18.87578616352201, "percentage": 94.38, "elapsed_time": "1:46:06", "remaining_time": "0:06:19", "throughput": 3701.47, "total_tokens": 23566456}
|
| 7222 |
+
{"current_steps": 36020, "total_steps": 38160, "loss": 0.3739, "lr": 4.779173030429379e-07, "epoch": 18.87840670859539, "percentage": 94.39, "elapsed_time": "1:46:07", "remaining_time": "0:06:18", "throughput": 3701.55, "total_tokens": 23569976}
|
| 7223 |
+
{"current_steps": 36025, "total_steps": 38160, "loss": 0.5555, "lr": 4.7569479209329804e-07, "epoch": 18.881027253668762, "percentage": 94.41, "elapsed_time": "1:46:08", "remaining_time": "0:06:17", "throughput": 3701.54, "total_tokens": 23573368}
|
| 7224 |
+
{"current_steps": 36030, "total_steps": 38160, "loss": 0.3739, "lr": 4.734774113485163e-07, "epoch": 18.88364779874214, "percentage": 94.42, "elapsed_time": "1:46:09", "remaining_time": "0:06:16", "throughput": 3701.6, "total_tokens": 23576952}
|
| 7225 |
+
{"current_steps": 36035, "total_steps": 38160, "loss": 0.4276, "lr": 4.7126516127244945e-07, "epoch": 18.886268343815512, "percentage": 94.43, "elapsed_time": "1:46:10", "remaining_time": "0:06:15", "throughput": 3701.61, "total_tokens": 23580248}
|
| 7226 |
+
{"current_steps": 36040, "total_steps": 38160, "loss": 0.3557, "lr": 4.690580423278718e-07, "epoch": 18.88888888888889, "percentage": 94.44, "elapsed_time": "1:46:11", "remaining_time": "0:06:14", "throughput": 3701.59, "total_tokens": 23583128}
|
| 7227 |
+
{"current_steps": 36045, "total_steps": 38160, "loss": 0.4854, "lr": 4.668560549764861e-07, "epoch": 18.891509433962263, "percentage": 94.46, "elapsed_time": "1:46:11", "remaining_time": "0:06:13", "throughput": 3701.61, "total_tokens": 23586104}
|
| 7228 |
+
{"current_steps": 36050, "total_steps": 38160, "loss": 0.5109, "lr": 4.646591996789268e-07, "epoch": 18.89412997903564, "percentage": 94.47, "elapsed_time": "1:46:12", "remaining_time": "0:06:12", "throughput": 3701.63, "total_tokens": 23589400}
|
| 7229 |
+
{"current_steps": 36055, "total_steps": 38160, "loss": 0.5031, "lr": 4.6246747689474847e-07, "epoch": 18.896750524109013, "percentage": 94.48, "elapsed_time": "1:46:13", "remaining_time": "0:06:12", "throughput": 3701.62, "total_tokens": 23592184}
|
| 7230 |
+
{"current_steps": 36060, "total_steps": 38160, "loss": 0.5069, "lr": 4.6028088708243434e-07, "epoch": 18.89937106918239, "percentage": 94.5, "elapsed_time": "1:46:14", "remaining_time": "0:06:11", "throughput": 3701.63, "total_tokens": 23595064}
|
| 7231 |
+
{"current_steps": 36065, "total_steps": 38160, "loss": 0.3783, "lr": 4.580994306993991e-07, "epoch": 18.901991614255767, "percentage": 94.51, "elapsed_time": "1:46:14", "remaining_time": "0:06:10", "throughput": 3701.57, "total_tokens": 23597432}
|
| 7232 |
+
{"current_steps": 36070, "total_steps": 38160, "loss": 0.4672, "lr": 4.5592310820196936e-07, "epoch": 18.90461215932914, "percentage": 94.52, "elapsed_time": "1:46:15", "remaining_time": "0:06:09", "throughput": 3701.6, "total_tokens": 23600600}
|
| 7233 |
+
{"current_steps": 36075, "total_steps": 38160, "loss": 0.4487, "lr": 4.537519200454116e-07, "epoch": 18.907232704402517, "percentage": 94.54, "elapsed_time": "1:46:16", "remaining_time": "0:06:08", "throughput": 3701.67, "total_tokens": 23604440}
|
| 7234 |
+
{"current_steps": 36080, "total_steps": 38160, "loss": 0.5593, "lr": 4.515858666839151e-07, "epoch": 18.90985324947589, "percentage": 94.55, "elapsed_time": "1:46:17", "remaining_time": "0:06:07", "throughput": 3701.7, "total_tokens": 23607736}
|
| 7235 |
+
{"current_steps": 36085, "total_steps": 38160, "loss": 0.5197, "lr": 4.494249485705926e-07, "epoch": 18.912473794549268, "percentage": 94.56, "elapsed_time": "1:46:19", "remaining_time": "0:06:06", "throughput": 3701.89, "total_tokens": 23615096}
|
| 7236 |
+
{"current_steps": 36090, "total_steps": 38160, "loss": 0.3672, "lr": 4.472691661574824e-07, "epoch": 18.91509433962264, "percentage": 94.58, "elapsed_time": "1:46:20", "remaining_time": "0:06:05", "throughput": 3701.95, "total_tokens": 23618584}
|
| 7237 |
+
{"current_steps": 36095, "total_steps": 38160, "loss": 0.5941, "lr": 4.4511851989555156e-07, "epoch": 18.917714884696018, "percentage": 94.59, "elapsed_time": "1:46:20", "remaining_time": "0:06:05", "throughput": 3702.06, "total_tokens": 23622808}
|
| 7238 |
+
{"current_steps": 36100, "total_steps": 38160, "loss": 0.4686, "lr": 4.429730102346874e-07, "epoch": 18.92033542976939, "percentage": 94.6, "elapsed_time": "1:46:21", "remaining_time": "0:06:04", "throughput": 3702.05, "total_tokens": 23625752}
|
| 7239 |
+
{"current_steps": 36105, "total_steps": 38160, "loss": 0.4813, "lr": 4.4083263762370873e-07, "epoch": 18.92295597484277, "percentage": 94.61, "elapsed_time": "1:46:22", "remaining_time": "0:06:03", "throughput": 3702.06, "total_tokens": 23629144}
|
| 7240 |
+
{"current_steps": 36110, "total_steps": 38160, "loss": 0.3718, "lr": 4.386974025103574e-07, "epoch": 18.92557651991614, "percentage": 94.63, "elapsed_time": "1:46:23", "remaining_time": "0:06:02", "throughput": 3702.07, "total_tokens": 23632440}
|
| 7241 |
+
{"current_steps": 36115, "total_steps": 38160, "loss": 0.3823, "lr": 4.365673053413011e-07, "epoch": 18.92819706498952, "percentage": 94.64, "elapsed_time": "1:46:24", "remaining_time": "0:06:01", "throughput": 3702.1, "total_tokens": 23635448}
|
| 7242 |
+
{"current_steps": 36120, "total_steps": 38160, "loss": 0.5813, "lr": 4.34442346562125e-07, "epoch": 18.930817610062892, "percentage": 94.65, "elapsed_time": "1:46:25", "remaining_time": "0:06:00", "throughput": 3702.2, "total_tokens": 23639160}
|
| 7243 |
+
{"current_steps": 36125, "total_steps": 38160, "loss": 0.3631, "lr": 4.323225266173542e-07, "epoch": 18.93343815513627, "percentage": 94.67, "elapsed_time": "1:46:25", "remaining_time": "0:05:59", "throughput": 3702.31, "total_tokens": 23642904}
|
| 7244 |
+
{"current_steps": 36130, "total_steps": 38160, "loss": 0.2888, "lr": 4.302078459504283e-07, "epoch": 18.936058700209642, "percentage": 94.68, "elapsed_time": "1:46:26", "remaining_time": "0:05:58", "throughput": 3702.35, "total_tokens": 23646200}
|
| 7245 |
+
{"current_steps": 36135, "total_steps": 38160, "loss": 0.4155, "lr": 4.2809830500371294e-07, "epoch": 18.93867924528302, "percentage": 94.69, "elapsed_time": "1:46:27", "remaining_time": "0:05:57", "throughput": 3702.38, "total_tokens": 23649304}
|
| 7246 |
+
{"current_steps": 36140, "total_steps": 38160, "loss": 0.5367, "lr": 4.2599390421850236e-07, "epoch": 18.941299790356393, "percentage": 94.71, "elapsed_time": "1:46:28", "remaining_time": "0:05:57", "throughput": 3702.33, "total_tokens": 23652024}
|
| 7247 |
+
{"current_steps": 36145, "total_steps": 38160, "loss": 0.419, "lr": 4.2389464403501113e-07, "epoch": 18.94392033542977, "percentage": 94.72, "elapsed_time": "1:46:29", "remaining_time": "0:05:56", "throughput": 3702.37, "total_tokens": 23655608}
|
| 7248 |
+
{"current_steps": 36150, "total_steps": 38160, "loss": 0.435, "lr": 4.218005248923823e-07, "epoch": 18.946540880503143, "percentage": 94.73, "elapsed_time": "1:46:30", "remaining_time": "0:05:55", "throughput": 3702.4, "total_tokens": 23658616}
|
| 7249 |
+
{"current_steps": 36155, "total_steps": 38160, "loss": 0.3863, "lr": 4.1971154722867946e-07, "epoch": 18.94916142557652, "percentage": 94.75, "elapsed_time": "1:46:30", "remaining_time": "0:05:54", "throughput": 3702.44, "total_tokens": 23661976}
|
| 7250 |
+
{"current_steps": 36160, "total_steps": 38160, "loss": 0.4078, "lr": 4.176277114808946e-07, "epoch": 18.951781970649897, "percentage": 94.76, "elapsed_time": "1:46:31", "remaining_time": "0:05:53", "throughput": 3702.45, "total_tokens": 23665080}
|
| 7251 |
+
{"current_steps": 36165, "total_steps": 38160, "loss": 0.5342, "lr": 4.155490180849431e-07, "epoch": 18.95440251572327, "percentage": 94.77, "elapsed_time": "1:46:32", "remaining_time": "0:05:52", "throughput": 3702.38, "total_tokens": 23667736}
|
| 7252 |
+
{"current_steps": 36170, "total_steps": 38160, "loss": 0.3093, "lr": 4.1347546747566304e-07, "epoch": 18.957023060796647, "percentage": 94.79, "elapsed_time": "1:46:33", "remaining_time": "0:05:51", "throughput": 3702.58, "total_tokens": 23672824}
|
| 7253 |
+
{"current_steps": 36175, "total_steps": 38160, "loss": 0.4392, "lr": 4.114070600868131e-07, "epoch": 18.95964360587002, "percentage": 94.8, "elapsed_time": "1:46:34", "remaining_time": "0:05:50", "throughput": 3702.62, "total_tokens": 23676024}
|
| 7254 |
+
{"current_steps": 36180, "total_steps": 38160, "loss": 0.4801, "lr": 4.093437963510888e-07, "epoch": 18.962264150943398, "percentage": 94.81, "elapsed_time": "1:46:35", "remaining_time": "0:05:49", "throughput": 3702.64, "total_tokens": 23678936}
|
| 7255 |
+
{"current_steps": 36185, "total_steps": 38160, "loss": 0.5579, "lr": 4.072856767000949e-07, "epoch": 18.96488469601677, "percentage": 94.82, "elapsed_time": "1:46:36", "remaining_time": "0:05:49", "throughput": 3702.75, "total_tokens": 23683160}
|
| 7256 |
+
{"current_steps": 36190, "total_steps": 38160, "loss": 0.4122, "lr": 4.052327015643703e-07, "epoch": 18.967505241090148, "percentage": 94.84, "elapsed_time": "1:46:36", "remaining_time": "0:05:48", "throughput": 3702.75, "total_tokens": 23685912}
|
| 7257 |
+
{"current_steps": 36195, "total_steps": 38160, "loss": 0.4298, "lr": 4.031848713733744e-07, "epoch": 18.97012578616352, "percentage": 94.85, "elapsed_time": "1:46:37", "remaining_time": "0:05:47", "throughput": 3702.72, "total_tokens": 23688536}
|
| 7258 |
+
{"current_steps": 36200, "total_steps": 38160, "loss": 0.4092, "lr": 4.011421865554837e-07, "epoch": 18.9727463312369, "percentage": 94.86, "elapsed_time": "1:46:38", "remaining_time": "0:05:46", "throughput": 3702.75, "total_tokens": 23691608}
|
| 7259 |
+
{"current_steps": 36205, "total_steps": 38160, "loss": 0.4185, "lr": 3.9910464753801224e-07, "epoch": 18.97536687631027, "percentage": 94.88, "elapsed_time": "1:46:39", "remaining_time": "0:05:45", "throughput": 3702.79, "total_tokens": 23694936}
|
| 7260 |
+
{"current_steps": 36210, "total_steps": 38160, "loss": 0.4577, "lr": 3.9707225474718557e-07, "epoch": 18.97798742138365, "percentage": 94.89, "elapsed_time": "1:46:39", "remaining_time": "0:05:44", "throughput": 3702.85, "total_tokens": 23698168}
|
| 7261 |
+
{"current_steps": 36215, "total_steps": 38160, "loss": 0.4518, "lr": 3.950450086081553e-07, "epoch": 18.980607966457022, "percentage": 94.9, "elapsed_time": "1:46:40", "remaining_time": "0:05:43", "throughput": 3702.87, "total_tokens": 23701080}
|
| 7262 |
+
{"current_steps": 36220, "total_steps": 38160, "loss": 0.3948, "lr": 3.930229095450072e-07, "epoch": 18.9832285115304, "percentage": 94.92, "elapsed_time": "1:46:41", "remaining_time": "0:05:42", "throughput": 3702.86, "total_tokens": 23704088}
|
| 7263 |
+
{"current_steps": 36225, "total_steps": 38160, "loss": 0.5251, "lr": 3.9100595798073345e-07, "epoch": 18.985849056603772, "percentage": 94.93, "elapsed_time": "1:46:42", "remaining_time": "0:05:41", "throughput": 3702.81, "total_tokens": 23706520}
|
| 7264 |
+
{"current_steps": 36230, "total_steps": 38160, "loss": 0.4356, "lr": 3.8899415433725493e-07, "epoch": 18.98846960167715, "percentage": 94.94, "elapsed_time": "1:46:43", "remaining_time": "0:05:41", "throughput": 3702.85, "total_tokens": 23709656}
|
| 7265 |
+
{"current_steps": 36235, "total_steps": 38160, "loss": 0.4116, "lr": 3.8698749903542666e-07, "epoch": 18.991090146750523, "percentage": 94.96, "elapsed_time": "1:46:43", "remaining_time": "0:05:40", "throughput": 3702.82, "total_tokens": 23712280}
|
| 7266 |
+
{"current_steps": 36240, "total_steps": 38160, "loss": 0.4047, "lr": 3.8498599249501e-07, "epoch": 18.9937106918239, "percentage": 94.97, "elapsed_time": "1:46:44", "remaining_time": "0:05:39", "throughput": 3702.83, "total_tokens": 23715224}
|
| 7267 |
+
{"current_steps": 36245, "total_steps": 38160, "loss": 0.3177, "lr": 3.8298963513470345e-07, "epoch": 18.996331236897273, "percentage": 94.98, "elapsed_time": "1:46:45", "remaining_time": "0:05:38", "throughput": 3702.94, "total_tokens": 23719224}
|
| 7268 |
+
{"current_steps": 36250, "total_steps": 38160, "loss": 0.4458, "lr": 3.8099842737211734e-07, "epoch": 18.99895178197065, "percentage": 94.99, "elapsed_time": "1:46:46", "remaining_time": "0:05:37", "throughput": 3702.91, "total_tokens": 23721848}
|
| 7269 |
+
{"current_steps": 36252, "total_steps": 38160, "eval_loss": 0.4471572935581207, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "1:46:56", "remaining_time": "0:05:37", "throughput": 3697.18, "total_tokens": 23722376}
|
| 7270 |
+
{"current_steps": 36255, "total_steps": 38160, "loss": 0.4043, "lr": 3.7901236962379627e-07, "epoch": 19.001572327044027, "percentage": 95.01, "elapsed_time": "1:46:59", "remaining_time": "0:05:37", "throughput": 3695.65, "total_tokens": 23723848}
|
| 7271 |
+
{"current_steps": 36260, "total_steps": 38160, "loss": 0.4779, "lr": 3.7703146230519126e-07, "epoch": 19.0041928721174, "percentage": 95.02, "elapsed_time": "1:47:00", "remaining_time": "0:05:36", "throughput": 3695.75, "total_tokens": 23727912}
|
| 7272 |
+
{"current_steps": 36265, "total_steps": 38160, "loss": 0.4018, "lr": 3.7505570583069306e-07, "epoch": 19.006813417190777, "percentage": 95.03, "elapsed_time": "1:47:01", "remaining_time": "0:05:35", "throughput": 3695.63, "total_tokens": 23730696}
|
| 7273 |
+
{"current_steps": 36270, "total_steps": 38160, "loss": 0.3709, "lr": 3.7308510061360435e-07, "epoch": 19.00943396226415, "percentage": 95.05, "elapsed_time": "1:47:02", "remaining_time": "0:05:34", "throughput": 3695.63, "total_tokens": 23733960}
|
| 7274 |
+
{"current_steps": 36275, "total_steps": 38160, "loss": 0.4815, "lr": 3.7111964706615374e-07, "epoch": 19.012054507337528, "percentage": 95.06, "elapsed_time": "1:47:02", "remaining_time": "0:05:33", "throughput": 3695.62, "total_tokens": 23736712}
|