Training in progress, step 36252
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +387 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 819328
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d6c89997633d3e512adcada12449788f87c93bb5960178aae2aed58b5b7613d
|
| 3 |
size 819328
|
trainer_log.jsonl
CHANGED
|
@@ -6888,3 +6888,390 @@
|
|
| 6888 |
{"current_steps": 34350, "total_steps": 38160, "loss": 0.359, "lr": 0.0009022657091277742, "epoch": 18.00314465408805, "percentage": 90.02, "elapsed_time": "1:28:30", "remaining_time": "0:09:49", "throughput": 4218.63, "total_tokens": 22403888}
|
| 6889 |
{"current_steps": 34355, "total_steps": 38160, "loss": 0.417, "lr": 0.000899923679129354, "epoch": 18.005765199161427, "percentage": 90.03, "elapsed_time": "1:28:31", "remaining_time": "0:09:48", "throughput": 4218.76, "total_tokens": 22408208}
|
| 6890 |
{"current_steps": 34360, "total_steps": 38160, "loss": 0.3425, "lr": 0.0008975845987060348, "epoch": 18.0083857442348, "percentage": 90.04, "elapsed_time": "1:28:32", "remaining_time": "0:09:47", "throughput": 4218.82, "total_tokens": 22411856}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6888 |
{"current_steps": 34350, "total_steps": 38160, "loss": 0.359, "lr": 0.0009022657091277742, "epoch": 18.00314465408805, "percentage": 90.02, "elapsed_time": "1:28:30", "remaining_time": "0:09:49", "throughput": 4218.63, "total_tokens": 22403888}
|
| 6889 |
{"current_steps": 34355, "total_steps": 38160, "loss": 0.417, "lr": 0.000899923679129354, "epoch": 18.005765199161427, "percentage": 90.03, "elapsed_time": "1:28:31", "remaining_time": "0:09:48", "throughput": 4218.76, "total_tokens": 22408208}
|
| 6890 |
{"current_steps": 34360, "total_steps": 38160, "loss": 0.3425, "lr": 0.0008975845987060348, "epoch": 18.0083857442348, "percentage": 90.04, "elapsed_time": "1:28:32", "remaining_time": "0:09:47", "throughput": 4218.82, "total_tokens": 22411856}
|
| 6891 |
+
{"current_steps": 34365, "total_steps": 38160, "loss": 0.3321, "lr": 0.0008952484683471301, "epoch": 18.011006289308177, "percentage": 90.06, "elapsed_time": "1:28:33", "remaining_time": "0:09:46", "throughput": 4218.87, "total_tokens": 22415248}
|
| 6892 |
+
{"current_steps": 34370, "total_steps": 38160, "loss": 0.3714, "lr": 0.0008929152885413288, "epoch": 18.01362683438155, "percentage": 90.07, "elapsed_time": "1:28:33", "remaining_time": "0:09:45", "throughput": 4218.89, "total_tokens": 22418416}
|
| 6893 |
+
{"current_steps": 34375, "total_steps": 38160, "loss": 0.3373, "lr": 0.0008905850597767107, "epoch": 18.016247379454928, "percentage": 90.08, "elapsed_time": "1:28:34", "remaining_time": "0:09:45", "throughput": 4218.91, "total_tokens": 22421456}
|
| 6894 |
+
{"current_steps": 34380, "total_steps": 38160, "loss": 0.2199, "lr": 0.0008882577825407272, "epoch": 18.0188679245283, "percentage": 90.09, "elapsed_time": "1:28:35", "remaining_time": "0:09:44", "throughput": 4218.93, "total_tokens": 22424464}
|
| 6895 |
+
{"current_steps": 34385, "total_steps": 38160, "loss": 0.3068, "lr": 0.000885933457320221, "epoch": 18.021488469601678, "percentage": 90.11, "elapsed_time": "1:28:35", "remaining_time": "0:09:43", "throughput": 4218.92, "total_tokens": 22427152}
|
| 6896 |
+
{"current_steps": 34390, "total_steps": 38160, "loss": 0.3041, "lr": 0.0008836120846014111, "epoch": 18.02410901467505, "percentage": 90.12, "elapsed_time": "1:28:36", "remaining_time": "0:09:42", "throughput": 4219.06, "total_tokens": 22431696}
|
| 6897 |
+
{"current_steps": 34395, "total_steps": 38160, "loss": 0.4782, "lr": 0.000881293664869906, "epoch": 18.02672955974843, "percentage": 90.13, "elapsed_time": "1:28:37", "remaining_time": "0:09:42", "throughput": 4219.07, "total_tokens": 22434704}
|
| 6898 |
+
{"current_steps": 34400, "total_steps": 38160, "loss": 0.3723, "lr": 0.0008789781986106942, "epoch": 18.029350104821802, "percentage": 90.15, "elapsed_time": "1:28:38", "remaining_time": "0:09:41", "throughput": 4219.09, "total_tokens": 22437776}
|
| 6899 |
+
{"current_steps": 34405, "total_steps": 38160, "loss": 0.3124, "lr": 0.0008766656863081417, "epoch": 18.03197064989518, "percentage": 90.16, "elapsed_time": "1:28:38", "remaining_time": "0:09:40", "throughput": 4219.11, "total_tokens": 22440912}
|
| 6900 |
+
{"current_steps": 34410, "total_steps": 38160, "loss": 0.3828, "lr": 0.0008743561284459983, "epoch": 18.034591194968552, "percentage": 90.17, "elapsed_time": "1:28:39", "remaining_time": "0:09:39", "throughput": 4219.14, "total_tokens": 22444112}
|
| 6901 |
+
{"current_steps": 34415, "total_steps": 38160, "loss": 0.3471, "lr": 0.0008720495255073973, "epoch": 18.03721174004193, "percentage": 90.19, "elapsed_time": "1:28:40", "remaining_time": "0:09:38", "throughput": 4219.24, "total_tokens": 22448080}
|
| 6902 |
+
{"current_steps": 34420, "total_steps": 38160, "loss": 0.3514, "lr": 0.0008697458779748562, "epoch": 18.039832285115303, "percentage": 90.2, "elapsed_time": "1:28:41", "remaining_time": "0:09:38", "throughput": 4219.21, "total_tokens": 22450800}
|
| 6903 |
+
{"current_steps": 34425, "total_steps": 38160, "loss": 0.4211, "lr": 0.0008674451863302728, "epoch": 18.04245283018868, "percentage": 90.21, "elapsed_time": "1:28:41", "remaining_time": "0:09:37", "throughput": 4219.22, "total_tokens": 22453648}
|
| 6904 |
+
{"current_steps": 34430, "total_steps": 38160, "loss": 0.3153, "lr": 0.0008651474510549239, "epoch": 18.045073375262053, "percentage": 90.23, "elapsed_time": "1:28:42", "remaining_time": "0:09:36", "throughput": 4219.36, "total_tokens": 22458480}
|
| 6905 |
+
{"current_steps": 34435, "total_steps": 38160, "loss": 0.3671, "lr": 0.0008628526726294699, "epoch": 18.04769392033543, "percentage": 90.24, "elapsed_time": "1:28:43", "remaining_time": "0:09:35", "throughput": 4219.41, "total_tokens": 22461840}
|
| 6906 |
+
{"current_steps": 34440, "total_steps": 38160, "loss": 0.267, "lr": 0.0008605608515339485, "epoch": 18.050314465408807, "percentage": 90.25, "elapsed_time": "1:28:44", "remaining_time": "0:09:35", "throughput": 4219.45, "total_tokens": 22465104}
|
| 6907 |
+
{"current_steps": 34445, "total_steps": 38160, "loss": 0.3475, "lr": 0.0008582719882477879, "epoch": 18.05293501048218, "percentage": 90.26, "elapsed_time": "1:28:44", "remaining_time": "0:09:34", "throughput": 4219.53, "total_tokens": 22468976}
|
| 6908 |
+
{"current_steps": 34450, "total_steps": 38160, "loss": 0.2974, "lr": 0.0008559860832497883, "epoch": 18.055555555555557, "percentage": 90.28, "elapsed_time": "1:28:45", "remaining_time": "0:09:33", "throughput": 4219.53, "total_tokens": 22471888}
|
| 6909 |
+
{"current_steps": 34455, "total_steps": 38160, "loss": 0.3787, "lr": 0.0008537031370181392, "epoch": 18.05817610062893, "percentage": 90.29, "elapsed_time": "1:28:46", "remaining_time": "0:09:32", "throughput": 4219.52, "total_tokens": 22474640}
|
| 6910 |
+
{"current_steps": 34460, "total_steps": 38160, "loss": 0.4198, "lr": 0.0008514231500304048, "epoch": 18.060796645702307, "percentage": 90.3, "elapsed_time": "1:28:46", "remaining_time": "0:09:31", "throughput": 4219.48, "total_tokens": 22477168}
|
| 6911 |
+
{"current_steps": 34465, "total_steps": 38160, "loss": 0.4019, "lr": 0.0008491461227635288, "epoch": 18.06341719077568, "percentage": 90.32, "elapsed_time": "1:28:47", "remaining_time": "0:09:31", "throughput": 4219.5, "total_tokens": 22480240}
|
| 6912 |
+
{"current_steps": 34470, "total_steps": 38160, "loss": 0.3759, "lr": 0.0008468720556938436, "epoch": 18.066037735849058, "percentage": 90.33, "elapsed_time": "1:28:48", "remaining_time": "0:09:30", "throughput": 4219.52, "total_tokens": 22483344}
|
| 6913 |
+
{"current_steps": 34475, "total_steps": 38160, "loss": 0.2726, "lr": 0.0008446009492970568, "epoch": 18.06865828092243, "percentage": 90.34, "elapsed_time": "1:28:49", "remaining_time": "0:09:29", "throughput": 4219.49, "total_tokens": 22485840}
|
| 6914 |
+
{"current_steps": 34480, "total_steps": 38160, "loss": 0.3376, "lr": 0.0008423328040482585, "epoch": 18.071278825995808, "percentage": 90.36, "elapsed_time": "1:28:49", "remaining_time": "0:09:28", "throughput": 4219.55, "total_tokens": 22489392}
|
| 6915 |
+
{"current_steps": 34485, "total_steps": 38160, "loss": 0.2905, "lr": 0.0008400676204219176, "epoch": 18.07389937106918, "percentage": 90.37, "elapsed_time": "1:28:50", "remaining_time": "0:09:28", "throughput": 4219.5, "total_tokens": 22491696}
|
| 6916 |
+
{"current_steps": 34490, "total_steps": 38160, "loss": 0.3216, "lr": 0.0008378053988918882, "epoch": 18.07651991614256, "percentage": 90.38, "elapsed_time": "1:28:51", "remaining_time": "0:09:27", "throughput": 4219.49, "total_tokens": 22494448}
|
| 6917 |
+
{"current_steps": 34495, "total_steps": 38160, "loss": 0.3229, "lr": 0.0008355461399313951, "epoch": 18.079140461215932, "percentage": 90.4, "elapsed_time": "1:28:51", "remaining_time": "0:09:26", "throughput": 4219.48, "total_tokens": 22497264}
|
| 6918 |
+
{"current_steps": 34500, "total_steps": 38160, "loss": 0.2848, "lr": 0.0008332898440130536, "epoch": 18.08176100628931, "percentage": 90.41, "elapsed_time": "1:28:52", "remaining_time": "0:09:25", "throughput": 4219.48, "total_tokens": 22500080}
|
| 6919 |
+
{"current_steps": 34505, "total_steps": 38160, "loss": 0.3647, "lr": 0.0008310365116088547, "epoch": 18.084381551362682, "percentage": 90.42, "elapsed_time": "1:28:53", "remaining_time": "0:09:24", "throughput": 4219.49, "total_tokens": 22503152}
|
| 6920 |
+
{"current_steps": 34510, "total_steps": 38160, "loss": 0.4054, "lr": 0.0008287861431901711, "epoch": 18.08700209643606, "percentage": 90.44, "elapsed_time": "1:28:53", "remaining_time": "0:09:24", "throughput": 4219.52, "total_tokens": 22506288}
|
| 6921 |
+
{"current_steps": 34515, "total_steps": 38160, "loss": 0.3749, "lr": 0.0008265387392277528, "epoch": 18.089622641509433, "percentage": 90.45, "elapsed_time": "1:28:54", "remaining_time": "0:09:23", "throughput": 4219.45, "total_tokens": 22508496}
|
| 6922 |
+
{"current_steps": 34520, "total_steps": 38160, "loss": 0.3949, "lr": 0.000824294300191729, "epoch": 18.09224318658281, "percentage": 90.46, "elapsed_time": "1:28:55", "remaining_time": "0:09:22", "throughput": 4219.56, "total_tokens": 22512496}
|
| 6923 |
+
{"current_steps": 34525, "total_steps": 38160, "loss": 0.344, "lr": 0.0008220528265516125, "epoch": 18.094863731656183, "percentage": 90.47, "elapsed_time": "1:28:56", "remaining_time": "0:09:21", "throughput": 4219.6, "total_tokens": 22515888}
|
| 6924 |
+
{"current_steps": 34530, "total_steps": 38160, "loss": 0.3039, "lr": 0.0008198143187762929, "epoch": 18.09748427672956, "percentage": 90.49, "elapsed_time": "1:28:56", "remaining_time": "0:09:21", "throughput": 4219.62, "total_tokens": 22518928}
|
| 6925 |
+
{"current_steps": 34535, "total_steps": 38160, "loss": 0.3293, "lr": 0.0008175787773340443, "epoch": 18.100104821802937, "percentage": 90.5, "elapsed_time": "1:28:57", "remaining_time": "0:09:20", "throughput": 4219.73, "total_tokens": 22523120}
|
| 6926 |
+
{"current_steps": 34540, "total_steps": 38160, "loss": 0.2634, "lr": 0.0008153462026925123, "epoch": 18.10272536687631, "percentage": 90.51, "elapsed_time": "1:28:58", "remaining_time": "0:09:19", "throughput": 4219.67, "total_tokens": 22525392}
|
| 6927 |
+
{"current_steps": 34545, "total_steps": 38160, "loss": 0.307, "lr": 0.0008131165953187302, "epoch": 18.105345911949687, "percentage": 90.53, "elapsed_time": "1:28:58", "remaining_time": "0:09:18", "throughput": 4219.65, "total_tokens": 22527920}
|
| 6928 |
+
{"current_steps": 34550, "total_steps": 38160, "loss": 0.3841, "lr": 0.0008108899556791016, "epoch": 18.10796645702306, "percentage": 90.54, "elapsed_time": "1:28:59", "remaining_time": "0:09:17", "throughput": 4219.69, "total_tokens": 22531472}
|
| 6929 |
+
{"current_steps": 34555, "total_steps": 38160, "loss": 0.4028, "lr": 0.0008086662842394154, "epoch": 18.110587002096437, "percentage": 90.55, "elapsed_time": "1:29:00", "remaining_time": "0:09:17", "throughput": 4219.72, "total_tokens": 22534672}
|
| 6930 |
+
{"current_steps": 34560, "total_steps": 38160, "loss": 0.3838, "lr": 0.0008064455814648414, "epoch": 18.11320754716981, "percentage": 90.57, "elapsed_time": "1:29:00", "remaining_time": "0:09:16", "throughput": 4219.71, "total_tokens": 22537424}
|
| 6931 |
+
{"current_steps": 34565, "total_steps": 38160, "loss": 0.4421, "lr": 0.0008042278478199211, "epoch": 18.115828092243188, "percentage": 90.58, "elapsed_time": "1:29:01", "remaining_time": "0:09:15", "throughput": 4219.76, "total_tokens": 22540944}
|
| 6932 |
+
{"current_steps": 34570, "total_steps": 38160, "loss": 0.3126, "lr": 0.0008020130837685818, "epoch": 18.11844863731656, "percentage": 90.59, "elapsed_time": "1:29:02", "remaining_time": "0:09:14", "throughput": 4219.82, "total_tokens": 22544368}
|
| 6933 |
+
{"current_steps": 34575, "total_steps": 38160, "loss": 0.3698, "lr": 0.000799801289774128, "epoch": 18.121069182389938, "percentage": 90.61, "elapsed_time": "1:29:03", "remaining_time": "0:09:14", "throughput": 4219.84, "total_tokens": 22547568}
|
| 6934 |
+
{"current_steps": 34580, "total_steps": 38160, "loss": 0.3381, "lr": 0.0007975924662992378, "epoch": 18.12368972746331, "percentage": 90.62, "elapsed_time": "1:29:04", "remaining_time": "0:09:13", "throughput": 4219.94, "total_tokens": 22551504}
|
| 6935 |
+
{"current_steps": 34585, "total_steps": 38160, "loss": 0.3209, "lr": 0.0007953866138059734, "epoch": 18.12631027253669, "percentage": 90.63, "elapsed_time": "1:29:04", "remaining_time": "0:09:12", "throughput": 4219.93, "total_tokens": 22554416}
|
| 6936 |
+
{"current_steps": 34590, "total_steps": 38160, "loss": 0.2312, "lr": 0.0007931837327557772, "epoch": 18.128930817610062, "percentage": 90.64, "elapsed_time": "1:29:05", "remaining_time": "0:09:11", "throughput": 4219.93, "total_tokens": 22557232}
|
| 6937 |
+
{"current_steps": 34595, "total_steps": 38160, "loss": 0.3184, "lr": 0.0007909838236094624, "epoch": 18.13155136268344, "percentage": 90.66, "elapsed_time": "1:29:06", "remaining_time": "0:09:10", "throughput": 4220.02, "total_tokens": 22560976}
|
| 6938 |
+
{"current_steps": 34600, "total_steps": 38160, "loss": 0.4884, "lr": 0.000788786886827229, "epoch": 18.134171907756812, "percentage": 90.67, "elapsed_time": "1:29:06", "remaining_time": "0:09:10", "throughput": 4220.05, "total_tokens": 22564272}
|
| 6939 |
+
{"current_steps": 34605, "total_steps": 38160, "loss": 0.6176, "lr": 0.0007865929228686463, "epoch": 18.13679245283019, "percentage": 90.68, "elapsed_time": "1:29:07", "remaining_time": "0:09:09", "throughput": 4220.06, "total_tokens": 22567280}
|
| 6940 |
+
{"current_steps": 34610, "total_steps": 38160, "loss": 0.3573, "lr": 0.0007844019321926688, "epoch": 18.139412997903563, "percentage": 90.7, "elapsed_time": "1:29:08", "remaining_time": "0:09:08", "throughput": 4220.14, "total_tokens": 22571120}
|
| 6941 |
+
{"current_steps": 34615, "total_steps": 38160, "loss": 0.3094, "lr": 0.0007822139152576296, "epoch": 18.14203354297694, "percentage": 90.71, "elapsed_time": "1:29:09", "remaining_time": "0:09:07", "throughput": 4220.19, "total_tokens": 22574672}
|
| 6942 |
+
{"current_steps": 34620, "total_steps": 38160, "loss": 0.4041, "lr": 0.0007800288725212311, "epoch": 18.144654088050313, "percentage": 90.72, "elapsed_time": "1:29:09", "remaining_time": "0:09:07", "throughput": 4220.2, "total_tokens": 22577616}
|
| 6943 |
+
{"current_steps": 34625, "total_steps": 38160, "loss": 0.4002, "lr": 0.0007778468044405645, "epoch": 18.14727463312369, "percentage": 90.74, "elapsed_time": "1:29:10", "remaining_time": "0:09:06", "throughput": 4220.15, "total_tokens": 22579888}
|
| 6944 |
+
{"current_steps": 34630, "total_steps": 38160, "loss": 0.3913, "lr": 0.000775667711472091, "epoch": 18.149895178197063, "percentage": 90.75, "elapsed_time": "1:29:11", "remaining_time": "0:09:05", "throughput": 4220.16, "total_tokens": 22582896}
|
| 6945 |
+
{"current_steps": 34635, "total_steps": 38160, "loss": 0.2865, "lr": 0.0007734915940716513, "epoch": 18.15251572327044, "percentage": 90.76, "elapsed_time": "1:29:11", "remaining_time": "0:09:04", "throughput": 4220.14, "total_tokens": 22585616}
|
| 6946 |
+
{"current_steps": 34640, "total_steps": 38160, "loss": 0.4406, "lr": 0.0007713184526944694, "epoch": 18.155136268343817, "percentage": 90.78, "elapsed_time": "1:29:12", "remaining_time": "0:09:03", "throughput": 4220.13, "total_tokens": 22588496}
|
| 6947 |
+
{"current_steps": 34645, "total_steps": 38160, "loss": 0.3136, "lr": 0.0007691482877951333, "epoch": 18.15775681341719, "percentage": 90.79, "elapsed_time": "1:29:13", "remaining_time": "0:09:03", "throughput": 4220.09, "total_tokens": 22590896}
|
| 6948 |
+
{"current_steps": 34650, "total_steps": 38160, "loss": 0.6035, "lr": 0.0007669810998276233, "epoch": 18.160377358490567, "percentage": 90.8, "elapsed_time": "1:29:13", "remaining_time": "0:09:02", "throughput": 4220.04, "total_tokens": 22593104}
|
| 6949 |
+
{"current_steps": 34655, "total_steps": 38160, "loss": 0.358, "lr": 0.0007648168892452866, "epoch": 18.16299790356394, "percentage": 90.81, "elapsed_time": "1:29:14", "remaining_time": "0:09:01", "throughput": 4220.1, "total_tokens": 22596656}
|
| 6950 |
+
{"current_steps": 34660, "total_steps": 38160, "loss": 0.3327, "lr": 0.0007626556565008563, "epoch": 18.165618448637318, "percentage": 90.83, "elapsed_time": "1:29:15", "remaining_time": "0:09:00", "throughput": 4220.09, "total_tokens": 22599312}
|
| 6951 |
+
{"current_steps": 34665, "total_steps": 38160, "loss": 0.2805, "lr": 0.0007604974020464322, "epoch": 18.16823899371069, "percentage": 90.84, "elapsed_time": "1:29:16", "remaining_time": "0:09:00", "throughput": 4220.2, "total_tokens": 22603440}
|
| 6952 |
+
{"current_steps": 34670, "total_steps": 38160, "loss": 0.3727, "lr": 0.0007583421263334999, "epoch": 18.170859538784068, "percentage": 90.85, "elapsed_time": "1:29:16", "remaining_time": "0:08:59", "throughput": 4220.22, "total_tokens": 22606512}
|
| 6953 |
+
{"current_steps": 34675, "total_steps": 38160, "loss": 0.4269, "lr": 0.0007561898298129154, "epoch": 18.17348008385744, "percentage": 90.87, "elapsed_time": "1:29:17", "remaining_time": "0:08:58", "throughput": 4220.24, "total_tokens": 22609584}
|
| 6954 |
+
{"current_steps": 34680, "total_steps": 38160, "loss": 0.3922, "lr": 0.0007540405129349187, "epoch": 18.17610062893082, "percentage": 90.88, "elapsed_time": "1:29:18", "remaining_time": "0:08:57", "throughput": 4220.34, "total_tokens": 22613712}
|
| 6955 |
+
{"current_steps": 34685, "total_steps": 38160, "loss": 0.3266, "lr": 0.0007518941761491182, "epoch": 18.178721174004192, "percentage": 90.89, "elapsed_time": "1:29:18", "remaining_time": "0:08:56", "throughput": 4220.38, "total_tokens": 22617008}
|
| 6956 |
+
{"current_steps": 34690, "total_steps": 38160, "loss": 0.2492, "lr": 0.0007497508199045066, "epoch": 18.18134171907757, "percentage": 90.91, "elapsed_time": "1:29:19", "remaining_time": "0:08:56", "throughput": 4220.4, "total_tokens": 22620112}
|
| 6957 |
+
{"current_steps": 34695, "total_steps": 38160, "loss": 0.2676, "lr": 0.0007476104446494502, "epoch": 18.183962264150942, "percentage": 90.92, "elapsed_time": "1:29:20", "remaining_time": "0:08:55", "throughput": 4220.36, "total_tokens": 22622608}
|
| 6958 |
+
{"current_steps": 34700, "total_steps": 38160, "loss": 0.4439, "lr": 0.0007454730508316842, "epoch": 18.18658280922432, "percentage": 90.93, "elapsed_time": "1:29:21", "remaining_time": "0:08:54", "throughput": 4220.44, "total_tokens": 22626416}
|
| 6959 |
+
{"current_steps": 34705, "total_steps": 38160, "loss": 0.2636, "lr": 0.0007433386388983343, "epoch": 18.189203354297693, "percentage": 90.95, "elapsed_time": "1:29:21", "remaining_time": "0:08:53", "throughput": 4220.46, "total_tokens": 22629328}
|
| 6960 |
+
{"current_steps": 34710, "total_steps": 38160, "loss": 0.4345, "lr": 0.0007412072092958915, "epoch": 18.19182389937107, "percentage": 90.96, "elapsed_time": "1:29:22", "remaining_time": "0:08:53", "throughput": 4220.45, "total_tokens": 22632144}
|
| 6961 |
+
{"current_steps": 34715, "total_steps": 38160, "loss": 0.3324, "lr": 0.0007390787624702294, "epoch": 18.194444444444443, "percentage": 90.97, "elapsed_time": "1:29:23", "remaining_time": "0:08:52", "throughput": 4220.44, "total_tokens": 22634992}
|
| 6962 |
+
{"current_steps": 34720, "total_steps": 38160, "loss": 0.3424, "lr": 0.0007369532988665933, "epoch": 18.19706498951782, "percentage": 90.99, "elapsed_time": "1:29:23", "remaining_time": "0:08:51", "throughput": 4220.46, "total_tokens": 22638256}
|
| 6963 |
+
{"current_steps": 34725, "total_steps": 38160, "loss": 0.3583, "lr": 0.0007348308189296026, "epoch": 18.199685534591197, "percentage": 91.0, "elapsed_time": "1:29:24", "remaining_time": "0:08:50", "throughput": 4220.52, "total_tokens": 22641712}
|
| 6964 |
+
{"current_steps": 34730, "total_steps": 38160, "loss": 0.3613, "lr": 0.0007327113231032605, "epoch": 18.20230607966457, "percentage": 91.01, "elapsed_time": "1:29:25", "remaining_time": "0:08:49", "throughput": 4220.64, "total_tokens": 22645904}
|
| 6965 |
+
{"current_steps": 34735, "total_steps": 38160, "loss": 0.3808, "lr": 0.000730594811830939, "epoch": 18.204926624737947, "percentage": 91.02, "elapsed_time": "1:29:26", "remaining_time": "0:08:49", "throughput": 4220.65, "total_tokens": 22648944}
|
| 6966 |
+
{"current_steps": 34740, "total_steps": 38160, "loss": 0.399, "lr": 0.000728481285555389, "epoch": 18.20754716981132, "percentage": 91.04, "elapsed_time": "1:29:26", "remaining_time": "0:08:48", "throughput": 4220.65, "total_tokens": 22651888}
|
| 6967 |
+
{"current_steps": 34745, "total_steps": 38160, "loss": 0.4015, "lr": 0.0007263707447187383, "epoch": 18.210167714884697, "percentage": 91.05, "elapsed_time": "1:29:27", "remaining_time": "0:08:47", "throughput": 4220.68, "total_tokens": 22655120}
|
| 6968 |
+
{"current_steps": 34750, "total_steps": 38160, "loss": 0.6435, "lr": 0.0007242631897624857, "epoch": 18.21278825995807, "percentage": 91.06, "elapsed_time": "1:29:28", "remaining_time": "0:08:46", "throughput": 4220.75, "total_tokens": 22658928}
|
| 6969 |
+
{"current_steps": 34755, "total_steps": 38160, "loss": 0.3831, "lr": 0.0007221586211275049, "epoch": 18.215408805031448, "percentage": 91.08, "elapsed_time": "1:29:29", "remaining_time": "0:08:46", "throughput": 4220.8, "total_tokens": 22662288}
|
| 6970 |
+
{"current_steps": 34760, "total_steps": 38160, "loss": 0.2808, "lr": 0.0007200570392540506, "epoch": 18.21802935010482, "percentage": 91.09, "elapsed_time": "1:29:29", "remaining_time": "0:08:45", "throughput": 4220.77, "total_tokens": 22664848}
|
| 6971 |
+
{"current_steps": 34765, "total_steps": 38160, "loss": 0.2504, "lr": 0.0007179584445817493, "epoch": 18.220649895178198, "percentage": 91.1, "elapsed_time": "1:29:30", "remaining_time": "0:08:44", "throughput": 4220.74, "total_tokens": 22667376}
|
| 6972 |
+
{"current_steps": 34770, "total_steps": 38160, "loss": 0.4334, "lr": 0.0007158628375496051, "epoch": 18.22327044025157, "percentage": 91.12, "elapsed_time": "1:29:31", "remaining_time": "0:08:43", "throughput": 4220.84, "total_tokens": 22671440}
|
| 6973 |
+
{"current_steps": 34775, "total_steps": 38160, "loss": 0.3297, "lr": 0.0007137702185959937, "epoch": 18.22589098532495, "percentage": 91.13, "elapsed_time": "1:29:32", "remaining_time": "0:08:42", "throughput": 4220.93, "total_tokens": 22675408}
|
| 6974 |
+
{"current_steps": 34780, "total_steps": 38160, "loss": 0.2947, "lr": 0.0007116805881586651, "epoch": 18.228511530398322, "percentage": 91.14, "elapsed_time": "1:29:32", "remaining_time": "0:08:42", "throughput": 4220.92, "total_tokens": 22678224}
|
| 6975 |
+
{"current_steps": 34785, "total_steps": 38160, "loss": 0.2669, "lr": 0.0007095939466747464, "epoch": 18.2311320754717, "percentage": 91.16, "elapsed_time": "1:29:33", "remaining_time": "0:08:41", "throughput": 4220.98, "total_tokens": 22681712}
|
| 6976 |
+
{"current_steps": 34790, "total_steps": 38160, "loss": 0.2293, "lr": 0.0007075102945807415, "epoch": 18.233752620545072, "percentage": 91.17, "elapsed_time": "1:29:34", "remaining_time": "0:08:40", "throughput": 4221.02, "total_tokens": 22685008}
|
| 6977 |
+
{"current_steps": 34795, "total_steps": 38160, "loss": 0.3805, "lr": 0.0007054296323125269, "epoch": 18.23637316561845, "percentage": 91.18, "elapsed_time": "1:29:35", "remaining_time": "0:08:39", "throughput": 4221.06, "total_tokens": 22688368}
|
| 6978 |
+
{"current_steps": 34800, "total_steps": 38160, "loss": 0.365, "lr": 0.0007033519603053529, "epoch": 18.238993710691823, "percentage": 91.19, "elapsed_time": "1:29:35", "remaining_time": "0:08:39", "throughput": 4221.06, "total_tokens": 22691184}
|
| 6979 |
+
{"current_steps": 34805, "total_steps": 38160, "loss": 0.3789, "lr": 0.0007012772789938415, "epoch": 18.2416142557652, "percentage": 91.21, "elapsed_time": "1:29:36", "remaining_time": "0:08:38", "throughput": 4221.14, "total_tokens": 22695024}
|
| 6980 |
+
{"current_steps": 34810, "total_steps": 38160, "loss": 0.3339, "lr": 0.0006992055888119957, "epoch": 18.244234800838573, "percentage": 91.22, "elapsed_time": "1:29:37", "remaining_time": "0:08:37", "throughput": 4221.25, "total_tokens": 22699088}
|
| 6981 |
+
{"current_steps": 34815, "total_steps": 38160, "loss": 0.3018, "lr": 0.0006971368901931873, "epoch": 18.24685534591195, "percentage": 91.23, "elapsed_time": "1:29:38", "remaining_time": "0:08:36", "throughput": 4221.29, "total_tokens": 22702480}
|
| 6982 |
+
{"current_steps": 34820, "total_steps": 38160, "loss": 0.227, "lr": 0.0006950711835701667, "epoch": 18.249475890985323, "percentage": 91.25, "elapsed_time": "1:29:38", "remaining_time": "0:08:35", "throughput": 4221.33, "total_tokens": 22705840}
|
| 6983 |
+
{"current_steps": 34825, "total_steps": 38160, "loss": 0.3456, "lr": 0.0006930084693750566, "epoch": 18.2520964360587, "percentage": 91.26, "elapsed_time": "1:29:39", "remaining_time": "0:08:35", "throughput": 4221.33, "total_tokens": 22708752}
|
| 6984 |
+
{"current_steps": 34830, "total_steps": 38160, "loss": 0.4201, "lr": 0.0006909487480393534, "epoch": 18.254716981132077, "percentage": 91.27, "elapsed_time": "1:29:40", "remaining_time": "0:08:34", "throughput": 4221.31, "total_tokens": 22711504}
|
| 6985 |
+
{"current_steps": 34835, "total_steps": 38160, "loss": 0.4985, "lr": 0.0006888920199939224, "epoch": 18.25733752620545, "percentage": 91.29, "elapsed_time": "1:29:40", "remaining_time": "0:08:33", "throughput": 4221.32, "total_tokens": 22714384}
|
| 6986 |
+
{"current_steps": 34840, "total_steps": 38160, "loss": 0.377, "lr": 0.0006868382856690114, "epoch": 18.259958071278827, "percentage": 91.3, "elapsed_time": "1:29:41", "remaining_time": "0:08:32", "throughput": 4221.4, "total_tokens": 22718192}
|
| 6987 |
+
{"current_steps": 34845, "total_steps": 38160, "loss": 0.3976, "lr": 0.0006847875454942382, "epoch": 18.2625786163522, "percentage": 91.31, "elapsed_time": "1:29:42", "remaining_time": "0:08:32", "throughput": 4221.45, "total_tokens": 22721616}
|
| 6988 |
+
{"current_steps": 34850, "total_steps": 38160, "loss": 0.4157, "lr": 0.0006827397998985945, "epoch": 18.265199161425578, "percentage": 91.33, "elapsed_time": "1:29:43", "remaining_time": "0:08:31", "throughput": 4221.45, "total_tokens": 22724624}
|
| 6989 |
+
{"current_steps": 34855, "total_steps": 38160, "loss": 0.4099, "lr": 0.0006806950493104447, "epoch": 18.26781970649895, "percentage": 91.34, "elapsed_time": "1:29:43", "remaining_time": "0:08:30", "throughput": 4221.45, "total_tokens": 22727600}
|
| 6990 |
+
{"current_steps": 34860, "total_steps": 38160, "loss": 0.2462, "lr": 0.000678653294157528, "epoch": 18.270440251572328, "percentage": 91.35, "elapsed_time": "1:29:44", "remaining_time": "0:08:29", "throughput": 4221.47, "total_tokens": 22730608}
|
| 6991 |
+
{"current_steps": 34865, "total_steps": 38160, "loss": 0.3461, "lr": 0.0006766145348669544, "epoch": 18.2730607966457, "percentage": 91.37, "elapsed_time": "1:29:45", "remaining_time": "0:08:28", "throughput": 4221.47, "total_tokens": 22733424}
|
| 6992 |
+
{"current_steps": 34870, "total_steps": 38160, "loss": 0.3419, "lr": 0.0006745787718652097, "epoch": 18.27568134171908, "percentage": 91.38, "elapsed_time": "1:29:45", "remaining_time": "0:08:28", "throughput": 4221.53, "total_tokens": 22737008}
|
| 6993 |
+
{"current_steps": 34875, "total_steps": 38160, "loss": 0.335, "lr": 0.0006725460055781546, "epoch": 18.278301886792452, "percentage": 91.39, "elapsed_time": "1:29:46", "remaining_time": "0:08:27", "throughput": 4221.56, "total_tokens": 22740272}
|
| 6994 |
+
{"current_steps": 34880, "total_steps": 38160, "loss": 0.3231, "lr": 0.0006705162364310174, "epoch": 18.28092243186583, "percentage": 91.4, "elapsed_time": "1:29:47", "remaining_time": "0:08:26", "throughput": 4221.66, "total_tokens": 22744464}
|
| 6995 |
+
{"current_steps": 34885, "total_steps": 38160, "loss": 0.3462, "lr": 0.0006684894648484068, "epoch": 18.283542976939202, "percentage": 91.42, "elapsed_time": "1:29:48", "remaining_time": "0:08:25", "throughput": 4221.65, "total_tokens": 22747152}
|
| 6996 |
+
{"current_steps": 34890, "total_steps": 38160, "loss": 0.3457, "lr": 0.0006664656912542954, "epoch": 18.28616352201258, "percentage": 91.43, "elapsed_time": "1:29:49", "remaining_time": "0:08:25", "throughput": 4221.77, "total_tokens": 22751408}
|
| 6997 |
+
{"current_steps": 34895, "total_steps": 38160, "loss": 0.3733, "lr": 0.0006644449160720345, "epoch": 18.288784067085953, "percentage": 91.44, "elapsed_time": "1:29:49", "remaining_time": "0:08:24", "throughput": 4221.88, "total_tokens": 22755568}
|
| 6998 |
+
{"current_steps": 34900, "total_steps": 38160, "loss": 0.3273, "lr": 0.0006624271397243492, "epoch": 18.29140461215933, "percentage": 91.46, "elapsed_time": "1:29:50", "remaining_time": "0:08:23", "throughput": 4221.9, "total_tokens": 22758576}
|
| 6999 |
+
{"current_steps": 34905, "total_steps": 38160, "loss": 0.3862, "lr": 0.0006604123626333369, "epoch": 18.294025157232703, "percentage": 91.47, "elapsed_time": "1:29:51", "remaining_time": "0:08:22", "throughput": 4221.96, "total_tokens": 22762128}
|
| 7000 |
+
{"current_steps": 34910, "total_steps": 38160, "loss": 0.353, "lr": 0.0006584005852204621, "epoch": 18.29664570230608, "percentage": 91.48, "elapsed_time": "1:29:52", "remaining_time": "0:08:21", "throughput": 4221.94, "total_tokens": 22764784}
|
| 7001 |
+
{"current_steps": 34915, "total_steps": 38160, "loss": 0.3442, "lr": 0.0006563918079065683, "epoch": 18.299266247379457, "percentage": 91.5, "elapsed_time": "1:29:52", "remaining_time": "0:08:21", "throughput": 4222.0, "total_tokens": 22768304}
|
| 7002 |
+
{"current_steps": 34920, "total_steps": 38160, "loss": 0.3436, "lr": 0.000654386031111866, "epoch": 18.30188679245283, "percentage": 91.51, "elapsed_time": "1:29:53", "remaining_time": "0:08:20", "throughput": 4222.04, "total_tokens": 22771632}
|
| 7003 |
+
{"current_steps": 34925, "total_steps": 38160, "loss": 0.2784, "lr": 0.0006523832552559428, "epoch": 18.304507337526207, "percentage": 91.52, "elapsed_time": "1:29:54", "remaining_time": "0:08:19", "throughput": 4222.07, "total_tokens": 22774928}
|
| 7004 |
+
{"current_steps": 34930, "total_steps": 38160, "loss": 0.2817, "lr": 0.0006503834807577585, "epoch": 18.30712788259958, "percentage": 91.54, "elapsed_time": "1:29:55", "remaining_time": "0:08:18", "throughput": 4222.14, "total_tokens": 22778640}
|
| 7005 |
+
{"current_steps": 34935, "total_steps": 38160, "loss": 0.2933, "lr": 0.0006483867080356386, "epoch": 18.309748427672957, "percentage": 91.55, "elapsed_time": "1:29:55", "remaining_time": "0:08:18", "throughput": 4222.18, "total_tokens": 22781936}
|
| 7006 |
+
{"current_steps": 34940, "total_steps": 38160, "loss": 0.3042, "lr": 0.0006463929375072874, "epoch": 18.31236897274633, "percentage": 91.56, "elapsed_time": "1:29:56", "remaining_time": "0:08:17", "throughput": 4222.15, "total_tokens": 22784528}
|
| 7007 |
+
{"current_steps": 34945, "total_steps": 38160, "loss": 0.273, "lr": 0.0006444021695897827, "epoch": 18.314989517819708, "percentage": 91.57, "elapsed_time": "1:29:57", "remaining_time": "0:08:16", "throughput": 4222.39, "total_tokens": 22790704}
|
| 7008 |
+
{"current_steps": 34950, "total_steps": 38160, "loss": 0.3799, "lr": 0.0006424144046995634, "epoch": 18.31761006289308, "percentage": 91.59, "elapsed_time": "1:29:58", "remaining_time": "0:08:15", "throughput": 4222.45, "total_tokens": 22794320}
|
| 7009 |
+
{"current_steps": 34955, "total_steps": 38160, "loss": 0.3949, "lr": 0.0006404296432524514, "epoch": 18.320230607966458, "percentage": 91.6, "elapsed_time": "1:29:59", "remaining_time": "0:08:15", "throughput": 4222.57, "total_tokens": 22798544}
|
| 7010 |
+
{"current_steps": 34960, "total_steps": 38160, "loss": 0.2873, "lr": 0.000638447885663635, "epoch": 18.32285115303983, "percentage": 91.61, "elapsed_time": "1:29:59", "remaining_time": "0:08:14", "throughput": 4222.57, "total_tokens": 22801392}
|
| 7011 |
+
{"current_steps": 34965, "total_steps": 38160, "loss": 0.3363, "lr": 0.0006364691323476756, "epoch": 18.32547169811321, "percentage": 91.63, "elapsed_time": "1:30:00", "remaining_time": "0:08:13", "throughput": 4222.62, "total_tokens": 22804880}
|
| 7012 |
+
{"current_steps": 34970, "total_steps": 38160, "loss": 0.5206, "lr": 0.0006344933837185074, "epoch": 18.328092243186582, "percentage": 91.64, "elapsed_time": "1:30:01", "remaining_time": "0:08:12", "throughput": 4222.65, "total_tokens": 22808112}
|
| 7013 |
+
{"current_steps": 34975, "total_steps": 38160, "loss": 0.3825, "lr": 0.0006325206401894312, "epoch": 18.33071278825996, "percentage": 91.65, "elapsed_time": "1:30:02", "remaining_time": "0:08:11", "throughput": 4222.93, "total_tokens": 22814544}
|
| 7014 |
+
{"current_steps": 34980, "total_steps": 38160, "loss": 0.281, "lr": 0.0006305509021731237, "epoch": 18.333333333333332, "percentage": 91.67, "elapsed_time": "1:30:03", "remaining_time": "0:08:11", "throughput": 4222.91, "total_tokens": 22817200}
|
| 7015 |
+
{"current_steps": 34985, "total_steps": 38160, "loss": 0.269, "lr": 0.0006285841700816353, "epoch": 18.33595387840671, "percentage": 91.68, "elapsed_time": "1:30:03", "remaining_time": "0:08:10", "throughput": 4222.91, "total_tokens": 22820048}
|
| 7016 |
+
{"current_steps": 34990, "total_steps": 38160, "loss": 0.2856, "lr": 0.0006266204443263784, "epoch": 18.338574423480082, "percentage": 91.69, "elapsed_time": "1:30:04", "remaining_time": "0:08:09", "throughput": 4222.94, "total_tokens": 22823280}
|
| 7017 |
+
{"current_steps": 34995, "total_steps": 38160, "loss": 0.395, "lr": 0.000624659725318143, "epoch": 18.34119496855346, "percentage": 91.71, "elapsed_time": "1:30:05", "remaining_time": "0:08:08", "throughput": 4222.95, "total_tokens": 22826224}
|
| 7018 |
+
{"current_steps": 35000, "total_steps": 38160, "loss": 0.2704, "lr": 0.000622702013467094, "epoch": 18.343815513626833, "percentage": 91.72, "elapsed_time": "1:30:05", "remaining_time": "0:08:08", "throughput": 4222.97, "total_tokens": 22829360}
|
| 7019 |
+
{"current_steps": 35005, "total_steps": 38160, "loss": 0.3323, "lr": 0.0006207473091827558, "epoch": 18.34643605870021, "percentage": 91.73, "elapsed_time": "1:30:06", "remaining_time": "0:08:07", "throughput": 4223.01, "total_tokens": 22832688}
|
| 7020 |
+
{"current_steps": 35010, "total_steps": 38160, "loss": 0.3066, "lr": 0.0006187956128740362, "epoch": 18.349056603773583, "percentage": 91.75, "elapsed_time": "1:30:07", "remaining_time": "0:08:06", "throughput": 4223.06, "total_tokens": 22836176}
|
| 7021 |
+
{"current_steps": 35015, "total_steps": 38160, "loss": 0.2505, "lr": 0.0006168469249492037, "epoch": 18.35167714884696, "percentage": 91.76, "elapsed_time": "1:30:08", "remaining_time": "0:08:05", "throughput": 4223.06, "total_tokens": 22839088}
|
| 7022 |
+
{"current_steps": 35020, "total_steps": 38160, "loss": 0.3401, "lr": 0.000614901245815902, "epoch": 18.354297693920337, "percentage": 91.77, "elapsed_time": "1:30:08", "remaining_time": "0:08:04", "throughput": 4223.1, "total_tokens": 22842384}
|
| 7023 |
+
{"current_steps": 35025, "total_steps": 38160, "loss": 0.3488, "lr": 0.0006129585758811456, "epoch": 18.35691823899371, "percentage": 91.78, "elapsed_time": "1:30:09", "remaining_time": "0:08:04", "throughput": 4223.1, "total_tokens": 22845232}
|
| 7024 |
+
{"current_steps": 35030, "total_steps": 38160, "loss": 0.3477, "lr": 0.0006110189155513212, "epoch": 18.359538784067087, "percentage": 91.8, "elapsed_time": "1:30:10", "remaining_time": "0:08:03", "throughput": 4223.1, "total_tokens": 22848016}
|
| 7025 |
+
{"current_steps": 35035, "total_steps": 38160, "loss": 0.4473, "lr": 0.0006090822652321809, "epoch": 18.36215932914046, "percentage": 91.81, "elapsed_time": "1:30:10", "remaining_time": "0:08:02", "throughput": 4223.08, "total_tokens": 22850672}
|
| 7026 |
+
{"current_steps": 35040, "total_steps": 38160, "loss": 0.2436, "lr": 0.0006071486253288488, "epoch": 18.364779874213838, "percentage": 91.82, "elapsed_time": "1:30:11", "remaining_time": "0:08:01", "throughput": 4223.08, "total_tokens": 22853584}
|
| 7027 |
+
{"current_steps": 35045, "total_steps": 38160, "loss": 0.344, "lr": 0.0006052179962458215, "epoch": 18.36740041928721, "percentage": 91.84, "elapsed_time": "1:30:12", "remaining_time": "0:08:01", "throughput": 4223.16, "total_tokens": 22857424}
|
| 7028 |
+
{"current_steps": 35050, "total_steps": 38160, "loss": 0.3233, "lr": 0.0006032903783869659, "epoch": 18.370020964360588, "percentage": 91.85, "elapsed_time": "1:30:13", "remaining_time": "0:08:00", "throughput": 4223.26, "total_tokens": 22861456}
|
| 7029 |
+
{"current_steps": 35055, "total_steps": 38160, "loss": 0.3274, "lr": 0.000601365772155516, "epoch": 18.37264150943396, "percentage": 91.86, "elapsed_time": "1:30:13", "remaining_time": "0:07:59", "throughput": 4223.3, "total_tokens": 22864752}
|
| 7030 |
+
{"current_steps": 35060, "total_steps": 38160, "loss": 0.3064, "lr": 0.0005994441779540816, "epoch": 18.37526205450734, "percentage": 91.88, "elapsed_time": "1:30:14", "remaining_time": "0:07:58", "throughput": 4223.39, "total_tokens": 22868624}
|
| 7031 |
+
{"current_steps": 35065, "total_steps": 38160, "loss": 0.404, "lr": 0.0005975255961846343, "epoch": 18.377882599580712, "percentage": 91.89, "elapsed_time": "1:30:15", "remaining_time": "0:07:57", "throughput": 4223.42, "total_tokens": 22871760}
|
| 7032 |
+
{"current_steps": 35070, "total_steps": 38160, "loss": 0.3111, "lr": 0.0005956100272485182, "epoch": 18.38050314465409, "percentage": 91.9, "elapsed_time": "1:30:16", "remaining_time": "0:07:57", "throughput": 4223.47, "total_tokens": 22875248}
|
| 7033 |
+
{"current_steps": 35075, "total_steps": 38160, "loss": 0.3974, "lr": 0.0005936974715464494, "epoch": 18.383123689727462, "percentage": 91.92, "elapsed_time": "1:30:16", "remaining_time": "0:07:56", "throughput": 4223.52, "total_tokens": 22878640}
|
| 7034 |
+
{"current_steps": 35080, "total_steps": 38160, "loss": 0.3105, "lr": 0.0005917879294785144, "epoch": 18.38574423480084, "percentage": 91.93, "elapsed_time": "1:30:17", "remaining_time": "0:07:55", "throughput": 4223.53, "total_tokens": 22881584}
|
| 7035 |
+
{"current_steps": 35085, "total_steps": 38160, "loss": 0.3108, "lr": 0.0005898814014441689, "epoch": 18.388364779874212, "percentage": 91.94, "elapsed_time": "1:30:18", "remaining_time": "0:07:54", "throughput": 4223.52, "total_tokens": 22884368}
|
| 7036 |
+
{"current_steps": 35090, "total_steps": 38160, "loss": 0.3956, "lr": 0.0005879778878422353, "epoch": 18.39098532494759, "percentage": 91.95, "elapsed_time": "1:30:18", "remaining_time": "0:07:54", "throughput": 4223.49, "total_tokens": 22886896}
|
| 7037 |
+
{"current_steps": 35095, "total_steps": 38160, "loss": 0.4129, "lr": 0.0005860773890709053, "epoch": 18.393605870020963, "percentage": 91.97, "elapsed_time": "1:30:19", "remaining_time": "0:07:53", "throughput": 4223.56, "total_tokens": 22890448}
|
| 7038 |
+
{"current_steps": 35100, "total_steps": 38160, "loss": 0.3616, "lr": 0.0005841799055277408, "epoch": 18.39622641509434, "percentage": 91.98, "elapsed_time": "1:30:20", "remaining_time": "0:07:52", "throughput": 4223.63, "total_tokens": 22894224}
|
| 7039 |
+
{"current_steps": 35105, "total_steps": 38160, "loss": 0.4386, "lr": 0.0005822854376096775, "epoch": 18.398846960167713, "percentage": 91.99, "elapsed_time": "1:30:21", "remaining_time": "0:07:51", "throughput": 4223.68, "total_tokens": 22897680}
|
| 7040 |
+
{"current_steps": 35110, "total_steps": 38160, "loss": 0.3297, "lr": 0.0005803939857130152, "epoch": 18.40146750524109, "percentage": 92.01, "elapsed_time": "1:30:21", "remaining_time": "0:07:51", "throughput": 4223.71, "total_tokens": 22900880}
|
| 7041 |
+
{"current_steps": 35115, "total_steps": 38160, "loss": 0.3649, "lr": 0.000578505550233424, "epoch": 18.404088050314467, "percentage": 92.02, "elapsed_time": "1:30:22", "remaining_time": "0:07:50", "throughput": 4223.72, "total_tokens": 22903984}
|
| 7042 |
+
{"current_steps": 35120, "total_steps": 38160, "loss": 0.3846, "lr": 0.0005766201315659413, "epoch": 18.40670859538784, "percentage": 92.03, "elapsed_time": "1:30:23", "remaining_time": "0:07:49", "throughput": 4223.76, "total_tokens": 22907216}
|
| 7043 |
+
{"current_steps": 35125, "total_steps": 38160, "loss": 0.4766, "lr": 0.0005747377301049767, "epoch": 18.409329140461217, "percentage": 92.05, "elapsed_time": "1:30:24", "remaining_time": "0:07:48", "throughput": 4223.78, "total_tokens": 22910352}
|
| 7044 |
+
{"current_steps": 35130, "total_steps": 38160, "loss": 0.5015, "lr": 0.0005728583462443065, "epoch": 18.41194968553459, "percentage": 92.06, "elapsed_time": "1:30:24", "remaining_time": "0:07:47", "throughput": 4223.82, "total_tokens": 22913776}
|
| 7045 |
+
{"current_steps": 35135, "total_steps": 38160, "loss": 0.4899, "lr": 0.0005709819803770765, "epoch": 18.414570230607968, "percentage": 92.07, "elapsed_time": "1:30:25", "remaining_time": "0:07:47", "throughput": 4223.91, "total_tokens": 22917584}
|
| 7046 |
+
{"current_steps": 35140, "total_steps": 38160, "loss": 0.3327, "lr": 0.0005691086328958029, "epoch": 18.41719077568134, "percentage": 92.09, "elapsed_time": "1:30:26", "remaining_time": "0:07:46", "throughput": 4223.92, "total_tokens": 22920592}
|
| 7047 |
+
{"current_steps": 35145, "total_steps": 38160, "loss": 0.3132, "lr": 0.0005672383041923668, "epoch": 18.419811320754718, "percentage": 92.1, "elapsed_time": "1:30:27", "remaining_time": "0:07:45", "throughput": 4223.9, "total_tokens": 22923248}
|
| 7048 |
+
{"current_steps": 35150, "total_steps": 38160, "loss": 0.302, "lr": 0.0005653709946580188, "epoch": 18.42243186582809, "percentage": 92.11, "elapsed_time": "1:30:27", "remaining_time": "0:07:44", "throughput": 4223.97, "total_tokens": 22926832}
|
| 7049 |
+
{"current_steps": 35155, "total_steps": 38160, "loss": 0.3081, "lr": 0.000563506704683378, "epoch": 18.42505241090147, "percentage": 92.13, "elapsed_time": "1:30:28", "remaining_time": "0:07:44", "throughput": 4224.02, "total_tokens": 22930256}
|
| 7050 |
+
{"current_steps": 35160, "total_steps": 38160, "loss": 0.4575, "lr": 0.0005616454346584359, "epoch": 18.427672955974842, "percentage": 92.14, "elapsed_time": "1:30:29", "remaining_time": "0:07:43", "throughput": 4224.05, "total_tokens": 22933552}
|
| 7051 |
+
{"current_steps": 35165, "total_steps": 38160, "loss": 0.3121, "lr": 0.0005597871849725494, "epoch": 18.43029350104822, "percentage": 92.15, "elapsed_time": "1:30:29", "remaining_time": "0:07:42", "throughput": 4224.02, "total_tokens": 22936112}
|
| 7052 |
+
{"current_steps": 35170, "total_steps": 38160, "loss": 0.4452, "lr": 0.0005579319560144408, "epoch": 18.432914046121592, "percentage": 92.16, "elapsed_time": "1:30:30", "remaining_time": "0:07:41", "throughput": 4224.04, "total_tokens": 22939376}
|
| 7053 |
+
{"current_steps": 35175, "total_steps": 38160, "loss": 0.3045, "lr": 0.0005560797481722013, "epoch": 18.43553459119497, "percentage": 92.18, "elapsed_time": "1:30:31", "remaining_time": "0:07:40", "throughput": 4224.05, "total_tokens": 22942416}
|
| 7054 |
+
{"current_steps": 35180, "total_steps": 38160, "loss": 0.4021, "lr": 0.0005542305618332926, "epoch": 18.438155136268342, "percentage": 92.19, "elapsed_time": "1:30:32", "remaining_time": "0:07:40", "throughput": 4224.07, "total_tokens": 22945520}
|
| 7055 |
+
{"current_steps": 35185, "total_steps": 38160, "loss": 0.4145, "lr": 0.0005523843973845455, "epoch": 18.44077568134172, "percentage": 92.2, "elapsed_time": "1:30:32", "remaining_time": "0:07:39", "throughput": 4224.06, "total_tokens": 22948240}
|
| 7056 |
+
{"current_steps": 35190, "total_steps": 38160, "loss": 0.4106, "lr": 0.0005505412552121575, "epoch": 18.443396226415093, "percentage": 92.22, "elapsed_time": "1:30:33", "remaining_time": "0:07:38", "throughput": 4224.09, "total_tokens": 22951536}
|
| 7057 |
+
{"current_steps": 35195, "total_steps": 38160, "loss": 0.3359, "lr": 0.0005487011357016886, "epoch": 18.44601677148847, "percentage": 92.23, "elapsed_time": "1:30:34", "remaining_time": "0:07:37", "throughput": 4224.17, "total_tokens": 22955280}
|
| 7058 |
+
{"current_steps": 35200, "total_steps": 38160, "loss": 0.3774, "lr": 0.0005468640392380741, "epoch": 18.448637316561843, "percentage": 92.24, "elapsed_time": "1:30:35", "remaining_time": "0:07:37", "throughput": 4224.21, "total_tokens": 22958576}
|
| 7059 |
+
{"current_steps": 35205, "total_steps": 38160, "loss": 0.3827, "lr": 0.0005450299662056118, "epoch": 18.45125786163522, "percentage": 92.26, "elapsed_time": "1:30:35", "remaining_time": "0:07:36", "throughput": 4224.2, "total_tokens": 22961328}
|
| 7060 |
+
{"current_steps": 35210, "total_steps": 38160, "loss": 0.2711, "lr": 0.000543198916987968, "epoch": 18.453878406708597, "percentage": 92.27, "elapsed_time": "1:30:36", "remaining_time": "0:07:35", "throughput": 4224.25, "total_tokens": 22964880}
|
| 7061 |
+
{"current_steps": 35215, "total_steps": 38160, "loss": 0.3339, "lr": 0.0005413708919681798, "epoch": 18.45649895178197, "percentage": 92.28, "elapsed_time": "1:30:37", "remaining_time": "0:07:34", "throughput": 4224.33, "total_tokens": 22968848}
|
| 7062 |
+
{"current_steps": 35220, "total_steps": 38160, "loss": 0.4721, "lr": 0.000539545891528651, "epoch": 18.459119496855347, "percentage": 92.3, "elapsed_time": "1:30:38", "remaining_time": "0:07:33", "throughput": 4224.44, "total_tokens": 22972944}
|
| 7063 |
+
{"current_steps": 35225, "total_steps": 38160, "loss": 0.381, "lr": 0.0005377239160511449, "epoch": 18.46174004192872, "percentage": 92.31, "elapsed_time": "1:30:38", "remaining_time": "0:07:33", "throughput": 4224.47, "total_tokens": 22976208}
|
| 7064 |
+
{"current_steps": 35230, "total_steps": 38160, "loss": 0.3419, "lr": 0.0005359049659168031, "epoch": 18.464360587002098, "percentage": 92.32, "elapsed_time": "1:30:39", "remaining_time": "0:07:32", "throughput": 4224.46, "total_tokens": 22978832}
|
| 7065 |
+
{"current_steps": 35235, "total_steps": 38160, "loss": 0.4098, "lr": 0.0005340890415061261, "epoch": 18.46698113207547, "percentage": 92.33, "elapsed_time": "1:30:40", "remaining_time": "0:07:31", "throughput": 4224.49, "total_tokens": 22982096}
|
| 7066 |
+
{"current_steps": 35240, "total_steps": 38160, "loss": 0.5097, "lr": 0.0005322761431989853, "epoch": 18.469601677148848, "percentage": 92.35, "elapsed_time": "1:30:41", "remaining_time": "0:07:30", "throughput": 4224.58, "total_tokens": 22986032}
|
| 7067 |
+
{"current_steps": 35245, "total_steps": 38160, "loss": 0.2958, "lr": 0.0005304662713746205, "epoch": 18.47222222222222, "percentage": 92.36, "elapsed_time": "1:30:41", "remaining_time": "0:07:30", "throughput": 4224.6, "total_tokens": 22989168}
|
| 7068 |
+
{"current_steps": 35250, "total_steps": 38160, "loss": 0.4169, "lr": 0.000528659426411634, "epoch": 18.4748427672956, "percentage": 92.37, "elapsed_time": "1:30:42", "remaining_time": "0:07:29", "throughput": 4224.66, "total_tokens": 22992784}
|
| 7069 |
+
{"current_steps": 35255, "total_steps": 38160, "loss": 0.2712, "lr": 0.0005268556086879982, "epoch": 18.47746331236897, "percentage": 92.39, "elapsed_time": "1:30:43", "remaining_time": "0:07:28", "throughput": 4224.68, "total_tokens": 22995824}
|
| 7070 |
+
{"current_steps": 35260, "total_steps": 38160, "loss": 0.4034, "lr": 0.0005250548185810499, "epoch": 18.48008385744235, "percentage": 92.4, "elapsed_time": "1:30:43", "remaining_time": "0:07:27", "throughput": 4224.7, "total_tokens": 22998928}
|
| 7071 |
+
{"current_steps": 35265, "total_steps": 38160, "loss": 0.2729, "lr": 0.0005232570564674926, "epoch": 18.482704402515722, "percentage": 92.41, "elapsed_time": "1:30:44", "remaining_time": "0:07:26", "throughput": 4224.66, "total_tokens": 23001424}
|
| 7072 |
+
{"current_steps": 35270, "total_steps": 38160, "loss": 0.3035, "lr": 0.0005214623227234039, "epoch": 18.4853249475891, "percentage": 92.43, "elapsed_time": "1:30:45", "remaining_time": "0:07:26", "throughput": 4224.71, "total_tokens": 23004784}
|
| 7073 |
+
{"current_steps": 35275, "total_steps": 38160, "loss": 0.2946, "lr": 0.000519670617724215, "epoch": 18.487945492662472, "percentage": 92.44, "elapsed_time": "1:30:46", "remaining_time": "0:07:25", "throughput": 4224.73, "total_tokens": 23008048}
|
| 7074 |
+
{"current_steps": 35280, "total_steps": 38160, "loss": 0.2567, "lr": 0.0005178819418447311, "epoch": 18.49056603773585, "percentage": 92.45, "elapsed_time": "1:30:46", "remaining_time": "0:07:24", "throughput": 4224.69, "total_tokens": 23010416}
|
| 7075 |
+
{"current_steps": 35285, "total_steps": 38160, "loss": 0.3381, "lr": 0.000516096295459128, "epoch": 18.493186582809223, "percentage": 92.47, "elapsed_time": "1:30:47", "remaining_time": "0:07:23", "throughput": 4224.71, "total_tokens": 23013616}
|
| 7076 |
+
{"current_steps": 35290, "total_steps": 38160, "loss": 0.347, "lr": 0.0005143136789409352, "epoch": 18.4958071278826, "percentage": 92.48, "elapsed_time": "1:30:48", "remaining_time": "0:07:23", "throughput": 4224.81, "total_tokens": 23017808}
|
| 7077 |
+
{"current_steps": 35295, "total_steps": 38160, "loss": 0.2626, "lr": 0.0005125340926630612, "epoch": 18.498427672955973, "percentage": 92.49, "elapsed_time": "1:30:48", "remaining_time": "0:07:22", "throughput": 4224.81, "total_tokens": 23020720}
|
| 7078 |
+
{"current_steps": 35300, "total_steps": 38160, "loss": 0.2302, "lr": 0.0005107575369977729, "epoch": 18.50104821802935, "percentage": 92.51, "elapsed_time": "1:30:49", "remaining_time": "0:07:21", "throughput": 4224.89, "total_tokens": 23024560}
|
| 7079 |
+
{"current_steps": 35305, "total_steps": 38160, "loss": 0.3257, "lr": 0.0005089840123167049, "epoch": 18.503668763102727, "percentage": 92.52, "elapsed_time": "1:30:50", "remaining_time": "0:07:20", "throughput": 4224.93, "total_tokens": 23027856}
|
| 7080 |
+
{"current_steps": 35310, "total_steps": 38160, "loss": 0.2862, "lr": 0.0005072135189908606, "epoch": 18.5062893081761, "percentage": 92.53, "elapsed_time": "1:30:51", "remaining_time": "0:07:19", "throughput": 4224.95, "total_tokens": 23031024}
|
| 7081 |
+
{"current_steps": 35315, "total_steps": 38160, "loss": 0.2751, "lr": 0.0005054460573906067, "epoch": 18.508909853249477, "percentage": 92.54, "elapsed_time": "1:30:51", "remaining_time": "0:07:19", "throughput": 4224.97, "total_tokens": 23034192}
|
| 7082 |
+
{"current_steps": 35320, "total_steps": 38160, "loss": 0.3813, "lr": 0.0005036816278856726, "epoch": 18.51153039832285, "percentage": 92.56, "elapsed_time": "1:30:52", "remaining_time": "0:07:18", "throughput": 4225.02, "total_tokens": 23037680}
|
| 7083 |
+
{"current_steps": 35325, "total_steps": 38160, "loss": 0.3148, "lr": 0.0005019202308451614, "epoch": 18.514150943396228, "percentage": 92.57, "elapsed_time": "1:30:53", "remaining_time": "0:07:17", "throughput": 4225.04, "total_tokens": 23040784}
|
| 7084 |
+
{"current_steps": 35330, "total_steps": 38160, "loss": 0.3402, "lr": 0.0005001618666375335, "epoch": 18.5167714884696, "percentage": 92.58, "elapsed_time": "1:30:54", "remaining_time": "0:07:16", "throughput": 4225.05, "total_tokens": 23043888}
|
| 7085 |
+
{"current_steps": 35335, "total_steps": 38160, "loss": 0.3578, "lr": 0.0004984065356306195, "epoch": 18.519392033542978, "percentage": 92.6, "elapsed_time": "1:30:54", "remaining_time": "0:07:16", "throughput": 4225.06, "total_tokens": 23046928}
|
| 7086 |
+
{"current_steps": 35340, "total_steps": 38160, "loss": 0.2871, "lr": 0.0004966542381916173, "epoch": 18.52201257861635, "percentage": 92.61, "elapsed_time": "1:30:55", "remaining_time": "0:07:15", "throughput": 4225.24, "total_tokens": 23052016}
|
| 7087 |
+
{"current_steps": 35345, "total_steps": 38160, "loss": 0.3188, "lr": 0.0004949049746870837, "epoch": 18.52463312368973, "percentage": 92.62, "elapsed_time": "1:30:56", "remaining_time": "0:07:14", "throughput": 4225.31, "total_tokens": 23055632}
|
| 7088 |
+
{"current_steps": 35350, "total_steps": 38160, "loss": 0.3499, "lr": 0.0004931587454829494, "epoch": 18.5272536687631, "percentage": 92.64, "elapsed_time": "1:30:57", "remaining_time": "0:07:13", "throughput": 4225.32, "total_tokens": 23058640}
|
| 7089 |
+
{"current_steps": 35355, "total_steps": 38160, "loss": 0.3754, "lr": 0.0004914155509445006, "epoch": 18.52987421383648, "percentage": 92.65, "elapsed_time": "1:30:57", "remaining_time": "0:07:13", "throughput": 4225.32, "total_tokens": 23061488}
|
| 7090 |
+
{"current_steps": 35360, "total_steps": 38160, "loss": 0.2664, "lr": 0.0004896753914363955, "epoch": 18.532494758909852, "percentage": 92.66, "elapsed_time": "1:30:58", "remaining_time": "0:07:12", "throughput": 4225.29, "total_tokens": 23064016}
|
| 7091 |
+
{"current_steps": 35365, "total_steps": 38160, "loss": 0.3143, "lr": 0.0004879382673226545, "epoch": 18.53511530398323, "percentage": 92.68, "elapsed_time": "1:30:59", "remaining_time": "0:07:11", "throughput": 4225.25, "total_tokens": 23066576}
|
| 7092 |
+
{"current_steps": 35370, "total_steps": 38160, "loss": 0.3956, "lr": 0.00048620417896666875, "epoch": 18.537735849056602, "percentage": 92.69, "elapsed_time": "1:30:59", "remaining_time": "0:07:10", "throughput": 4225.29, "total_tokens": 23069936}
|
| 7093 |
+
{"current_steps": 35375, "total_steps": 38160, "loss": 0.6297, "lr": 0.00048447312673118633, "epoch": 18.54035639412998, "percentage": 92.7, "elapsed_time": "1:31:00", "remaining_time": "0:07:09", "throughput": 4225.33, "total_tokens": 23073392}
|
| 7094 |
+
{"current_steps": 35380, "total_steps": 38160, "loss": 0.3624, "lr": 0.00048274511097832427, "epoch": 18.542976939203353, "percentage": 92.71, "elapsed_time": "1:31:01", "remaining_time": "0:07:09", "throughput": 4225.57, "total_tokens": 23079568}
|
| 7095 |
+
{"current_steps": 35385, "total_steps": 38160, "loss": 0.2644, "lr": 0.0004810201320695617, "epoch": 18.54559748427673, "percentage": 92.73, "elapsed_time": "1:31:02", "remaining_time": "0:07:08", "throughput": 4225.56, "total_tokens": 23082480}
|
| 7096 |
+
{"current_steps": 35390, "total_steps": 38160, "loss": 0.2942, "lr": 0.00047929819036574505, "epoch": 18.548218029350103, "percentage": 92.74, "elapsed_time": "1:31:03", "remaining_time": "0:07:07", "throughput": 4225.55, "total_tokens": 23085328}
|
| 7097 |
+
{"current_steps": 35395, "total_steps": 38160, "loss": 0.3768, "lr": 0.00047757928622708777, "epoch": 18.55083857442348, "percentage": 92.75, "elapsed_time": "1:31:03", "remaining_time": "0:07:06", "throughput": 4225.54, "total_tokens": 23088208}
|
| 7098 |
+
{"current_steps": 35400, "total_steps": 38160, "loss": 0.2681, "lr": 0.00047586342001316393, "epoch": 18.553459119496857, "percentage": 92.77, "elapsed_time": "1:31:04", "remaining_time": "0:07:06", "throughput": 4225.56, "total_tokens": 23091376}
|
| 7099 |
+
{"current_steps": 35405, "total_steps": 38160, "loss": 0.4002, "lr": 0.0004741505920829131, "epoch": 18.55607966457023, "percentage": 92.78, "elapsed_time": "1:31:05", "remaining_time": "0:07:05", "throughput": 4225.61, "total_tokens": 23094832}
|
| 7100 |
+
{"current_steps": 35410, "total_steps": 38160, "loss": 0.3076, "lr": 0.00047244080279463694, "epoch": 18.558700209643607, "percentage": 92.79, "elapsed_time": "1:31:06", "remaining_time": "0:07:04", "throughput": 4225.61, "total_tokens": 23097808}
|
| 7101 |
+
{"current_steps": 35415, "total_steps": 38160, "loss": 0.4253, "lr": 0.00047073405250600605, "epoch": 18.56132075471698, "percentage": 92.81, "elapsed_time": "1:31:06", "remaining_time": "0:07:03", "throughput": 4225.59, "total_tokens": 23100464}
|
| 7102 |
+
{"current_steps": 35420, "total_steps": 38160, "loss": 0.2877, "lr": 0.00046903034157405154, "epoch": 18.563941299790358, "percentage": 92.82, "elapsed_time": "1:31:07", "remaining_time": "0:07:02", "throughput": 4225.62, "total_tokens": 23103760}
|
| 7103 |
+
{"current_steps": 35425, "total_steps": 38160, "loss": 0.3468, "lr": 0.0004673296703551732, "epoch": 18.56656184486373, "percentage": 92.83, "elapsed_time": "1:31:08", "remaining_time": "0:07:02", "throughput": 4225.63, "total_tokens": 23106832}
|
| 7104 |
+
{"current_steps": 35430, "total_steps": 38160, "loss": 0.4086, "lr": 0.0004656320392051333, "epoch": 18.569182389937108, "percentage": 92.85, "elapsed_time": "1:31:09", "remaining_time": "0:07:01", "throughput": 4225.72, "total_tokens": 23110832}
|
| 7105 |
+
{"current_steps": 35435, "total_steps": 38160, "loss": 0.2344, "lr": 0.00046393744847904924, "epoch": 18.57180293501048, "percentage": 92.86, "elapsed_time": "1:31:09", "remaining_time": "0:07:00", "throughput": 4225.74, "total_tokens": 23113968}
|
| 7106 |
+
{"current_steps": 35440, "total_steps": 38160, "loss": 0.3383, "lr": 0.00046224589853141596, "epoch": 18.57442348008386, "percentage": 92.87, "elapsed_time": "1:31:10", "remaining_time": "0:06:59", "throughput": 4225.74, "total_tokens": 23116944}
|
| 7107 |
+
{"current_steps": 35445, "total_steps": 38160, "loss": 0.4199, "lr": 0.00046055738971608525, "epoch": 18.57704402515723, "percentage": 92.89, "elapsed_time": "1:31:11", "remaining_time": "0:06:59", "throughput": 4225.77, "total_tokens": 23120112}
|
| 7108 |
+
{"current_steps": 35450, "total_steps": 38160, "loss": 0.421, "lr": 0.00045887192238627307, "epoch": 18.57966457023061, "percentage": 92.9, "elapsed_time": "1:31:12", "remaining_time": "0:06:58", "throughput": 4225.86, "total_tokens": 23124176}
|
| 7109 |
+
{"current_steps": 35455, "total_steps": 38160, "loss": 0.3121, "lr": 0.0004571894968945639, "epoch": 18.582285115303982, "percentage": 92.91, "elapsed_time": "1:31:12", "remaining_time": "0:06:57", "throughput": 4225.84, "total_tokens": 23126832}
|
| 7110 |
+
{"current_steps": 35460, "total_steps": 38160, "loss": 0.3513, "lr": 0.00045551011359289795, "epoch": 18.58490566037736, "percentage": 92.92, "elapsed_time": "1:31:13", "remaining_time": "0:06:56", "throughput": 4225.86, "total_tokens": 23129904}
|
| 7111 |
+
{"current_steps": 35465, "total_steps": 38160, "loss": 0.3363, "lr": 0.00045383377283258087, "epoch": 18.587526205450732, "percentage": 92.94, "elapsed_time": "1:31:14", "remaining_time": "0:06:55", "throughput": 4225.9, "total_tokens": 23133200}
|
| 7112 |
+
{"current_steps": 35470, "total_steps": 38160, "loss": 0.3027, "lr": 0.00045216047496428877, "epoch": 18.59014675052411, "percentage": 92.95, "elapsed_time": "1:31:14", "remaining_time": "0:06:55", "throughput": 4225.93, "total_tokens": 23136496}
|
| 7113 |
+
{"current_steps": 35475, "total_steps": 38160, "loss": 0.2652, "lr": 0.00045049022033805504, "epoch": 18.592767295597483, "percentage": 92.96, "elapsed_time": "1:31:15", "remaining_time": "0:06:54", "throughput": 4225.94, "total_tokens": 23139472}
|
| 7114 |
+
{"current_steps": 35480, "total_steps": 38160, "loss": 0.3176, "lr": 0.0004488230093032785, "epoch": 18.59538784067086, "percentage": 92.98, "elapsed_time": "1:31:16", "remaining_time": "0:06:53", "throughput": 4225.93, "total_tokens": 23142384}
|
| 7115 |
+
{"current_steps": 35485, "total_steps": 38160, "loss": 0.3517, "lr": 0.00044715884220872026, "epoch": 18.598008385744233, "percentage": 92.99, "elapsed_time": "1:31:16", "remaining_time": "0:06:52", "throughput": 4225.94, "total_tokens": 23145328}
|
| 7116 |
+
{"current_steps": 35490, "total_steps": 38160, "loss": 0.3575, "lr": 0.0004454977194025017, "epoch": 18.60062893081761, "percentage": 93.0, "elapsed_time": "1:31:17", "remaining_time": "0:06:52", "throughput": 4225.99, "total_tokens": 23148912}
|
| 7117 |
+
{"current_steps": 35495, "total_steps": 38160, "loss": 0.3561, "lr": 0.00044383964123211495, "epoch": 18.603249475890987, "percentage": 93.02, "elapsed_time": "1:31:18", "remaining_time": "0:06:51", "throughput": 4225.97, "total_tokens": 23151632}
|
| 7118 |
+
{"current_steps": 35500, "total_steps": 38160, "loss": 0.3223, "lr": 0.0004421846080444108, "epoch": 18.60587002096436, "percentage": 93.03, "elapsed_time": "1:31:19", "remaining_time": "0:06:50", "throughput": 4225.93, "total_tokens": 23154032}
|
| 7119 |
+
{"current_steps": 35505, "total_steps": 38160, "loss": 0.377, "lr": 0.0004405326201856008, "epoch": 18.608490566037737, "percentage": 93.04, "elapsed_time": "1:31:19", "remaining_time": "0:06:49", "throughput": 4225.93, "total_tokens": 23156976}
|
| 7120 |
+
{"current_steps": 35510, "total_steps": 38160, "loss": 0.3329, "lr": 0.00043888367800126504, "epoch": 18.61111111111111, "percentage": 93.06, "elapsed_time": "1:31:20", "remaining_time": "0:06:48", "throughput": 4225.9, "total_tokens": 23159472}
|
| 7121 |
+
{"current_steps": 35515, "total_steps": 38160, "loss": 0.4128, "lr": 0.0004372377818363426, "epoch": 18.613731656184488, "percentage": 93.07, "elapsed_time": "1:31:21", "remaining_time": "0:06:48", "throughput": 4225.91, "total_tokens": 23162672}
|
| 7122 |
+
{"current_steps": 35520, "total_steps": 38160, "loss": 0.3785, "lr": 0.0004355949320351332, "epoch": 18.61635220125786, "percentage": 93.08, "elapsed_time": "1:31:21", "remaining_time": "0:06:47", "throughput": 4225.9, "total_tokens": 23165456}
|
| 7123 |
+
{"current_steps": 35525, "total_steps": 38160, "loss": 0.3422, "lr": 0.00043395512894130514, "epoch": 18.618972746331238, "percentage": 93.09, "elapsed_time": "1:31:22", "remaining_time": "0:06:46", "throughput": 4225.91, "total_tokens": 23168656}
|
| 7124 |
+
{"current_steps": 35530, "total_steps": 38160, "loss": 0.3543, "lr": 0.00043231837289788407, "epoch": 18.62159329140461, "percentage": 93.11, "elapsed_time": "1:31:23", "remaining_time": "0:06:45", "throughput": 4225.94, "total_tokens": 23171856}
|
| 7125 |
+
{"current_steps": 35535, "total_steps": 38160, "loss": 0.3153, "lr": 0.00043068466424726616, "epoch": 18.62421383647799, "percentage": 93.12, "elapsed_time": "1:31:24", "remaining_time": "0:06:45", "throughput": 4226.03, "total_tokens": 23175856}
|
| 7126 |
+
{"current_steps": 35540, "total_steps": 38160, "loss": 0.3927, "lr": 0.0004290540033311996, "epoch": 18.62683438155136, "percentage": 93.13, "elapsed_time": "1:31:24", "remaining_time": "0:06:44", "throughput": 4226.04, "total_tokens": 23178928}
|
| 7127 |
+
{"current_steps": 35545, "total_steps": 38160, "loss": 0.4115, "lr": 0.00042742639049080154, "epoch": 18.62945492662474, "percentage": 93.15, "elapsed_time": "1:31:25", "remaining_time": "0:06:43", "throughput": 4226.09, "total_tokens": 23182384}
|
| 7128 |
+
{"current_steps": 35550, "total_steps": 38160, "loss": 0.2443, "lr": 0.000425801826066548, "epoch": 18.632075471698112, "percentage": 93.16, "elapsed_time": "1:31:26", "remaining_time": "0:06:42", "throughput": 4226.16, "total_tokens": 23186192}
|
| 7129 |
+
{"current_steps": 35555, "total_steps": 38160, "loss": 0.3794, "lr": 0.0004241803103982822, "epoch": 18.63469601677149, "percentage": 93.17, "elapsed_time": "1:31:27", "remaining_time": "0:06:42", "throughput": 4226.32, "total_tokens": 23191056}
|
| 7130 |
+
{"current_steps": 35560, "total_steps": 38160, "loss": 0.3654, "lr": 0.00042256184382520763, "epoch": 18.637316561844862, "percentage": 93.19, "elapsed_time": "1:31:28", "remaining_time": "0:06:41", "throughput": 4226.33, "total_tokens": 23194160}
|
| 7131 |
+
{"current_steps": 35565, "total_steps": 38160, "loss": 0.3324, "lr": 0.0004209464266858853, "epoch": 18.63993710691824, "percentage": 93.2, "elapsed_time": "1:31:28", "remaining_time": "0:06:40", "throughput": 4226.36, "total_tokens": 23197392}
|
| 7132 |
+
{"current_steps": 35570, "total_steps": 38160, "loss": 0.3234, "lr": 0.00041933405931824486, "epoch": 18.642557651991613, "percentage": 93.21, "elapsed_time": "1:31:29", "remaining_time": "0:06:39", "throughput": 4226.43, "total_tokens": 23201008}
|
| 7133 |
+
{"current_steps": 35575, "total_steps": 38160, "loss": 0.3033, "lr": 0.0004177247420595731, "epoch": 18.64517819706499, "percentage": 93.23, "elapsed_time": "1:31:30", "remaining_time": "0:06:38", "throughput": 4226.73, "total_tokens": 23208176}
|
| 7134 |
+
{"current_steps": 35580, "total_steps": 38160, "loss": 0.3204, "lr": 0.0004161184752465208, "epoch": 18.647798742138363, "percentage": 93.24, "elapsed_time": "1:31:31", "remaining_time": "0:06:38", "throughput": 4226.7, "total_tokens": 23210768}
|
| 7135 |
+
{"current_steps": 35585, "total_steps": 38160, "loss": 0.5507, "lr": 0.0004145152592151041, "epoch": 18.65041928721174, "percentage": 93.25, "elapsed_time": "1:31:32", "remaining_time": "0:06:37", "throughput": 4226.77, "total_tokens": 23214480}
|
| 7136 |
+
{"current_steps": 35590, "total_steps": 38160, "loss": 0.3226, "lr": 0.00041291509430069483, "epoch": 18.653039832285117, "percentage": 93.27, "elapsed_time": "1:31:32", "remaining_time": "0:06:36", "throughput": 4226.76, "total_tokens": 23217296}
|
| 7137 |
+
{"current_steps": 35595, "total_steps": 38160, "loss": 0.2881, "lr": 0.0004113179808380285, "epoch": 18.65566037735849, "percentage": 93.28, "elapsed_time": "1:31:33", "remaining_time": "0:06:35", "throughput": 4226.77, "total_tokens": 23220272}
|
| 7138 |
+
{"current_steps": 35600, "total_steps": 38160, "loss": 0.2962, "lr": 0.0004097239191612062, "epoch": 18.658280922431867, "percentage": 93.29, "elapsed_time": "1:31:34", "remaining_time": "0:06:35", "throughput": 4226.93, "total_tokens": 23224912}
|
| 7139 |
+
{"current_steps": 35605, "total_steps": 38160, "loss": 0.2746, "lr": 0.00040813290960368286, "epoch": 18.66090146750524, "percentage": 93.3, "elapsed_time": "1:31:35", "remaining_time": "0:06:34", "throughput": 4226.91, "total_tokens": 23227600}
|
| 7140 |
+
{"current_steps": 35610, "total_steps": 38160, "loss": 0.4783, "lr": 0.00040654495249828224, "epoch": 18.663522012578618, "percentage": 93.32, "elapsed_time": "1:31:35", "remaining_time": "0:06:33", "throughput": 4226.92, "total_tokens": 23230576}
|
| 7141 |
+
{"current_steps": 35615, "total_steps": 38160, "loss": 0.4227, "lr": 0.00040496004817718864, "epoch": 18.66614255765199, "percentage": 93.33, "elapsed_time": "1:31:36", "remaining_time": "0:06:32", "throughput": 4226.91, "total_tokens": 23233360}
|
| 7142 |
+
{"current_steps": 35620, "total_steps": 38160, "loss": 0.5332, "lr": 0.0004033781969719419, "epoch": 18.668763102725368, "percentage": 93.34, "elapsed_time": "1:31:37", "remaining_time": "0:06:32", "throughput": 4227.01, "total_tokens": 23237328}
|
| 7143 |
+
{"current_steps": 35625, "total_steps": 38160, "loss": 0.3122, "lr": 0.00040179939921345054, "epoch": 18.67138364779874, "percentage": 93.36, "elapsed_time": "1:31:38", "remaining_time": "0:06:31", "throughput": 4227.04, "total_tokens": 23240592}
|
| 7144 |
+
{"current_steps": 35630, "total_steps": 38160, "loss": 0.3332, "lr": 0.00040022365523197876, "epoch": 18.67400419287212, "percentage": 93.37, "elapsed_time": "1:31:38", "remaining_time": "0:06:30", "throughput": 4227.03, "total_tokens": 23243472}
|
| 7145 |
+
{"current_steps": 35635, "total_steps": 38160, "loss": 0.3027, "lr": 0.00039865096535715626, "epoch": 18.67662473794549, "percentage": 93.38, "elapsed_time": "1:31:39", "remaining_time": "0:06:29", "throughput": 4226.99, "total_tokens": 23246032}
|
| 7146 |
+
{"current_steps": 35640, "total_steps": 38160, "loss": 0.2786, "lr": 0.0003970813299179715, "epoch": 18.67924528301887, "percentage": 93.4, "elapsed_time": "1:31:40", "remaining_time": "0:06:28", "throughput": 4226.94, "total_tokens": 23248304}
|
| 7147 |
+
{"current_steps": 35645, "total_steps": 38160, "loss": 0.3557, "lr": 0.00039551474924277185, "epoch": 18.681865828092242, "percentage": 93.41, "elapsed_time": "1:31:40", "remaining_time": "0:06:28", "throughput": 4226.98, "total_tokens": 23251632}
|
| 7148 |
+
{"current_steps": 35650, "total_steps": 38160, "loss": 0.332, "lr": 0.00039395122365927016, "epoch": 18.68448637316562, "percentage": 93.42, "elapsed_time": "1:31:41", "remaining_time": "0:06:27", "throughput": 4227.04, "total_tokens": 23255216}
|
| 7149 |
+
{"current_steps": 35655, "total_steps": 38160, "loss": 0.4087, "lr": 0.0003923907534945381, "epoch": 18.687106918238992, "percentage": 93.44, "elapsed_time": "1:31:42", "remaining_time": "0:06:26", "throughput": 4227.1, "total_tokens": 23258736}
|
| 7150 |
+
{"current_steps": 35660, "total_steps": 38160, "loss": 0.292, "lr": 0.0003908333390750079, "epoch": 18.68972746331237, "percentage": 93.45, "elapsed_time": "1:31:43", "remaining_time": "0:06:25", "throughput": 4227.12, "total_tokens": 23261872}
|
| 7151 |
+
{"current_steps": 35665, "total_steps": 38160, "loss": 0.4485, "lr": 0.00038927898072647237, "epoch": 18.692348008385743, "percentage": 93.46, "elapsed_time": "1:31:43", "remaining_time": "0:06:25", "throughput": 4227.15, "total_tokens": 23265008}
|
| 7152 |
+
{"current_steps": 35670, "total_steps": 38160, "loss": 0.3324, "lr": 0.00038772767877408797, "epoch": 18.69496855345912, "percentage": 93.47, "elapsed_time": "1:31:44", "remaining_time": "0:06:24", "throughput": 4227.24, "total_tokens": 23268912}
|
| 7153 |
+
{"current_steps": 35675, "total_steps": 38160, "loss": 0.1882, "lr": 0.00038617943354236683, "epoch": 18.697589098532493, "percentage": 93.49, "elapsed_time": "1:31:45", "remaining_time": "0:06:23", "throughput": 4227.2, "total_tokens": 23271344}
|
| 7154 |
+
{"current_steps": 35680, "total_steps": 38160, "loss": 0.3565, "lr": 0.0003846342453551832, "epoch": 18.70020964360587, "percentage": 93.5, "elapsed_time": "1:31:45", "remaining_time": "0:06:22", "throughput": 4227.27, "total_tokens": 23274960}
|
| 7155 |
+
{"current_steps": 35685, "total_steps": 38160, "loss": 0.3851, "lr": 0.00038309211453577683, "epoch": 18.702830188679247, "percentage": 93.51, "elapsed_time": "1:31:46", "remaining_time": "0:06:21", "throughput": 4227.3, "total_tokens": 23278160}
|
| 7156 |
+
{"current_steps": 35690, "total_steps": 38160, "loss": 0.3899, "lr": 0.0003815530414067414, "epoch": 18.70545073375262, "percentage": 93.53, "elapsed_time": "1:31:47", "remaining_time": "0:06:21", "throughput": 4227.38, "total_tokens": 23282192}
|
| 7157 |
+
{"current_steps": 35695, "total_steps": 38160, "loss": 0.3633, "lr": 0.0003800170262900326, "epoch": 18.708071278825997, "percentage": 93.54, "elapsed_time": "1:31:48", "remaining_time": "0:06:20", "throughput": 4227.44, "total_tokens": 23285648}
|
| 7158 |
+
{"current_steps": 35700, "total_steps": 38160, "loss": 0.534, "lr": 0.00037848406950696854, "epoch": 18.71069182389937, "percentage": 93.55, "elapsed_time": "1:31:48", "remaining_time": "0:06:19", "throughput": 4227.42, "total_tokens": 23288304}
|
| 7159 |
+
{"current_steps": 35705, "total_steps": 38160, "loss": 0.3416, "lr": 0.00037695417137822594, "epoch": 18.713312368972748, "percentage": 93.57, "elapsed_time": "1:31:49", "remaining_time": "0:06:18", "throughput": 4227.43, "total_tokens": 23291408}
|
| 7160 |
+
{"current_steps": 35710, "total_steps": 38160, "loss": 0.3352, "lr": 0.0003754273322238438, "epoch": 18.71593291404612, "percentage": 93.58, "elapsed_time": "1:31:50", "remaining_time": "0:06:18", "throughput": 4227.45, "total_tokens": 23294448}
|
| 7161 |
+
{"current_steps": 35715, "total_steps": 38160, "loss": 0.3686, "lr": 0.00037390355236321504, "epoch": 18.718553459119498, "percentage": 93.59, "elapsed_time": "1:31:50", "remaining_time": "0:06:17", "throughput": 4227.44, "total_tokens": 23297232}
|
| 7162 |
+
{"current_steps": 35720, "total_steps": 38160, "loss": 0.4048, "lr": 0.00037238283211510294, "epoch": 18.72117400419287, "percentage": 93.61, "elapsed_time": "1:31:51", "remaining_time": "0:06:16", "throughput": 4227.54, "total_tokens": 23301264}
|
| 7163 |
+
{"current_steps": 35725, "total_steps": 38160, "loss": 0.3828, "lr": 0.00037086517179761965, "epoch": 18.72379454926625, "percentage": 93.62, "elapsed_time": "1:31:52", "remaining_time": "0:06:15", "throughput": 4227.59, "total_tokens": 23304720}
|
| 7164 |
+
{"current_steps": 35730, "total_steps": 38160, "loss": 0.3827, "lr": 0.00036935057172824304, "epoch": 18.72641509433962, "percentage": 93.63, "elapsed_time": "1:31:53", "remaining_time": "0:06:14", "throughput": 4227.67, "total_tokens": 23308592}
|
| 7165 |
+
{"current_steps": 35735, "total_steps": 38160, "loss": 0.7767, "lr": 0.0003678390322238129, "epoch": 18.729035639413, "percentage": 93.65, "elapsed_time": "1:31:54", "remaining_time": "0:06:14", "throughput": 4227.73, "total_tokens": 23312080}
|
| 7166 |
+
{"current_steps": 35740, "total_steps": 38160, "loss": 0.5679, "lr": 0.0003663305536005262, "epoch": 18.731656184486372, "percentage": 93.66, "elapsed_time": "1:31:54", "remaining_time": "0:06:13", "throughput": 4227.71, "total_tokens": 23314800}
|
| 7167 |
+
{"current_steps": 35745, "total_steps": 38160, "loss": 0.4555, "lr": 0.0003648251361739374, "epoch": 18.73427672955975, "percentage": 93.67, "elapsed_time": "1:31:55", "remaining_time": "0:06:12", "throughput": 4227.73, "total_tokens": 23317840}
|
| 7168 |
+
{"current_steps": 35750, "total_steps": 38160, "loss": 0.3151, "lr": 0.0003633227802589628, "epoch": 18.736897274633122, "percentage": 93.68, "elapsed_time": "1:31:56", "remaining_time": "0:06:11", "throughput": 4227.77, "total_tokens": 23321200}
|
| 7169 |
+
{"current_steps": 35755, "total_steps": 38160, "loss": 0.3962, "lr": 0.0003618234861698777, "epoch": 18.7395178197065, "percentage": 93.7, "elapsed_time": "1:31:56", "remaining_time": "0:06:11", "throughput": 4227.75, "total_tokens": 23323824}
|
| 7170 |
+
{"current_steps": 35760, "total_steps": 38160, "loss": 0.3554, "lr": 0.00036032725422031783, "epoch": 18.742138364779873, "percentage": 93.71, "elapsed_time": "1:31:57", "remaining_time": "0:06:10", "throughput": 4227.76, "total_tokens": 23326928}
|
| 7171 |
+
{"current_steps": 35765, "total_steps": 38160, "loss": 0.4516, "lr": 0.0003588340847232796, "epoch": 18.74475890985325, "percentage": 93.72, "elapsed_time": "1:31:58", "remaining_time": "0:06:09", "throughput": 4227.83, "total_tokens": 23330576}
|
| 7172 |
+
{"current_steps": 35770, "total_steps": 38160, "loss": 0.4119, "lr": 0.00035734397799111635, "epoch": 18.747379454926623, "percentage": 93.74, "elapsed_time": "1:31:59", "remaining_time": "0:06:08", "throughput": 4227.94, "total_tokens": 23334832}
|
| 7173 |
+
{"current_steps": 35775, "total_steps": 38160, "loss": 0.2795, "lr": 0.00035585693433554057, "epoch": 18.75, "percentage": 93.75, "elapsed_time": "1:31:59", "remaining_time": "0:06:07", "throughput": 4227.92, "total_tokens": 23337488}
|
| 7174 |
+
{"current_steps": 35780, "total_steps": 38160, "loss": 0.2809, "lr": 0.00035437295406762323, "epoch": 18.752620545073377, "percentage": 93.76, "elapsed_time": "1:32:00", "remaining_time": "0:06:07", "throughput": 4228.0, "total_tokens": 23341296}
|
| 7175 |
+
{"current_steps": 35785, "total_steps": 38160, "loss": 0.4165, "lr": 0.00035289203749779783, "epoch": 18.75524109014675, "percentage": 93.78, "elapsed_time": "1:32:01", "remaining_time": "0:06:06", "throughput": 4228.28, "total_tokens": 23348144}
|
| 7176 |
+
{"current_steps": 35790, "total_steps": 38160, "loss": 0.3471, "lr": 0.00035141418493585486, "epoch": 18.757861635220127, "percentage": 93.79, "elapsed_time": "1:32:02", "remaining_time": "0:06:05", "throughput": 4228.36, "total_tokens": 23352080}
|
| 7177 |
+
{"current_steps": 35795, "total_steps": 38160, "loss": 0.2228, "lr": 0.0003499393966909453, "epoch": 18.7604821802935, "percentage": 93.8, "elapsed_time": "1:32:03", "remaining_time": "0:06:04", "throughput": 4228.41, "total_tokens": 23355536}
|
| 7178 |
+
{"current_steps": 35800, "total_steps": 38160, "loss": 0.2622, "lr": 0.0003484676730715791, "epoch": 18.763102725366878, "percentage": 93.82, "elapsed_time": "1:32:04", "remaining_time": "0:06:04", "throughput": 4228.39, "total_tokens": 23358160}
|
| 7179 |
+
{"current_steps": 35805, "total_steps": 38160, "loss": 0.454, "lr": 0.00034699901438561994, "epoch": 18.76572327044025, "percentage": 93.83, "elapsed_time": "1:32:04", "remaining_time": "0:06:03", "throughput": 4228.42, "total_tokens": 23361296}
|
| 7180 |
+
{"current_steps": 35810, "total_steps": 38160, "loss": 0.3453, "lr": 0.00034553342094029545, "epoch": 18.768343815513628, "percentage": 93.84, "elapsed_time": "1:32:05", "remaining_time": "0:06:02", "throughput": 4228.51, "total_tokens": 23365264}
|
| 7181 |
+
{"current_steps": 35815, "total_steps": 38160, "loss": 0.2631, "lr": 0.0003440708930421937, "epoch": 18.770964360587, "percentage": 93.85, "elapsed_time": "1:32:06", "remaining_time": "0:06:01", "throughput": 4228.52, "total_tokens": 23368208}
|
| 7182 |
+
{"current_steps": 35820, "total_steps": 38160, "loss": 0.3726, "lr": 0.00034261143099725665, "epoch": 18.77358490566038, "percentage": 93.87, "elapsed_time": "1:32:07", "remaining_time": "0:06:01", "throughput": 4228.52, "total_tokens": 23371248}
|
| 7183 |
+
{"current_steps": 35825, "total_steps": 38160, "loss": 0.2797, "lr": 0.00034115503511079003, "epoch": 18.77620545073375, "percentage": 93.88, "elapsed_time": "1:32:07", "remaining_time": "0:06:00", "throughput": 4228.6, "total_tokens": 23374960}
|
| 7184 |
+
{"current_steps": 35830, "total_steps": 38160, "loss": 0.2842, "lr": 0.0003397017056874535, "epoch": 18.77882599580713, "percentage": 93.89, "elapsed_time": "1:32:08", "remaining_time": "0:05:59", "throughput": 4228.67, "total_tokens": 23378832}
|
| 7185 |
+
{"current_steps": 35835, "total_steps": 38160, "loss": 0.4303, "lr": 0.0003382514430312638, "epoch": 18.781446540880502, "percentage": 93.91, "elapsed_time": "1:32:09", "remaining_time": "0:05:58", "throughput": 4228.71, "total_tokens": 23382032}
|
| 7186 |
+
{"current_steps": 35840, "total_steps": 38160, "loss": 0.3422, "lr": 0.0003368042474456034, "epoch": 18.78406708595388, "percentage": 93.92, "elapsed_time": "1:32:10", "remaining_time": "0:05:57", "throughput": 4228.74, "total_tokens": 23385456}
|
| 7187 |
+
{"current_steps": 35845, "total_steps": 38160, "loss": 0.3792, "lr": 0.00033536011923320674, "epoch": 18.786687631027252, "percentage": 93.93, "elapsed_time": "1:32:10", "remaining_time": "0:05:57", "throughput": 4228.76, "total_tokens": 23388592}
|
| 7188 |
+
{"current_steps": 35850, "total_steps": 38160, "loss": 0.3162, "lr": 0.0003339190586961721, "epoch": 18.78930817610063, "percentage": 93.95, "elapsed_time": "1:32:11", "remaining_time": "0:05:56", "throughput": 4228.79, "total_tokens": 23391824}
|
| 7189 |
+
{"current_steps": 35855, "total_steps": 38160, "loss": 0.3436, "lr": 0.0003324810661359517, "epoch": 18.791928721174003, "percentage": 93.96, "elapsed_time": "1:32:12", "remaining_time": "0:05:55", "throughput": 4228.81, "total_tokens": 23394896}
|
| 7190 |
+
{"current_steps": 35860, "total_steps": 38160, "loss": 0.3124, "lr": 0.0003310461418533533, "epoch": 18.79454926624738, "percentage": 93.97, "elapsed_time": "1:32:13", "remaining_time": "0:05:54", "throughput": 4228.98, "total_tokens": 23399888}
|
| 7191 |
+
{"current_steps": 35865, "total_steps": 38160, "loss": 0.3246, "lr": 0.0003296142861485518, "epoch": 18.797169811320753, "percentage": 93.99, "elapsed_time": "1:32:13", "remaining_time": "0:05:54", "throughput": 4228.99, "total_tokens": 23402832}
|
| 7192 |
+
{"current_steps": 35870, "total_steps": 38160, "loss": 0.4719, "lr": 0.00032818549932107086, "epoch": 18.79979035639413, "percentage": 94.0, "elapsed_time": "1:32:14", "remaining_time": "0:05:53", "throughput": 4229.01, "total_tokens": 23405968}
|
| 7193 |
+
{"current_steps": 35875, "total_steps": 38160, "loss": 0.4647, "lr": 0.00032675978166980146, "epoch": 18.802410901467507, "percentage": 94.01, "elapsed_time": "1:32:15", "remaining_time": "0:05:52", "throughput": 4229.0, "total_tokens": 23408656}
|
| 7194 |
+
{"current_steps": 35880, "total_steps": 38160, "loss": 0.466, "lr": 0.0003253371334929833, "epoch": 18.80503144654088, "percentage": 94.03, "elapsed_time": "1:32:16", "remaining_time": "0:05:51", "throughput": 4229.03, "total_tokens": 23411952}
|
| 7195 |
+
{"current_steps": 35885, "total_steps": 38160, "loss": 0.3773, "lr": 0.00032391755508822, "epoch": 18.807651991614257, "percentage": 94.04, "elapsed_time": "1:32:16", "remaining_time": "0:05:51", "throughput": 4229.03, "total_tokens": 23414896}
|
| 7196 |
+
{"current_steps": 35890, "total_steps": 38160, "loss": 0.4504, "lr": 0.00032250104675246913, "epoch": 18.81027253668763, "percentage": 94.05, "elapsed_time": "1:32:17", "remaining_time": "0:05:50", "throughput": 4229.07, "total_tokens": 23418224}
|
| 7197 |
+
{"current_steps": 35895, "total_steps": 38160, "loss": 0.3078, "lr": 0.0003210876087820502, "epoch": 18.812893081761008, "percentage": 94.06, "elapsed_time": "1:32:18", "remaining_time": "0:05:49", "throughput": 4229.06, "total_tokens": 23421008}
|
| 7198 |
+
{"current_steps": 35900, "total_steps": 38160, "loss": 0.3043, "lr": 0.00031967724147263674, "epoch": 18.81551362683438, "percentage": 94.08, "elapsed_time": "1:32:18", "remaining_time": "0:05:48", "throughput": 4229.05, "total_tokens": 23423792}
|
| 7199 |
+
{"current_steps": 35905, "total_steps": 38160, "loss": 0.3964, "lr": 0.0003182699451192644, "epoch": 18.818134171907758, "percentage": 94.09, "elapsed_time": "1:32:19", "remaining_time": "0:05:47", "throughput": 4229.02, "total_tokens": 23426320}
|
| 7200 |
+
{"current_steps": 35910, "total_steps": 38160, "loss": 0.4204, "lr": 0.0003168657200163211, "epoch": 18.82075471698113, "percentage": 94.1, "elapsed_time": "1:32:20", "remaining_time": "0:05:47", "throughput": 4229.08, "total_tokens": 23429840}
|
| 7201 |
+
{"current_steps": 35915, "total_steps": 38160, "loss": 0.2733, "lr": 0.0003154645664575534, "epoch": 18.82337526205451, "percentage": 94.12, "elapsed_time": "1:32:20", "remaining_time": "0:05:46", "throughput": 4229.06, "total_tokens": 23432720}
|
| 7202 |
+
{"current_steps": 35920, "total_steps": 38160, "loss": 0.2924, "lr": 0.00031406648473607024, "epoch": 18.82599580712788, "percentage": 94.13, "elapsed_time": "1:32:21", "remaining_time": "0:05:45", "throughput": 4229.14, "total_tokens": 23436592}
|
| 7203 |
+
{"current_steps": 35925, "total_steps": 38160, "loss": 0.2708, "lr": 0.000312671475144331, "epoch": 18.82861635220126, "percentage": 94.14, "elapsed_time": "1:32:22", "remaining_time": "0:05:44", "throughput": 4229.18, "total_tokens": 23439856}
|
| 7204 |
+
{"current_steps": 35930, "total_steps": 38160, "loss": 0.2849, "lr": 0.00031127953797415895, "epoch": 18.831236897274632, "percentage": 94.16, "elapsed_time": "1:32:23", "remaining_time": "0:05:44", "throughput": 4229.18, "total_tokens": 23442832}
|
| 7205 |
+
{"current_steps": 35935, "total_steps": 38160, "loss": 0.2815, "lr": 0.0003098906735167278, "epoch": 18.83385744234801, "percentage": 94.17, "elapsed_time": "1:32:23", "remaining_time": "0:05:43", "throughput": 4229.24, "total_tokens": 23446416}
|
| 7206 |
+
{"current_steps": 35940, "total_steps": 38160, "loss": 0.3226, "lr": 0.0003085048820625752, "epoch": 18.836477987421382, "percentage": 94.18, "elapsed_time": "1:32:24", "remaining_time": "0:05:42", "throughput": 4229.23, "total_tokens": 23449104}
|
| 7207 |
+
{"current_steps": 35945, "total_steps": 38160, "loss": 0.3997, "lr": 0.0003071221639015925, "epoch": 18.83909853249476, "percentage": 94.2, "elapsed_time": "1:32:25", "remaining_time": "0:05:41", "throughput": 4229.25, "total_tokens": 23452272}
|
| 7208 |
+
{"current_steps": 35950, "total_steps": 38160, "loss": 0.3527, "lr": 0.0003057425193230251, "epoch": 18.841719077568133, "percentage": 94.21, "elapsed_time": "1:32:25", "remaining_time": "0:05:40", "throughput": 4229.24, "total_tokens": 23455088}
|
| 7209 |
+
{"current_steps": 35955, "total_steps": 38160, "loss": 0.4055, "lr": 0.0003043659486154854, "epoch": 18.84433962264151, "percentage": 94.22, "elapsed_time": "1:32:26", "remaining_time": "0:05:40", "throughput": 4229.3, "total_tokens": 23458704}
|
| 7210 |
+
{"current_steps": 35960, "total_steps": 38160, "loss": 0.3721, "lr": 0.0003029924520669297, "epoch": 18.846960167714883, "percentage": 94.23, "elapsed_time": "1:32:27", "remaining_time": "0:05:39", "throughput": 4229.39, "total_tokens": 23462704}
|
| 7211 |
+
{"current_steps": 35965, "total_steps": 38160, "loss": 0.2772, "lr": 0.00030162202996468154, "epoch": 18.84958071278826, "percentage": 94.25, "elapsed_time": "1:32:28", "remaining_time": "0:05:38", "throughput": 4229.37, "total_tokens": 23465328}
|
| 7212 |
+
{"current_steps": 35970, "total_steps": 38160, "loss": 0.3989, "lr": 0.0003002546825954183, "epoch": 18.852201257861637, "percentage": 94.26, "elapsed_time": "1:32:28", "remaining_time": "0:05:37", "throughput": 4229.44, "total_tokens": 23468912}
|
| 7213 |
+
{"current_steps": 35975, "total_steps": 38160, "loss": 0.3289, "lr": 0.0002988904102451711, "epoch": 18.85482180293501, "percentage": 94.27, "elapsed_time": "1:32:29", "remaining_time": "0:05:37", "throughput": 4229.45, "total_tokens": 23471984}
|
| 7214 |
+
{"current_steps": 35980, "total_steps": 38160, "loss": 0.2884, "lr": 0.0002975292131993301, "epoch": 18.857442348008387, "percentage": 94.29, "elapsed_time": "1:32:30", "remaining_time": "0:05:36", "throughput": 4229.53, "total_tokens": 23475888}
|
| 7215 |
+
{"current_steps": 35985, "total_steps": 38160, "loss": 0.3265, "lr": 0.0002961710917426441, "epoch": 18.86006289308176, "percentage": 94.3, "elapsed_time": "1:32:31", "remaining_time": "0:05:35", "throughput": 4229.52, "total_tokens": 23478704}
|
| 7216 |
+
{"current_steps": 35990, "total_steps": 38160, "loss": 0.3095, "lr": 0.0002948160461592142, "epoch": 18.862683438155138, "percentage": 94.31, "elapsed_time": "1:32:31", "remaining_time": "0:05:34", "throughput": 4229.47, "total_tokens": 23481136}
|
| 7217 |
+
{"current_steps": 35995, "total_steps": 38160, "loss": 0.2695, "lr": 0.0002934640767325036, "epoch": 18.86530398322851, "percentage": 94.33, "elapsed_time": "1:32:32", "remaining_time": "0:05:33", "throughput": 4229.42, "total_tokens": 23483408}
|
| 7218 |
+
{"current_steps": 36000, "total_steps": 38160, "loss": 0.3577, "lr": 0.00029211518374532616, "epoch": 18.867924528301888, "percentage": 94.34, "elapsed_time": "1:32:33", "remaining_time": "0:05:33", "throughput": 4229.51, "total_tokens": 23487568}
|
| 7219 |
+
{"current_steps": 36005, "total_steps": 38160, "loss": 0.4181, "lr": 0.00029076936747985446, "epoch": 18.87054507337526, "percentage": 94.35, "elapsed_time": "1:32:33", "remaining_time": "0:05:32", "throughput": 4229.52, "total_tokens": 23490512}
|
| 7220 |
+
{"current_steps": 36010, "total_steps": 38160, "loss": 0.4142, "lr": 0.00028942662821762166, "epoch": 18.87316561844864, "percentage": 94.37, "elapsed_time": "1:32:34", "remaining_time": "0:05:31", "throughput": 4229.55, "total_tokens": 23493680}
|
| 7221 |
+
{"current_steps": 36015, "total_steps": 38160, "loss": 0.3458, "lr": 0.0002880869662395097, "epoch": 18.87578616352201, "percentage": 94.38, "elapsed_time": "1:32:35", "remaining_time": "0:05:30", "throughput": 4229.64, "total_tokens": 23497680}
|
| 7222 |
+
{"current_steps": 36020, "total_steps": 38160, "loss": 0.3471, "lr": 0.00028675038182576274, "epoch": 18.87840670859539, "percentage": 94.39, "elapsed_time": "1:32:36", "remaining_time": "0:05:30", "throughput": 4229.63, "total_tokens": 23500464}
|
| 7223 |
+
{"current_steps": 36025, "total_steps": 38160, "loss": 0.2943, "lr": 0.0002854168752559788, "epoch": 18.881027253668762, "percentage": 94.41, "elapsed_time": "1:32:36", "remaining_time": "0:05:29", "throughput": 4229.62, "total_tokens": 23503184}
|
| 7224 |
+
{"current_steps": 36030, "total_steps": 38160, "loss": 0.3752, "lr": 0.00028408644680910975, "epoch": 18.88364779874214, "percentage": 94.42, "elapsed_time": "1:32:37", "remaining_time": "0:05:28", "throughput": 4229.62, "total_tokens": 23506064}
|
| 7225 |
+
{"current_steps": 36035, "total_steps": 38160, "loss": 0.3729, "lr": 0.0002827590967634696, "epoch": 18.886268343815512, "percentage": 94.43, "elapsed_time": "1:32:38", "remaining_time": "0:05:27", "throughput": 4229.61, "total_tokens": 23508848}
|
| 7226 |
+
{"current_steps": 36040, "total_steps": 38160, "loss": 0.4292, "lr": 0.00028143482539672303, "epoch": 18.88888888888889, "percentage": 94.44, "elapsed_time": "1:32:38", "remaining_time": "0:05:26", "throughput": 4229.67, "total_tokens": 23512432}
|
| 7227 |
+
{"current_steps": 36045, "total_steps": 38160, "loss": 0.4191, "lr": 0.00028011363298589164, "epoch": 18.891509433962263, "percentage": 94.46, "elapsed_time": "1:32:39", "remaining_time": "0:05:26", "throughput": 4229.66, "total_tokens": 23515312}
|
| 7228 |
+
{"current_steps": 36050, "total_steps": 38160, "loss": 0.4163, "lr": 0.00027879551980735604, "epoch": 18.89412997903564, "percentage": 94.47, "elapsed_time": "1:32:40", "remaining_time": "0:05:25", "throughput": 4229.74, "total_tokens": 23519024}
|
| 7229 |
+
{"current_steps": 36055, "total_steps": 38160, "loss": 0.272, "lr": 0.00027748048613684907, "epoch": 18.896750524109013, "percentage": 94.48, "elapsed_time": "1:32:41", "remaining_time": "0:05:24", "throughput": 4229.75, "total_tokens": 23522032}
|
| 7230 |
+
{"current_steps": 36060, "total_steps": 38160, "loss": 0.4364, "lr": 0.00027616853224946057, "epoch": 18.89937106918239, "percentage": 94.5, "elapsed_time": "1:32:41", "remaining_time": "0:05:23", "throughput": 4229.76, "total_tokens": 23524912}
|
| 7231 |
+
{"current_steps": 36065, "total_steps": 38160, "loss": 0.241, "lr": 0.0002748596584196394, "epoch": 18.901991614255767, "percentage": 94.51, "elapsed_time": "1:32:42", "remaining_time": "0:05:23", "throughput": 4229.76, "total_tokens": 23527984}
|
| 7232 |
+
{"current_steps": 36070, "total_steps": 38160, "loss": 0.4071, "lr": 0.0002735538649211816, "epoch": 18.90461215932914, "percentage": 94.52, "elapsed_time": "1:32:43", "remaining_time": "0:05:22", "throughput": 4229.82, "total_tokens": 23531792}
|
| 7233 |
+
{"current_steps": 36075, "total_steps": 38160, "loss": 0.3422, "lr": 0.0002722511520272469, "epoch": 18.907232704402517, "percentage": 94.54, "elapsed_time": "1:32:44", "remaining_time": "0:05:21", "throughput": 4229.89, "total_tokens": 23535504}
|
| 7234 |
+
{"current_steps": 36080, "total_steps": 38160, "loss": 0.3416, "lr": 0.00027095152001034903, "epoch": 18.90985324947589, "percentage": 94.55, "elapsed_time": "1:32:44", "remaining_time": "0:05:20", "throughput": 4229.94, "total_tokens": 23538864}
|
| 7235 |
+
{"current_steps": 36085, "total_steps": 38160, "loss": 0.3937, "lr": 0.00026965496914235554, "epoch": 18.912473794549268, "percentage": 94.56, "elapsed_time": "1:32:45", "remaining_time": "0:05:20", "throughput": 4230.02, "total_tokens": 23542736}
|
| 7236 |
+
{"current_steps": 36090, "total_steps": 38160, "loss": 0.4136, "lr": 0.0002683614996944894, "epoch": 18.91509433962264, "percentage": 94.58, "elapsed_time": "1:32:46", "remaining_time": "0:05:19", "throughput": 4230.18, "total_tokens": 23547504}
|
| 7237 |
+
{"current_steps": 36095, "total_steps": 38160, "loss": 0.3228, "lr": 0.0002670711119373309, "epoch": 18.917714884696018, "percentage": 94.59, "elapsed_time": "1:32:47", "remaining_time": "0:05:18", "throughput": 4230.17, "total_tokens": 23550224}
|
| 7238 |
+
{"current_steps": 36100, "total_steps": 38160, "loss": 0.41, "lr": 0.00026578380614081243, "epoch": 18.92033542976939, "percentage": 94.6, "elapsed_time": "1:32:47", "remaining_time": "0:05:17", "throughput": 4230.2, "total_tokens": 23553488}
|
| 7239 |
+
{"current_steps": 36105, "total_steps": 38160, "loss": 0.4443, "lr": 0.0002644995825742252, "epoch": 18.92295597484277, "percentage": 94.61, "elapsed_time": "1:32:48", "remaining_time": "0:05:16", "throughput": 4230.24, "total_tokens": 23556816}
|
| 7240 |
+
{"current_steps": 36110, "total_steps": 38160, "loss": 0.2323, "lr": 0.0002632184415062144, "epoch": 18.92557651991614, "percentage": 94.63, "elapsed_time": "1:32:49", "remaining_time": "0:05:16", "throughput": 4230.2, "total_tokens": 23559280}
|
| 7241 |
+
{"current_steps": 36115, "total_steps": 38160, "loss": 0.4367, "lr": 0.0002619403832047806, "epoch": 18.92819706498952, "percentage": 94.64, "elapsed_time": "1:32:50", "remaining_time": "0:05:15", "throughput": 4230.22, "total_tokens": 23562416}
|
| 7242 |
+
{"current_steps": 36120, "total_steps": 38160, "loss": 0.289, "lr": 0.000260665407937275, "epoch": 18.930817610062892, "percentage": 94.65, "elapsed_time": "1:32:50", "remaining_time": "0:05:14", "throughput": 4230.28, "total_tokens": 23566032}
|
| 7243 |
+
{"current_steps": 36125, "total_steps": 38160, "loss": 0.4043, "lr": 0.00025939351597041246, "epoch": 18.93343815513627, "percentage": 94.67, "elapsed_time": "1:32:51", "remaining_time": "0:05:13", "throughput": 4230.28, "total_tokens": 23568880}
|
| 7244 |
+
{"current_steps": 36130, "total_steps": 38160, "loss": 0.3029, "lr": 0.0002581247075702569, "epoch": 18.936058700209642, "percentage": 94.68, "elapsed_time": "1:32:52", "remaining_time": "0:05:13", "throughput": 4230.23, "total_tokens": 23571216}
|
| 7245 |
+
{"current_steps": 36135, "total_steps": 38160, "loss": 0.4551, "lr": 0.00025685898300222774, "epoch": 18.93867924528302, "percentage": 94.69, "elapsed_time": "1:32:52", "remaining_time": "0:05:12", "throughput": 4230.32, "total_tokens": 23575216}
|
| 7246 |
+
{"current_steps": 36140, "total_steps": 38160, "loss": 0.3332, "lr": 0.00025559634253110143, "epoch": 18.941299790356393, "percentage": 94.71, "elapsed_time": "1:32:53", "remaining_time": "0:05:11", "throughput": 4230.36, "total_tokens": 23578576}
|
| 7247 |
+
{"current_steps": 36145, "total_steps": 38160, "loss": 0.3199, "lr": 0.00025433678642100663, "epoch": 18.94392033542977, "percentage": 94.72, "elapsed_time": "1:32:54", "remaining_time": "0:05:10", "throughput": 4230.31, "total_tokens": 23580848}
|
| 7248 |
+
{"current_steps": 36150, "total_steps": 38160, "loss": 0.3807, "lr": 0.0002530803149354294, "epoch": 18.946540880503143, "percentage": 94.73, "elapsed_time": "1:32:54", "remaining_time": "0:05:09", "throughput": 4230.28, "total_tokens": 23583440}
|
| 7249 |
+
{"current_steps": 36155, "total_steps": 38160, "loss": 0.2632, "lr": 0.00025182692833720764, "epoch": 18.94916142557652, "percentage": 94.75, "elapsed_time": "1:32:55", "remaining_time": "0:05:09", "throughput": 4230.31, "total_tokens": 23586640}
|
| 7250 |
+
{"current_steps": 36160, "total_steps": 38160, "loss": 0.3199, "lr": 0.00025057662688853676, "epoch": 18.951781970649897, "percentage": 94.76, "elapsed_time": "1:32:56", "remaining_time": "0:05:08", "throughput": 4230.35, "total_tokens": 23589968}
|
| 7251 |
+
{"current_steps": 36165, "total_steps": 38160, "loss": 0.2903, "lr": 0.00024932941085096584, "epoch": 18.95440251572327, "percentage": 94.77, "elapsed_time": "1:32:56", "remaining_time": "0:05:07", "throughput": 4230.31, "total_tokens": 23592368}
|
| 7252 |
+
{"current_steps": 36170, "total_steps": 38160, "loss": 0.3171, "lr": 0.0002480852804853978, "epoch": 18.957023060796647, "percentage": 94.79, "elapsed_time": "1:32:57", "remaining_time": "0:05:06", "throughput": 4230.31, "total_tokens": 23595248}
|
| 7253 |
+
{"current_steps": 36175, "total_steps": 38160, "loss": 0.3702, "lr": 0.00024684423605208784, "epoch": 18.95964360587002, "percentage": 94.8, "elapsed_time": "1:32:58", "remaining_time": "0:05:06", "throughput": 4230.34, "total_tokens": 23598384}
|
| 7254 |
+
{"current_steps": 36180, "total_steps": 38160, "loss": 0.4235, "lr": 0.0002456062778106532, "epoch": 18.962264150943398, "percentage": 94.81, "elapsed_time": "1:32:59", "remaining_time": "0:05:05", "throughput": 4230.4, "total_tokens": 23602128}
|
| 7255 |
+
{"current_steps": 36185, "total_steps": 38160, "loss": 0.2744, "lr": 0.0002443714060200569, "epoch": 18.96488469601677, "percentage": 94.82, "elapsed_time": "1:32:59", "remaining_time": "0:05:04", "throughput": 4230.43, "total_tokens": 23605232}
|
| 7256 |
+
{"current_steps": 36190, "total_steps": 38160, "loss": 0.288, "lr": 0.00024313962093862218, "epoch": 18.967505241090148, "percentage": 94.84, "elapsed_time": "1:33:00", "remaining_time": "0:05:03", "throughput": 4230.44, "total_tokens": 23608272}
|
| 7257 |
+
{"current_steps": 36195, "total_steps": 38160, "loss": 0.3041, "lr": 0.0002419109228240246, "epoch": 18.97012578616352, "percentage": 94.85, "elapsed_time": "1:33:01", "remaining_time": "0:05:02", "throughput": 4230.39, "total_tokens": 23610576}
|
| 7258 |
+
{"current_steps": 36200, "total_steps": 38160, "loss": 0.3863, "lr": 0.00024068531193329024, "epoch": 18.9727463312369, "percentage": 94.86, "elapsed_time": "1:33:01", "remaining_time": "0:05:02", "throughput": 4230.44, "total_tokens": 23614000}
|
| 7259 |
+
{"current_steps": 36205, "total_steps": 38160, "loss": 0.2961, "lr": 0.00023946278852280732, "epoch": 18.97536687631027, "percentage": 94.88, "elapsed_time": "1:33:02", "remaining_time": "0:05:01", "throughput": 4230.47, "total_tokens": 23617264}
|
| 7260 |
+
{"current_steps": 36210, "total_steps": 38160, "loss": 0.379, "lr": 0.00023824335284831132, "epoch": 18.97798742138365, "percentage": 94.89, "elapsed_time": "1:33:03", "remaining_time": "0:05:00", "throughput": 4230.47, "total_tokens": 23620240}
|
| 7261 |
+
{"current_steps": 36215, "total_steps": 38160, "loss": 0.3114, "lr": 0.00023702700516489315, "epoch": 18.980607966457022, "percentage": 94.9, "elapsed_time": "1:33:04", "remaining_time": "0:04:59", "throughput": 4230.48, "total_tokens": 23623216}
|
| 7262 |
+
{"current_steps": 36220, "total_steps": 38160, "loss": 0.3173, "lr": 0.0002358137457270043, "epoch": 18.9832285115304, "percentage": 94.92, "elapsed_time": "1:33:04", "remaining_time": "0:04:59", "throughput": 4230.47, "total_tokens": 23626064}
|
| 7263 |
+
{"current_steps": 36225, "total_steps": 38160, "loss": 0.3942, "lr": 0.00023460357478844007, "epoch": 18.985849056603772, "percentage": 94.93, "elapsed_time": "1:33:05", "remaining_time": "0:04:58", "throughput": 4230.51, "total_tokens": 23629360}
|
| 7264 |
+
{"current_steps": 36230, "total_steps": 38160, "loss": 0.3829, "lr": 0.00023339649260235295, "epoch": 18.98846960167715, "percentage": 94.94, "elapsed_time": "1:33:06", "remaining_time": "0:04:57", "throughput": 4230.48, "total_tokens": 23631792}
|
| 7265 |
+
{"current_steps": 36235, "total_steps": 38160, "loss": 0.3088, "lr": 0.00023219249942125597, "epoch": 18.991090146750523, "percentage": 94.96, "elapsed_time": "1:33:06", "remaining_time": "0:04:56", "throughput": 4230.62, "total_tokens": 23636400}
|
| 7266 |
+
{"current_steps": 36240, "total_steps": 38160, "loss": 0.3298, "lr": 0.00023099159549700598, "epoch": 18.9937106918239, "percentage": 94.97, "elapsed_time": "1:33:07", "remaining_time": "0:04:56", "throughput": 4230.59, "total_tokens": 23639056}
|
| 7267 |
+
{"current_steps": 36245, "total_steps": 38160, "loss": 0.2882, "lr": 0.00022979378108082204, "epoch": 18.996331236897273, "percentage": 94.98, "elapsed_time": "1:33:08", "remaining_time": "0:04:55", "throughput": 4230.65, "total_tokens": 23642672}
|
| 7268 |
+
{"current_steps": 36250, "total_steps": 38160, "loss": 0.3265, "lr": 0.00022859905642327036, "epoch": 18.99895178197065, "percentage": 94.99, "elapsed_time": "1:33:09", "remaining_time": "0:04:54", "throughput": 4230.59, "total_tokens": 23644848}
|
| 7269 |
+
{"current_steps": 36252, "total_steps": 38160, "eval_loss": 0.5051435828208923, "epoch": 19.0, "percentage": 95.0, "elapsed_time": "1:33:23", "remaining_time": "0:04:54", "throughput": 4220.11, "total_tokens": 23645440}
|
| 7270 |
+
{"current_steps": 36255, "total_steps": 38160, "loss": 0.2365, "lr": 0.00022740742177427775, "epoch": 19.001572327044027, "percentage": 95.01, "elapsed_time": "1:33:24", "remaining_time": "0:04:54", "throughput": 4218.95, "total_tokens": 23647040}
|
| 7271 |
+
{"current_steps": 36260, "total_steps": 38160, "loss": 0.3468, "lr": 0.00022621887738311474, "epoch": 19.0041928721174, "percentage": 95.02, "elapsed_time": "1:33:25", "remaining_time": "0:04:53", "throughput": 4218.95, "total_tokens": 23649760}
|
| 7272 |
+
{"current_steps": 36265, "total_steps": 38160, "loss": 0.2705, "lr": 0.0002250334234984158, "epoch": 19.006813417190777, "percentage": 95.03, "elapsed_time": "1:33:26", "remaining_time": "0:04:52", "throughput": 4218.95, "total_tokens": 23652672}
|
| 7273 |
+
{"current_steps": 36270, "total_steps": 38160, "loss": 0.3259, "lr": 0.0002238510603681626, "epoch": 19.00943396226415, "percentage": 95.05, "elapsed_time": "1:33:26", "remaining_time": "0:04:52", "throughput": 4218.96, "total_tokens": 23655584}
|
| 7274 |
+
{"current_steps": 36275, "total_steps": 38160, "loss": 0.399, "lr": 0.00022267178823969224, "epoch": 19.012054507337528, "percentage": 95.06, "elapsed_time": "1:33:27", "remaining_time": "0:04:51", "throughput": 4218.97, "total_tokens": 23658496}
|
| 7275 |
+
{"current_steps": 36280, "total_steps": 38160, "loss": 0.2765, "lr": 0.00022149560735969576, "epoch": 19.0146750524109, "percentage": 95.07, "elapsed_time": "1:33:28", "remaining_time": "0:04:50", "throughput": 4218.93, "total_tokens": 23660928}
|
| 7276 |
+
{"current_steps": 36285, "total_steps": 38160, "loss": 0.5208, "lr": 0.00022032251797421464, "epoch": 19.017295597484278, "percentage": 95.09, "elapsed_time": "1:33:29", "remaining_time": "0:04:49", "throughput": 4218.98, "total_tokens": 23664448}
|
| 7277 |
+
{"current_steps": 36290, "total_steps": 38160, "loss": 0.4726, "lr": 0.00021915252032864927, "epoch": 19.01991614255765, "percentage": 95.1, "elapsed_time": "1:33:29", "remaining_time": "0:04:49", "throughput": 4219.05, "total_tokens": 23668128}
|