Training in progress, step 11220
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +110 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 58745928
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:34fb3ab8b7e26bff00345bffa6e4c4c7e36495aa2f0f7afc99fde3bbe51cb885
|
| 3 |
size 58745928
|
trainer_log.jsonl
CHANGED
|
@@ -2153,3 +2153,113 @@
|
|
| 2153 |
{"current_steps": 10670, "total_steps": 11220, "loss": 0.0, "lr": 3.6641940761735217e-07, "epoch": 19.019607843137255, "percentage": 95.1, "elapsed_time": "0:46:03", "remaining_time": "0:02:22", "throughput": 2390.27, "total_tokens": 6605616}
|
| 2154 |
{"current_steps": 10675, "total_steps": 11220, "loss": 0.0, "lr": 3.598154263543596e-07, "epoch": 19.028520499108733, "percentage": 95.14, "elapsed_time": "0:46:04", "remaining_time": "0:02:21", "throughput": 2390.31, "total_tokens": 6608720}
|
| 2155 |
{"current_steps": 10680, "total_steps": 11220, "loss": 0.0, "lr": 3.532710679631679e-07, "epoch": 19.037433155080215, "percentage": 95.19, "elapsed_time": "0:46:06", "remaining_time": "0:02:19", "throughput": 2390.33, "total_tokens": 6611664}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2153 |
{"current_steps": 10670, "total_steps": 11220, "loss": 0.0, "lr": 3.6641940761735217e-07, "epoch": 19.019607843137255, "percentage": 95.1, "elapsed_time": "0:46:03", "remaining_time": "0:02:22", "throughput": 2390.27, "total_tokens": 6605616}
|
| 2154 |
{"current_steps": 10675, "total_steps": 11220, "loss": 0.0, "lr": 3.598154263543596e-07, "epoch": 19.028520499108733, "percentage": 95.14, "elapsed_time": "0:46:04", "remaining_time": "0:02:21", "throughput": 2390.31, "total_tokens": 6608720}
|
| 2155 |
{"current_steps": 10680, "total_steps": 11220, "loss": 0.0, "lr": 3.532710679631679e-07, "epoch": 19.037433155080215, "percentage": 95.19, "elapsed_time": "0:46:06", "remaining_time": "0:02:19", "throughput": 2390.33, "total_tokens": 6611664}
|
| 2156 |
+
{"current_steps": 10685, "total_steps": 11220, "loss": 0.0, "lr": 3.467863482794348e-07, "epoch": 19.046345811051694, "percentage": 95.23, "elapsed_time": "0:46:07", "remaining_time": "0:02:18", "throughput": 2390.39, "total_tokens": 6614928}
|
| 2157 |
+
{"current_steps": 10690, "total_steps": 11220, "loss": 0.0, "lr": 3.4036128299449466e-07, "epoch": 19.055258467023172, "percentage": 95.28, "elapsed_time": "0:46:08", "remaining_time": "0:02:17", "throughput": 2390.36, "total_tokens": 6617328}
|
| 2158 |
+
{"current_steps": 10695, "total_steps": 11220, "loss": 0.0, "lr": 3.3399588765535284e-07, "epoch": 19.06417112299465, "percentage": 95.32, "elapsed_time": "0:46:09", "remaining_time": "0:02:15", "throughput": 2390.37, "total_tokens": 6619984}
|
| 2159 |
+
{"current_steps": 10700, "total_steps": 11220, "loss": 0.0, "lr": 3.276901776646135e-07, "epoch": 19.073083778966133, "percentage": 95.37, "elapsed_time": "0:46:10", "remaining_time": "0:02:14", "throughput": 2390.44, "total_tokens": 6623408}
|
| 2160 |
+
{"current_steps": 10705, "total_steps": 11220, "loss": 0.0, "lr": 3.2144416828046307e-07, "epoch": 19.08199643493761, "percentage": 95.41, "elapsed_time": "0:46:11", "remaining_time": "0:02:13", "throughput": 2390.44, "total_tokens": 6626192}
|
| 2161 |
+
{"current_steps": 10710, "total_steps": 11220, "loss": 0.0, "lr": 3.1525787461663405e-07, "epoch": 19.09090909090909, "percentage": 95.45, "elapsed_time": "0:46:13", "remaining_time": "0:02:12", "throughput": 2390.58, "total_tokens": 6630096}
|
| 2162 |
+
{"current_steps": 10715, "total_steps": 11220, "loss": 0.0, "lr": 3.091313116423522e-07, "epoch": 19.099821746880572, "percentage": 95.5, "elapsed_time": "0:46:14", "remaining_time": "0:02:10", "throughput": 2390.66, "total_tokens": 6633360}
|
| 2163 |
+
{"current_steps": 10720, "total_steps": 11220, "loss": 0.0, "lr": 3.0306449418231464e-07, "epoch": 19.10873440285205, "percentage": 95.54, "elapsed_time": "0:46:15", "remaining_time": "0:02:09", "throughput": 2390.73, "total_tokens": 6636656}
|
| 2164 |
+
{"current_steps": 10725, "total_steps": 11220, "loss": 0.0, "lr": 2.9705743691665345e-07, "epoch": 19.11764705882353, "percentage": 95.59, "elapsed_time": "0:46:17", "remaining_time": "0:02:08", "throughput": 2390.72, "total_tokens": 6639184}
|
| 2165 |
+
{"current_steps": 10730, "total_steps": 11220, "loss": 0.0, "lr": 2.9111015438088583e-07, "epoch": 19.126559714795007, "percentage": 95.63, "elapsed_time": "0:46:18", "remaining_time": "0:02:06", "throughput": 2390.76, "total_tokens": 6642256}
|
| 2166 |
+
{"current_steps": 10735, "total_steps": 11220, "loss": 0.0, "lr": 2.852226609659059e-07, "epoch": 19.13547237076649, "percentage": 95.68, "elapsed_time": "0:46:19", "remaining_time": "0:02:05", "throughput": 2390.82, "total_tokens": 6645680}
|
| 2167 |
+
{"current_steps": 10740, "total_steps": 11220, "loss": 0.0, "lr": 2.793949709179178e-07, "epoch": 19.144385026737968, "percentage": 95.72, "elapsed_time": "0:46:20", "remaining_time": "0:02:04", "throughput": 2390.84, "total_tokens": 6648784}
|
| 2168 |
+
{"current_steps": 10745, "total_steps": 11220, "loss": 0.0, "lr": 2.7362709833842757e-07, "epoch": 19.153297682709447, "percentage": 95.77, "elapsed_time": "0:46:22", "remaining_time": "0:02:02", "throughput": 2390.88, "total_tokens": 6651728}
|
| 2169 |
+
{"current_steps": 10750, "total_steps": 11220, "loss": 0.0, "lr": 2.679190571841933e-07, "epoch": 19.16221033868093, "percentage": 95.81, "elapsed_time": "0:46:23", "remaining_time": "0:02:01", "throughput": 2390.93, "total_tokens": 6654864}
|
| 2170 |
+
{"current_steps": 10755, "total_steps": 11220, "loss": 0.0, "lr": 2.62270861267197e-07, "epoch": 19.171122994652407, "percentage": 95.86, "elapsed_time": "0:46:24", "remaining_time": "0:02:00", "throughput": 2390.95, "total_tokens": 6657776}
|
| 2171 |
+
{"current_steps": 10760, "total_steps": 11220, "loss": 0.0, "lr": 2.566825242546117e-07, "epoch": 19.180035650623886, "percentage": 95.9, "elapsed_time": "0:46:25", "remaining_time": "0:01:59", "throughput": 2391.06, "total_tokens": 6661264}
|
| 2172 |
+
{"current_steps": 10765, "total_steps": 11220, "loss": 0.0, "lr": 2.511540596687678e-07, "epoch": 19.188948306595364, "percentage": 95.94, "elapsed_time": "0:46:27", "remaining_time": "0:01:57", "throughput": 2391.23, "total_tokens": 6665584}
|
| 2173 |
+
{"current_steps": 10770, "total_steps": 11220, "loss": 0.0, "lr": 2.456854808871201e-07, "epoch": 19.197860962566846, "percentage": 95.99, "elapsed_time": "0:46:28", "remaining_time": "0:01:56", "throughput": 2391.33, "total_tokens": 6669200}
|
| 2174 |
+
{"current_steps": 10775, "total_steps": 11220, "loss": 0.0, "lr": 2.4027680114221405e-07, "epoch": 19.206773618538325, "percentage": 96.03, "elapsed_time": "0:46:30", "remaining_time": "0:01:55", "throughput": 2391.42, "total_tokens": 6672624}
|
| 2175 |
+
{"current_steps": 10780, "total_steps": 11220, "loss": 0.0, "lr": 2.3492803352165303e-07, "epoch": 19.215686274509803, "percentage": 96.08, "elapsed_time": "0:46:31", "remaining_time": "0:01:53", "throughput": 2391.47, "total_tokens": 6675632}
|
| 2176 |
+
{"current_steps": 10785, "total_steps": 11220, "loss": 0.0, "lr": 2.2963919096807285e-07, "epoch": 19.224598930481285, "percentage": 96.12, "elapsed_time": "0:46:32", "remaining_time": "0:01:52", "throughput": 2391.49, "total_tokens": 6678352}
|
| 2177 |
+
{"current_steps": 10790, "total_steps": 11220, "loss": 0.0, "lr": 2.244102862791031e-07, "epoch": 19.233511586452764, "percentage": 96.17, "elapsed_time": "0:46:33", "remaining_time": "0:01:51", "throughput": 2391.52, "total_tokens": 6681296}
|
| 2178 |
+
{"current_steps": 10795, "total_steps": 11220, "loss": 0.0, "lr": 2.1924133210734222e-07, "epoch": 19.242424242424242, "percentage": 96.21, "elapsed_time": "0:46:35", "remaining_time": "0:01:50", "throughput": 2391.62, "total_tokens": 6685200}
|
| 2179 |
+
{"current_steps": 10800, "total_steps": 11220, "loss": 0.0, "lr": 2.141323409603241e-07, "epoch": 19.25133689839572, "percentage": 96.26, "elapsed_time": "0:46:36", "remaining_time": "0:01:48", "throughput": 2391.68, "total_tokens": 6688304}
|
| 2180 |
+
{"current_steps": 10805, "total_steps": 11220, "loss": 0.0, "lr": 2.0908332520047645e-07, "epoch": 19.260249554367203, "percentage": 96.3, "elapsed_time": "0:46:37", "remaining_time": "0:01:47", "throughput": 2391.67, "total_tokens": 6691024}
|
| 2181 |
+
{"current_steps": 10810, "total_steps": 11220, "loss": 0.0, "lr": 2.0409429704512096e-07, "epoch": 19.26916221033868, "percentage": 96.35, "elapsed_time": "0:46:38", "remaining_time": "0:01:46", "throughput": 2391.71, "total_tokens": 6693936}
|
| 2182 |
+
{"current_steps": 10815, "total_steps": 11220, "loss": 0.0, "lr": 1.9916526856641193e-07, "epoch": 19.27807486631016, "percentage": 96.39, "elapsed_time": "0:46:40", "remaining_time": "0:01:44", "throughput": 2391.83, "total_tokens": 6697520}
|
| 2183 |
+
{"current_steps": 10820, "total_steps": 11220, "loss": 0.0, "lr": 1.9429625169131716e-07, "epoch": 19.28698752228164, "percentage": 96.43, "elapsed_time": "0:46:41", "remaining_time": "0:01:43", "throughput": 2391.92, "total_tokens": 6700880}
|
| 2184 |
+
{"current_steps": 10825, "total_steps": 11220, "loss": 0.0, "lr": 1.8948725820160662e-07, "epoch": 19.29590017825312, "percentage": 96.48, "elapsed_time": "0:46:42", "remaining_time": "0:01:42", "throughput": 2392.01, "total_tokens": 6704432}
|
| 2185 |
+
{"current_steps": 10830, "total_steps": 11220, "loss": 0.0, "lr": 1.847382997337943e-07, "epoch": 19.3048128342246, "percentage": 96.52, "elapsed_time": "0:46:44", "remaining_time": "0:01:40", "throughput": 2392.09, "total_tokens": 6707568}
|
| 2186 |
+
{"current_steps": 10835, "total_steps": 11220, "loss": 0.0, "lr": 1.8004938777913537e-07, "epoch": 19.313725490196077, "percentage": 96.57, "elapsed_time": "0:46:45", "remaining_time": "0:01:39", "throughput": 2392.15, "total_tokens": 6710736}
|
| 2187 |
+
{"current_steps": 10840, "total_steps": 11220, "loss": 0.0, "lr": 1.754205336835818e-07, "epoch": 19.32263814616756, "percentage": 96.61, "elapsed_time": "0:46:46", "remaining_time": "0:01:38", "throughput": 2392.28, "total_tokens": 6714224}
|
| 2188 |
+
{"current_steps": 10845, "total_steps": 11220, "loss": 0.0, "lr": 1.7085174864776287e-07, "epoch": 19.331550802139038, "percentage": 96.66, "elapsed_time": "0:46:47", "remaining_time": "0:01:37", "throughput": 2392.36, "total_tokens": 6717680}
|
| 2189 |
+
{"current_steps": 10850, "total_steps": 11220, "loss": 0.0, "lr": 1.6634304372695474e-07, "epoch": 19.340463458110516, "percentage": 96.7, "elapsed_time": "0:46:49", "remaining_time": "0:01:35", "throughput": 2392.36, "total_tokens": 6720304}
|
| 2190 |
+
{"current_steps": 10855, "total_steps": 11220, "loss": 0.0, "lr": 1.6189442983105817e-07, "epoch": 19.349376114081995, "percentage": 96.75, "elapsed_time": "0:46:50", "remaining_time": "0:01:34", "throughput": 2392.45, "total_tokens": 6723728}
|
| 2191 |
+
{"current_steps": 10860, "total_steps": 11220, "loss": 0.0, "lr": 1.5750591772456802e-07, "epoch": 19.358288770053477, "percentage": 96.79, "elapsed_time": "0:46:51", "remaining_time": "0:01:33", "throughput": 2392.44, "total_tokens": 6726320}
|
| 2192 |
+
{"current_steps": 10865, "total_steps": 11220, "loss": 0.0, "lr": 1.5317751802654823e-07, "epoch": 19.367201426024955, "percentage": 96.84, "elapsed_time": "0:46:52", "remaining_time": "0:01:31", "throughput": 2392.49, "total_tokens": 6729680}
|
| 2193 |
+
{"current_steps": 10870, "total_steps": 11220, "loss": 0.0, "lr": 1.489092412106069e-07, "epoch": 19.376114081996434, "percentage": 96.88, "elapsed_time": "0:46:53", "remaining_time": "0:01:30", "throughput": 2392.45, "total_tokens": 6732080}
|
| 2194 |
+
{"current_steps": 10875, "total_steps": 11220, "loss": 0.0, "lr": 1.447010976048685e-07, "epoch": 19.385026737967916, "percentage": 96.93, "elapsed_time": "0:46:55", "remaining_time": "0:01:29", "throughput": 2392.46, "total_tokens": 6734768}
|
| 2195 |
+
{"current_steps": 10880, "total_steps": 11220, "loss": 0.0, "lr": 1.4055309739195167e-07, "epoch": 19.393939393939394, "percentage": 96.97, "elapsed_time": "0:46:56", "remaining_time": "0:01:28", "throughput": 2392.43, "total_tokens": 6737168}
|
| 2196 |
+
{"current_steps": 10885, "total_steps": 11220, "loss": 0.0, "lr": 1.3646525060894422e-07, "epoch": 19.402852049910873, "percentage": 97.01, "elapsed_time": "0:46:57", "remaining_time": "0:01:26", "throughput": 2392.45, "total_tokens": 6739824}
|
| 2197 |
+
{"current_steps": 10890, "total_steps": 11220, "loss": 0.0, "lr": 1.324375671473782e-07, "epoch": 19.41176470588235, "percentage": 97.06, "elapsed_time": "0:46:58", "remaining_time": "0:01:25", "throughput": 2392.48, "total_tokens": 6742832}
|
| 2198 |
+
{"current_steps": 10895, "total_steps": 11220, "loss": 0.0, "lr": 1.2847005675320767e-07, "epoch": 19.420677361853834, "percentage": 97.1, "elapsed_time": "0:46:59", "remaining_time": "0:01:24", "throughput": 2392.59, "total_tokens": 6746192}
|
| 2199 |
+
{"current_steps": 10900, "total_steps": 11220, "loss": 0.0, "lr": 1.2456272902677534e-07, "epoch": 19.429590017825312, "percentage": 97.15, "elapsed_time": "0:47:01", "remaining_time": "0:01:22", "throughput": 2392.69, "total_tokens": 6750064}
|
| 2200 |
+
{"current_steps": 10905, "total_steps": 11220, "loss": 0.0, "lr": 1.207155934228099e-07, "epoch": 19.43850267379679, "percentage": 97.19, "elapsed_time": "0:47:02", "remaining_time": "0:01:21", "throughput": 2392.79, "total_tokens": 6753968}
|
| 2201 |
+
{"current_steps": 10910, "total_steps": 11220, "loss": 0.0, "lr": 1.16928659250376e-07, "epoch": 19.447415329768273, "percentage": 97.24, "elapsed_time": "0:47:03", "remaining_time": "0:01:20", "throughput": 2392.83, "total_tokens": 6757200}
|
| 2202 |
+
{"current_steps": 10915, "total_steps": 11220, "loss": 0.0, "lr": 1.1320193567288529e-07, "epoch": 19.45632798573975, "percentage": 97.28, "elapsed_time": "0:47:05", "remaining_time": "0:01:18", "throughput": 2392.94, "total_tokens": 6760784}
|
| 2203 |
+
{"current_steps": 10920, "total_steps": 11220, "loss": 0.0, "lr": 1.0953543170803826e-07, "epoch": 19.46524064171123, "percentage": 97.33, "elapsed_time": "0:47:06", "remaining_time": "0:01:17", "throughput": 2393.02, "total_tokens": 6763760}
|
| 2204 |
+
{"current_steps": 10925, "total_steps": 11220, "loss": 0.0, "lr": 1.0592915622782418e-07, "epoch": 19.474153297682708, "percentage": 97.37, "elapsed_time": "0:47:07", "remaining_time": "0:01:16", "throughput": 2393.0, "total_tokens": 6766768}
|
| 2205 |
+
{"current_steps": 10930, "total_steps": 11220, "loss": 0.0, "lr": 1.0238311795850163e-07, "epoch": 19.48306595365419, "percentage": 97.42, "elapsed_time": "0:47:08", "remaining_time": "0:01:15", "throughput": 2393.0, "total_tokens": 6769616}
|
| 2206 |
+
{"current_steps": 10935, "total_steps": 11220, "loss": 0.0, "lr": 9.889732548056252e-08, "epoch": 19.49197860962567, "percentage": 97.46, "elapsed_time": "0:47:10", "remaining_time": "0:01:13", "throughput": 2393.17, "total_tokens": 6773968}
|
| 2207 |
+
{"current_steps": 10940, "total_steps": 11220, "loss": 0.0, "lr": 9.547178722872364e-08, "epoch": 19.500891265597147, "percentage": 97.5, "elapsed_time": "0:47:11", "remaining_time": "0:01:12", "throughput": 2393.19, "total_tokens": 6776752}
|
| 2208 |
+
{"current_steps": 10945, "total_steps": 11220, "loss": 0.0, "lr": 9.210651149190175e-08, "epoch": 19.509803921568626, "percentage": 97.55, "elapsed_time": "0:47:12", "remaining_time": "0:01:11", "throughput": 2393.23, "total_tokens": 6779632}
|
| 2209 |
+
{"current_steps": 10950, "total_steps": 11220, "loss": 0.0, "lr": 8.880150641319418e-08, "epoch": 19.518716577540108, "percentage": 97.59, "elapsed_time": "0:47:14", "remaining_time": "0:01:09", "throughput": 2393.29, "total_tokens": 6782800}
|
| 2210 |
+
{"current_steps": 10955, "total_steps": 11220, "loss": 0.0, "lr": 8.555677998985657e-08, "epoch": 19.527629233511586, "percentage": 97.64, "elapsed_time": "0:47:15", "remaining_time": "0:01:08", "throughput": 2393.29, "total_tokens": 6785648}
|
| 2211 |
+
{"current_steps": 10960, "total_steps": 11220, "loss": 0.0, "lr": 8.23723400732862e-08, "epoch": 19.536541889483065, "percentage": 97.68, "elapsed_time": "0:47:16", "remaining_time": "0:01:07", "throughput": 2393.33, "total_tokens": 6788944}
|
| 2212 |
+
{"current_steps": 10965, "total_steps": 11220, "loss": 0.0, "lr": 7.924819436900821e-08, "epoch": 19.545454545454547, "percentage": 97.73, "elapsed_time": "0:47:17", "remaining_time": "0:01:05", "throughput": 2393.41, "total_tokens": 6792048}
|
| 2213 |
+
{"current_steps": 10970, "total_steps": 11220, "loss": 0.0, "lr": 7.618435043664218e-08, "epoch": 19.554367201426025, "percentage": 97.77, "elapsed_time": "0:47:18", "remaining_time": "0:01:04", "throughput": 2393.45, "total_tokens": 6794896}
|
| 2214 |
+
{"current_steps": 10975, "total_steps": 11220, "loss": 0.0, "lr": 7.318081568990221e-08, "epoch": 19.563279857397504, "percentage": 97.82, "elapsed_time": "0:47:20", "remaining_time": "0:01:03", "throughput": 2393.53, "total_tokens": 6797936}
|
| 2215 |
+
{"current_steps": 10980, "total_steps": 11220, "loss": 0.0, "lr": 7.023759739656078e-08, "epoch": 19.572192513368982, "percentage": 97.86, "elapsed_time": "0:47:21", "remaining_time": "0:01:02", "throughput": 2393.61, "total_tokens": 6801328}
|
| 2216 |
+
{"current_steps": 10985, "total_steps": 11220, "loss": 0.0, "lr": 6.735470267844879e-08, "epoch": 19.581105169340464, "percentage": 97.91, "elapsed_time": "0:47:22", "remaining_time": "0:01:00", "throughput": 2393.74, "total_tokens": 6805328}
|
| 2217 |
+
{"current_steps": 10990, "total_steps": 11220, "loss": 0.0, "lr": 6.453213851142226e-08, "epoch": 19.590017825311943, "percentage": 97.95, "elapsed_time": "0:47:24", "remaining_time": "0:00:59", "throughput": 2393.77, "total_tokens": 6808176}
|
| 2218 |
+
{"current_steps": 10995, "total_steps": 11220, "loss": 0.0, "lr": 6.176991172535673e-08, "epoch": 19.59893048128342, "percentage": 97.99, "elapsed_time": "0:47:25", "remaining_time": "0:00:58", "throughput": 2393.85, "total_tokens": 6811632}
|
| 2219 |
+
{"current_steps": 11000, "total_steps": 11220, "loss": 0.0, "lr": 5.906802900412789e-08, "epoch": 19.607843137254903, "percentage": 98.04, "elapsed_time": "0:47:26", "remaining_time": "0:00:56", "throughput": 2393.94, "total_tokens": 6814864}
|
| 2220 |
+
{"current_steps": 11005, "total_steps": 11220, "loss": 0.0, "lr": 5.642649688559487e-08, "epoch": 19.616755793226382, "percentage": 98.08, "elapsed_time": "0:47:27", "remaining_time": "0:00:55", "throughput": 2394.01, "total_tokens": 6817936}
|
| 2221 |
+
{"current_steps": 11010, "total_steps": 11220, "loss": 0.0, "lr": 5.384532176157808e-08, "epoch": 19.62566844919786, "percentage": 98.13, "elapsed_time": "0:47:29", "remaining_time": "0:00:54", "throughput": 2394.04, "total_tokens": 6820976}
|
| 2222 |
+
{"current_steps": 11015, "total_steps": 11220, "loss": 0.0, "lr": 5.132450987785364e-08, "epoch": 19.63458110516934, "percentage": 98.17, "elapsed_time": "0:47:30", "remaining_time": "0:00:53", "throughput": 2394.12, "total_tokens": 6824368}
|
| 2223 |
+
{"current_steps": 11020, "total_steps": 11220, "loss": 0.0, "lr": 4.8864067334136735e-08, "epoch": 19.64349376114082, "percentage": 98.22, "elapsed_time": "0:47:31", "remaining_time": "0:00:51", "throughput": 2394.24, "total_tokens": 6828240}
|
| 2224 |
+
{"current_steps": 11025, "total_steps": 11220, "loss": 0.0, "lr": 4.6464000084059376e-08, "epoch": 19.6524064171123, "percentage": 98.26, "elapsed_time": "0:47:33", "remaining_time": "0:00:50", "throughput": 2394.32, "total_tokens": 6831376}
|
| 2225 |
+
{"current_steps": 11030, "total_steps": 11220, "loss": 0.0, "lr": 4.412431393516492e-08, "epoch": 19.661319073083778, "percentage": 98.31, "elapsed_time": "0:47:34", "remaining_time": "0:00:49", "throughput": 2394.33, "total_tokens": 6834064}
|
| 2226 |
+
{"current_steps": 11035, "total_steps": 11220, "loss": 0.0, "lr": 4.184501454888856e-08, "epoch": 19.67023172905526, "percentage": 98.35, "elapsed_time": "0:47:35", "remaining_time": "0:00:47", "throughput": 2394.36, "total_tokens": 6837136}
|
| 2227 |
+
{"current_steps": 11040, "total_steps": 11220, "loss": 0.0, "lr": 3.9626107440543515e-08, "epoch": 19.67914438502674, "percentage": 98.4, "elapsed_time": "0:47:36", "remaining_time": "0:00:46", "throughput": 2394.37, "total_tokens": 6840048}
|
| 2228 |
+
{"current_steps": 11045, "total_steps": 11220, "loss": 0.0, "lr": 3.746759797931265e-08, "epoch": 19.688057040998217, "percentage": 98.44, "elapsed_time": "0:47:38", "remaining_time": "0:00:45", "throughput": 2394.45, "total_tokens": 6843568}
|
| 2229 |
+
{"current_steps": 11050, "total_steps": 11220, "loss": 0.0, "lr": 3.536949138822909e-08, "epoch": 19.696969696969695, "percentage": 98.48, "elapsed_time": "0:47:39", "remaining_time": "0:00:43", "throughput": 2394.6, "total_tokens": 6847312}
|
| 2230 |
+
{"current_steps": 11055, "total_steps": 11220, "loss": 0.0, "lr": 3.333179274417064e-08, "epoch": 19.705882352941178, "percentage": 98.53, "elapsed_time": "0:47:40", "remaining_time": "0:00:42", "throughput": 2394.59, "total_tokens": 6849904}
|
| 2231 |
+
{"current_steps": 11060, "total_steps": 11220, "loss": 0.0, "lr": 3.135450697783482e-08, "epoch": 19.714795008912656, "percentage": 98.57, "elapsed_time": "0:47:41", "remaining_time": "0:00:41", "throughput": 2394.65, "total_tokens": 6853104}
|
| 2232 |
+
{"current_steps": 11065, "total_steps": 11220, "loss": 0.0, "lr": 2.943763887374995e-08, "epoch": 19.723707664884135, "percentage": 98.62, "elapsed_time": "0:47:43", "remaining_time": "0:00:40", "throughput": 2394.67, "total_tokens": 6856208}
|
| 2233 |
+
{"current_steps": 11070, "total_steps": 11220, "loss": 0.0, "lr": 2.7581193070233546e-08, "epoch": 19.732620320855617, "percentage": 98.66, "elapsed_time": "0:47:44", "remaining_time": "0:00:38", "throughput": 2394.74, "total_tokens": 6859440}
|
| 2234 |
+
{"current_steps": 11075, "total_steps": 11220, "loss": 0.0, "lr": 2.5785174059408947e-08, "epoch": 19.741532976827095, "percentage": 98.71, "elapsed_time": "0:47:45", "remaining_time": "0:00:37", "throughput": 2394.74, "total_tokens": 6862096}
|
| 2235 |
+
{"current_steps": 11080, "total_steps": 11220, "loss": 0.0, "lr": 2.4049586187174787e-08, "epoch": 19.750445632798574, "percentage": 98.75, "elapsed_time": "0:47:46", "remaining_time": "0:00:36", "throughput": 2394.75, "total_tokens": 6864848}
|
| 2236 |
+
{"current_steps": 11085, "total_steps": 11220, "loss": 0.0, "lr": 2.237443365320502e-08, "epoch": 19.759358288770052, "percentage": 98.8, "elapsed_time": "0:47:47", "remaining_time": "0:00:34", "throughput": 2394.83, "total_tokens": 6868336}
|
| 2237 |
+
{"current_steps": 11090, "total_steps": 11220, "loss": 0.0, "lr": 2.0759720510937773e-08, "epoch": 19.768270944741534, "percentage": 98.84, "elapsed_time": "0:47:49", "remaining_time": "0:00:33", "throughput": 2394.81, "total_tokens": 6870896}
|
| 2238 |
+
{"current_steps": 11095, "total_steps": 11220, "loss": 0.0, "lr": 1.9205450667558743e-08, "epoch": 19.777183600713013, "percentage": 98.89, "elapsed_time": "0:47:50", "remaining_time": "0:00:32", "throughput": 2394.86, "total_tokens": 6873936}
|
| 2239 |
+
{"current_steps": 11100, "total_steps": 11220, "loss": 0.0, "lr": 1.7711627883998382e-08, "epoch": 19.78609625668449, "percentage": 98.93, "elapsed_time": "0:47:51", "remaining_time": "0:00:31", "throughput": 2394.87, "total_tokens": 6876784}
|
| 2240 |
+
{"current_steps": 11105, "total_steps": 11220, "loss": 0.0, "lr": 1.627825577492359e-08, "epoch": 19.795008912655973, "percentage": 98.98, "elapsed_time": "0:47:52", "remaining_time": "0:00:29", "throughput": 2394.9, "total_tokens": 6879568}
|
| 2241 |
+
{"current_steps": 11110, "total_steps": 11220, "loss": 0.0, "lr": 1.4905337808721053e-08, "epoch": 19.80392156862745, "percentage": 99.02, "elapsed_time": "0:47:53", "remaining_time": "0:00:28", "throughput": 2394.92, "total_tokens": 6882256}
|
| 2242 |
+
{"current_steps": 11115, "total_steps": 11220, "loss": 0.0, "lr": 1.3592877307500029e-08, "epoch": 19.81283422459893, "percentage": 99.06, "elapsed_time": "0:47:54", "remaining_time": "0:00:27", "throughput": 2394.97, "total_tokens": 6885168}
|
| 2243 |
+
{"current_steps": 11120, "total_steps": 11220, "loss": 0.0, "lr": 1.2340877447072907e-08, "epoch": 19.82174688057041, "percentage": 99.11, "elapsed_time": "0:47:55", "remaining_time": "0:00:25", "throughput": 2394.97, "total_tokens": 6887856}
|
| 2244 |
+
{"current_steps": 11125, "total_steps": 11220, "loss": 0.0, "lr": 1.114934125695799e-08, "epoch": 19.83065953654189, "percentage": 99.15, "elapsed_time": "0:47:57", "remaining_time": "0:00:24", "throughput": 2395.03, "total_tokens": 6891024}
|
| 2245 |
+
{"current_steps": 11130, "total_steps": 11220, "loss": 0.0, "lr": 1.001827162036284e-08, "epoch": 19.83957219251337, "percentage": 99.2, "elapsed_time": "0:47:58", "remaining_time": "0:00:23", "throughput": 2395.01, "total_tokens": 6893904}
|
| 2246 |
+
{"current_steps": 11135, "total_steps": 11220, "loss": 0.0, "lr": 8.947671274184277e-09, "epoch": 19.848484848484848, "percentage": 99.24, "elapsed_time": "0:47:59", "remaining_time": "0:00:21", "throughput": 2394.94, "total_tokens": 6896336}
|
| 2247 |
+
{"current_steps": 11140, "total_steps": 11220, "loss": 0.0, "lr": 7.937542808997278e-09, "epoch": 19.85739750445633, "percentage": 99.29, "elapsed_time": "0:48:00", "remaining_time": "0:00:20", "throughput": 2394.98, "total_tokens": 6899120}
|
| 2248 |
+
{"current_steps": 11145, "total_steps": 11220, "loss": 0.0, "lr": 6.987888669052201e-09, "epoch": 19.86631016042781, "percentage": 99.33, "elapsed_time": "0:48:01", "remaining_time": "0:00:19", "throughput": 2394.97, "total_tokens": 6901808}
|
| 2249 |
+
{"current_steps": 11150, "total_steps": 11220, "loss": 0.0, "lr": 6.098711152266456e-09, "epoch": 19.875222816399287, "percentage": 99.38, "elapsed_time": "0:48:03", "remaining_time": "0:00:18", "throughput": 2395.05, "total_tokens": 6905200}
|
| 2250 |
+
{"current_steps": 11155, "total_steps": 11220, "loss": 0.0, "lr": 5.270012410216185e-09, "epoch": 19.884135472370765, "percentage": 99.42, "elapsed_time": "0:48:04", "remaining_time": "0:00:16", "throughput": 2395.08, "total_tokens": 6908272}
|
| 2251 |
+
{"current_steps": 11160, "total_steps": 11220, "loss": 0.0, "lr": 4.50179444814458e-09, "epoch": 19.893048128342247, "percentage": 99.47, "elapsed_time": "0:48:05", "remaining_time": "0:00:15", "throughput": 2395.17, "total_tokens": 6911760}
|
| 2252 |
+
{"current_steps": 11165, "total_steps": 11220, "loss": 0.0, "lr": 3.794059124934135e-09, "epoch": 19.901960784313726, "percentage": 99.51, "elapsed_time": "0:48:07", "remaining_time": "0:00:14", "throughput": 2395.29, "total_tokens": 6915568}
|
| 2253 |
+
{"current_steps": 11170, "total_steps": 11220, "loss": 0.0, "lr": 3.146808153123293e-09, "epoch": 19.910873440285204, "percentage": 99.55, "elapsed_time": "0:48:08", "remaining_time": "0:00:12", "throughput": 2395.35, "total_tokens": 6918832}
|
| 2254 |
+
{"current_steps": 11175, "total_steps": 11220, "loss": 0.0, "lr": 2.560043098895348e-09, "epoch": 19.919786096256683, "percentage": 99.6, "elapsed_time": "0:48:09", "remaining_time": "0:00:11", "throughput": 2395.41, "total_tokens": 6922288}
|
| 2255 |
+
{"current_steps": 11180, "total_steps": 11220, "loss": 0.0, "lr": 2.0337653820645673e-09, "epoch": 19.928698752228165, "percentage": 99.64, "elapsed_time": "0:48:10", "remaining_time": "0:00:10", "throughput": 2395.47, "total_tokens": 6925232}
|
| 2256 |
+
{"current_steps": 11185, "total_steps": 11220, "loss": 0.0, "lr": 1.5679762760900663e-09, "epoch": 19.937611408199643, "percentage": 99.69, "elapsed_time": "0:48:12", "remaining_time": "0:00:09", "throughput": 2395.42, "total_tokens": 6927792}
|
| 2257 |
+
{"current_steps": 11190, "total_steps": 11220, "loss": 0.0, "lr": 1.162676908059157e-09, "epoch": 19.946524064171122, "percentage": 99.73, "elapsed_time": "0:48:13", "remaining_time": "0:00:07", "throughput": 2395.46, "total_tokens": 6931024}
|
| 2258 |
+
{"current_steps": 11195, "total_steps": 11220, "loss": 0.0, "lr": 8.178682586928998e-10, "epoch": 19.955436720142604, "percentage": 99.78, "elapsed_time": "0:48:14", "remaining_time": "0:00:06", "throughput": 2395.46, "total_tokens": 6933616}
|
| 2259 |
+
{"current_steps": 11200, "total_steps": 11220, "loss": 0.0, "lr": 5.335511623377753e-10, "epoch": 19.964349376114082, "percentage": 99.82, "elapsed_time": "0:48:15", "remaining_time": "0:00:05", "throughput": 2395.47, "total_tokens": 6936592}
|
| 2260 |
+
{"current_steps": 11205, "total_steps": 11220, "loss": 0.0, "lr": 3.0972630696846084e-10, "epoch": 19.97326203208556, "percentage": 99.87, "elapsed_time": "0:48:16", "remaining_time": "0:00:03", "throughput": 2395.43, "total_tokens": 6939024}
|
| 2261 |
+
{"current_steps": 11210, "total_steps": 11220, "loss": 0.0, "lr": 1.463942341850544e-10, "epoch": 19.98217468805704, "percentage": 99.91, "elapsed_time": "0:48:17", "remaining_time": "0:00:02", "throughput": 2395.46, "total_tokens": 6942032}
|
| 2262 |
+
{"current_steps": 11215, "total_steps": 11220, "loss": 0.0, "lr": 4.35553392047483e-11, "epoch": 19.99108734402852, "percentage": 99.96, "elapsed_time": "0:48:19", "remaining_time": "0:00:01", "throughput": 2395.45, "total_tokens": 6944464}
|
| 2263 |
+
{"current_steps": 11220, "total_steps": 11220, "loss": 0.0, "lr": 1.2098708757068978e-12, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:48:20", "remaining_time": "0:00:00", "throughput": 2395.37, "total_tokens": 6947288}
|
| 2264 |
+
{"current_steps": 11220, "total_steps": 11220, "eval_loss": 0.2706851363182068, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:48:24", "remaining_time": "0:00:00", "throughput": 2391.58, "total_tokens": 6947288}
|
| 2265 |
+
{"current_steps": 11220, "total_steps": 11220, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:48:26", "remaining_time": "0:00:00", "throughput": 2390.58, "total_tokens": 6947288}
|