Training in progress, step 10720
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +106 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1638528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:972e1511b2c5348ecd5740b0ebd4420be7a60a408a73255acf8c93100dbfcd60
|
| 3 |
size 1638528
|
trainer_log.jsonl
CHANGED
|
@@ -2057,3 +2057,109 @@
|
|
| 2057 |
{"current_steps": 10190, "total_steps": 10720, "loss": 0.1653, "lr": 7.455417381375451e-06, "epoch": 19.011194029850746, "percentage": 95.06, "elapsed_time": "0:27:34", "remaining_time": "0:01:26", "throughput": 1748.63, "total_tokens": 2893008}
|
| 2058 |
{"current_steps": 10195, "total_steps": 10720, "loss": 0.2549, "lr": 7.316016870279441e-06, "epoch": 19.020522388059703, "percentage": 95.1, "elapsed_time": "0:27:35", "remaining_time": "0:01:25", "throughput": 1748.66, "total_tokens": 2894416}
|
| 2059 |
{"current_steps": 10200, "total_steps": 10720, "loss": 0.1339, "lr": 7.177922330120712e-06, "epoch": 19.029850746268657, "percentage": 95.15, "elapsed_time": "0:27:35", "remaining_time": "0:01:24", "throughput": 1748.73, "total_tokens": 2895856}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2057 |
{"current_steps": 10190, "total_steps": 10720, "loss": 0.1653, "lr": 7.455417381375451e-06, "epoch": 19.011194029850746, "percentage": 95.06, "elapsed_time": "0:27:34", "remaining_time": "0:01:26", "throughput": 1748.63, "total_tokens": 2893008}
|
| 2058 |
{"current_steps": 10195, "total_steps": 10720, "loss": 0.2549, "lr": 7.316016870279441e-06, "epoch": 19.020522388059703, "percentage": 95.1, "elapsed_time": "0:27:35", "remaining_time": "0:01:25", "throughput": 1748.66, "total_tokens": 2894416}
|
| 2059 |
{"current_steps": 10200, "total_steps": 10720, "loss": 0.1339, "lr": 7.177922330120712e-06, "epoch": 19.029850746268657, "percentage": 95.15, "elapsed_time": "0:27:35", "remaining_time": "0:01:24", "throughput": 1748.73, "total_tokens": 2895856}
|
| 2060 |
+
{"current_steps": 10205, "total_steps": 10720, "loss": 0.2286, "lr": 7.041134126950233e-06, "epoch": 19.03917910447761, "percentage": 95.2, "elapsed_time": "0:27:36", "remaining_time": "0:01:23", "throughput": 1748.84, "total_tokens": 2897392}
|
| 2061 |
+
{"current_steps": 10210, "total_steps": 10720, "loss": 0.299, "lr": 6.9056526233562955e-06, "epoch": 19.04850746268657, "percentage": 95.24, "elapsed_time": "0:27:37", "remaining_time": "0:01:22", "throughput": 1748.88, "total_tokens": 2898768}
|
| 2062 |
+
{"current_steps": 10215, "total_steps": 10720, "loss": 0.2895, "lr": 6.771478178463353e-06, "epoch": 19.057835820895523, "percentage": 95.29, "elapsed_time": "0:27:38", "remaining_time": "0:01:21", "throughput": 1748.95, "total_tokens": 2900240}
|
| 2063 |
+
{"current_steps": 10220, "total_steps": 10720, "loss": 0.2119, "lr": 6.638611147931406e-06, "epoch": 19.067164179104477, "percentage": 95.34, "elapsed_time": "0:27:39", "remaining_time": "0:01:21", "throughput": 1749.01, "total_tokens": 2901680}
|
| 2064 |
+
{"current_steps": 10225, "total_steps": 10720, "loss": 0.3805, "lr": 6.507051883954618e-06, "epoch": 19.07649253731343, "percentage": 95.38, "elapsed_time": "0:27:39", "remaining_time": "0:01:20", "throughput": 1749.07, "total_tokens": 2903120}
|
| 2065 |
+
{"current_steps": 10230, "total_steps": 10720, "loss": 0.2819, "lr": 6.376800735260757e-06, "epoch": 19.08582089552239, "percentage": 95.43, "elapsed_time": "0:27:40", "remaining_time": "0:01:19", "throughput": 1749.12, "total_tokens": 2904528}
|
| 2066 |
+
{"current_steps": 10235, "total_steps": 10720, "loss": 0.1549, "lr": 6.247858047110145e-06, "epoch": 19.095149253731343, "percentage": 95.48, "elapsed_time": "0:27:41", "remaining_time": "0:01:18", "throughput": 1749.18, "total_tokens": 2905936}
|
| 2067 |
+
{"current_steps": 10240, "total_steps": 10720, "loss": 0.4115, "lr": 6.1202241612947075e-06, "epoch": 19.104477611940297, "percentage": 95.52, "elapsed_time": "0:27:42", "remaining_time": "0:01:17", "throughput": 1749.25, "total_tokens": 2907376}
|
| 2068 |
+
{"current_steps": 10245, "total_steps": 10720, "loss": 0.2842, "lr": 5.993899416137039e-06, "epoch": 19.113805970149254, "percentage": 95.57, "elapsed_time": "0:27:42", "remaining_time": "0:01:17", "throughput": 1749.29, "total_tokens": 2908784}
|
| 2069 |
+
{"current_steps": 10250, "total_steps": 10720, "loss": 0.2848, "lr": 5.868884146489617e-06, "epoch": 19.12313432835821, "percentage": 95.62, "elapsed_time": "0:27:43", "remaining_time": "0:01:16", "throughput": 1749.38, "total_tokens": 2910352}
|
| 2070 |
+
{"current_steps": 10255, "total_steps": 10720, "loss": 0.1694, "lr": 5.7451786837339205e-06, "epoch": 19.132462686567163, "percentage": 95.66, "elapsed_time": "0:27:44", "remaining_time": "0:01:15", "throughput": 1749.49, "total_tokens": 2911888}
|
| 2071 |
+
{"current_steps": 10260, "total_steps": 10720, "loss": 0.1688, "lr": 5.622783355779315e-06, "epoch": 19.14179104477612, "percentage": 95.71, "elapsed_time": "0:27:45", "remaining_time": "0:01:14", "throughput": 1749.51, "total_tokens": 2913264}
|
| 2072 |
+
{"current_steps": 10265, "total_steps": 10720, "loss": 0.2645, "lr": 5.501698487062445e-06, "epoch": 19.151119402985074, "percentage": 95.76, "elapsed_time": "0:27:45", "remaining_time": "0:01:13", "throughput": 1749.58, "total_tokens": 2914768}
|
| 2073 |
+
{"current_steps": 10270, "total_steps": 10720, "loss": 0.2812, "lr": 5.3819243985463454e-06, "epoch": 19.16044776119403, "percentage": 95.8, "elapsed_time": "0:27:46", "remaining_time": "0:01:13", "throughput": 1749.62, "total_tokens": 2916208}
|
| 2074 |
+
{"current_steps": 10275, "total_steps": 10720, "loss": 0.3057, "lr": 5.263461407719438e-06, "epoch": 19.169776119402986, "percentage": 95.85, "elapsed_time": "0:27:47", "remaining_time": "0:01:12", "throughput": 1749.66, "total_tokens": 2917616}
|
| 2075 |
+
{"current_steps": 10280, "total_steps": 10720, "loss": 0.3074, "lr": 5.146309828594875e-06, "epoch": 19.17910447761194, "percentage": 95.9, "elapsed_time": "0:27:48", "remaining_time": "0:01:11", "throughput": 1749.66, "total_tokens": 2918928}
|
| 2076 |
+
{"current_steps": 10285, "total_steps": 10720, "loss": 0.2349, "lr": 5.030469971709472e-06, "epoch": 19.188432835820894, "percentage": 95.94, "elapsed_time": "0:27:49", "remaining_time": "0:01:10", "throughput": 1749.73, "total_tokens": 2920368}
|
| 2077 |
+
{"current_steps": 10290, "total_steps": 10720, "loss": 0.1973, "lr": 4.91594214412322e-06, "epoch": 19.19776119402985, "percentage": 95.99, "elapsed_time": "0:27:49", "remaining_time": "0:01:09", "throughput": 1749.82, "total_tokens": 2921872}
|
| 2078 |
+
{"current_steps": 10295, "total_steps": 10720, "loss": 0.3659, "lr": 4.80272664941811e-06, "epoch": 19.207089552238806, "percentage": 96.04, "elapsed_time": "0:27:50", "remaining_time": "0:01:08", "throughput": 1749.93, "total_tokens": 2923472}
|
| 2079 |
+
{"current_steps": 10300, "total_steps": 10720, "loss": 0.4049, "lr": 4.690823787697473e-06, "epoch": 19.21641791044776, "percentage": 96.08, "elapsed_time": "0:27:51", "remaining_time": "0:01:08", "throughput": 1749.98, "total_tokens": 2924848}
|
| 2080 |
+
{"current_steps": 10305, "total_steps": 10720, "loss": 0.1685, "lr": 4.5802338555854254e-06, "epoch": 19.225746268656717, "percentage": 96.13, "elapsed_time": "0:27:52", "remaining_time": "0:01:07", "throughput": 1750.03, "total_tokens": 2926288}
|
| 2081 |
+
{"current_steps": 10310, "total_steps": 10720, "loss": 0.1647, "lr": 4.4709571462256956e-06, "epoch": 19.23507462686567, "percentage": 96.18, "elapsed_time": "0:27:52", "remaining_time": "0:01:06", "throughput": 1750.09, "total_tokens": 2927696}
|
| 2082 |
+
{"current_steps": 10315, "total_steps": 10720, "loss": 0.1864, "lr": 4.36299394928108e-06, "epoch": 19.244402985074625, "percentage": 96.22, "elapsed_time": "0:27:53", "remaining_time": "0:01:05", "throughput": 1750.16, "total_tokens": 2929136}
|
| 2083 |
+
{"current_steps": 10320, "total_steps": 10720, "loss": 0.1702, "lr": 4.256344550932434e-06, "epoch": 19.253731343283583, "percentage": 96.27, "elapsed_time": "0:27:54", "remaining_time": "0:01:04", "throughput": 1750.19, "total_tokens": 2930512}
|
| 2084 |
+
{"current_steps": 10325, "total_steps": 10720, "loss": 0.2484, "lr": 4.1510092338784e-06, "epoch": 19.263059701492537, "percentage": 96.32, "elapsed_time": "0:27:55", "remaining_time": "0:01:04", "throughput": 1750.23, "total_tokens": 2931888}
|
| 2085 |
+
{"current_steps": 10330, "total_steps": 10720, "loss": 0.2314, "lr": 4.046988277334185e-06, "epoch": 19.27238805970149, "percentage": 96.36, "elapsed_time": "0:27:55", "remaining_time": "0:01:03", "throughput": 1750.25, "total_tokens": 2933200}
|
| 2086 |
+
{"current_steps": 10335, "total_steps": 10720, "loss": 0.2918, "lr": 3.944281957030893e-06, "epoch": 19.28171641791045, "percentage": 96.41, "elapsed_time": "0:27:56", "remaining_time": "0:01:02", "throughput": 1750.34, "total_tokens": 2934704}
|
| 2087 |
+
{"current_steps": 10340, "total_steps": 10720, "loss": 0.2646, "lr": 3.842890545215028e-06, "epoch": 19.291044776119403, "percentage": 96.46, "elapsed_time": "0:27:57", "remaining_time": "0:01:01", "throughput": 1750.41, "total_tokens": 2936176}
|
| 2088 |
+
{"current_steps": 10345, "total_steps": 10720, "loss": 0.261, "lr": 3.742814310647602e-06, "epoch": 19.300373134328357, "percentage": 96.5, "elapsed_time": "0:27:58", "remaining_time": "0:01:00", "throughput": 1750.47, "total_tokens": 2937616}
|
| 2089 |
+
{"current_steps": 10350, "total_steps": 10720, "loss": 0.15, "lr": 3.6440535186034184e-06, "epoch": 19.309701492537314, "percentage": 96.55, "elapsed_time": "0:27:58", "remaining_time": "0:01:00", "throughput": 1750.5, "total_tokens": 2938992}
|
| 2090 |
+
{"current_steps": 10355, "total_steps": 10720, "loss": 0.2183, "lr": 3.5466084308704017e-06, "epoch": 19.31902985074627, "percentage": 96.6, "elapsed_time": "0:27:59", "remaining_time": "0:00:59", "throughput": 1750.49, "total_tokens": 2940240}
|
| 2091 |
+
{"current_steps": 10360, "total_steps": 10720, "loss": 0.1213, "lr": 3.4504793057489326e-06, "epoch": 19.328358208955223, "percentage": 96.64, "elapsed_time": "0:28:00", "remaining_time": "0:00:58", "throughput": 1750.56, "total_tokens": 2941776}
|
| 2092 |
+
{"current_steps": 10365, "total_steps": 10720, "loss": 0.2515, "lr": 3.3556663980511826e-06, "epoch": 19.33768656716418, "percentage": 96.69, "elapsed_time": "0:28:01", "remaining_time": "0:00:57", "throughput": 1750.62, "total_tokens": 2943184}
|
| 2093 |
+
{"current_steps": 10370, "total_steps": 10720, "loss": 0.2432, "lr": 3.2621699591001695e-06, "epoch": 19.347014925373134, "percentage": 96.74, "elapsed_time": "0:28:01", "remaining_time": "0:00:56", "throughput": 1750.66, "total_tokens": 2944592}
|
| 2094 |
+
{"current_steps": 10375, "total_steps": 10720, "loss": 0.2009, "lr": 3.1699902367295917e-06, "epoch": 19.35634328358209, "percentage": 96.78, "elapsed_time": "0:28:02", "remaining_time": "0:00:55", "throughput": 1750.66, "total_tokens": 2945904}
|
| 2095 |
+
{"current_steps": 10380, "total_steps": 10720, "loss": 0.1403, "lr": 3.079127475282717e-06, "epoch": 19.365671641791046, "percentage": 96.83, "elapsed_time": "0:28:03", "remaining_time": "0:00:55", "throughput": 1750.69, "total_tokens": 2947312}
|
| 2096 |
+
{"current_steps": 10385, "total_steps": 10720, "loss": 0.155, "lr": 2.9895819156119943e-06, "epoch": 19.375, "percentage": 96.88, "elapsed_time": "0:28:04", "remaining_time": "0:00:54", "throughput": 1750.84, "total_tokens": 2948976}
|
| 2097 |
+
{"current_steps": 10390, "total_steps": 10720, "loss": 0.2388, "lr": 2.9013537950782765e-06, "epoch": 19.384328358208954, "percentage": 96.92, "elapsed_time": "0:28:05", "remaining_time": "0:00:53", "throughput": 1750.87, "total_tokens": 2950320}
|
| 2098 |
+
{"current_steps": 10395, "total_steps": 10720, "loss": 0.4195, "lr": 2.8144433475502105e-06, "epoch": 19.39365671641791, "percentage": 96.97, "elapsed_time": "0:28:05", "remaining_time": "0:00:52", "throughput": 1750.86, "total_tokens": 2951568}
|
| 2099 |
+
{"current_steps": 10400, "total_steps": 10720, "loss": 0.2557, "lr": 2.728850803403793e-06, "epoch": 19.402985074626866, "percentage": 97.01, "elapsed_time": "0:28:06", "remaining_time": "0:00:51", "throughput": 1750.84, "total_tokens": 2952752}
|
| 2100 |
+
{"current_steps": 10405, "total_steps": 10720, "loss": 0.1643, "lr": 2.644576389521425e-06, "epoch": 19.41231343283582, "percentage": 97.06, "elapsed_time": "0:28:07", "remaining_time": "0:00:51", "throughput": 1750.86, "total_tokens": 2954096}
|
| 2101 |
+
{"current_steps": 10410, "total_steps": 10720, "loss": 0.253, "lr": 2.5616203292916916e-06, "epoch": 19.421641791044777, "percentage": 97.11, "elapsed_time": "0:28:07", "remaining_time": "0:00:50", "throughput": 1750.87, "total_tokens": 2955440}
|
| 2102 |
+
{"current_steps": 10415, "total_steps": 10720, "loss": 0.2437, "lr": 2.479982842608475e-06, "epoch": 19.43097014925373, "percentage": 97.15, "elapsed_time": "0:28:08", "remaining_time": "0:00:49", "throughput": 1750.91, "total_tokens": 2956784}
|
| 2103 |
+
{"current_steps": 10420, "total_steps": 10720, "loss": 0.2752, "lr": 2.3996641458704504e-06, "epoch": 19.440298507462686, "percentage": 97.2, "elapsed_time": "0:28:09", "remaining_time": "0:00:48", "throughput": 1750.96, "total_tokens": 2958192}
|
| 2104 |
+
{"current_steps": 10425, "total_steps": 10720, "loss": 0.2423, "lr": 2.320664451980592e-06, "epoch": 19.449626865671643, "percentage": 97.25, "elapsed_time": "0:28:10", "remaining_time": "0:00:47", "throughput": 1751.0, "total_tokens": 2959600}
|
| 2105 |
+
{"current_steps": 10430, "total_steps": 10720, "loss": 0.1503, "lr": 2.2429839703456136e-06, "epoch": 19.458955223880597, "percentage": 97.29, "elapsed_time": "0:28:11", "remaining_time": "0:00:47", "throughput": 1751.1, "total_tokens": 2961136}
|
| 2106 |
+
{"current_steps": 10435, "total_steps": 10720, "loss": 0.2785, "lr": 2.1666229068753594e-06, "epoch": 19.46828358208955, "percentage": 97.34, "elapsed_time": "0:28:11", "remaining_time": "0:00:46", "throughput": 1751.13, "total_tokens": 2962512}
|
| 2107 |
+
{"current_steps": 10440, "total_steps": 10720, "loss": 0.2599, "lr": 2.091581463981973e-06, "epoch": 19.47761194029851, "percentage": 97.39, "elapsed_time": "0:28:12", "remaining_time": "0:00:45", "throughput": 1751.13, "total_tokens": 2963760}
|
| 2108 |
+
{"current_steps": 10445, "total_steps": 10720, "loss": 0.2214, "lr": 2.0178598405800606e-06, "epoch": 19.486940298507463, "percentage": 97.43, "elapsed_time": "0:28:13", "remaining_time": "0:00:44", "throughput": 1751.15, "total_tokens": 2965168}
|
| 2109 |
+
{"current_steps": 10450, "total_steps": 10720, "loss": 0.195, "lr": 1.945458232085473e-06, "epoch": 19.496268656716417, "percentage": 97.48, "elapsed_time": "0:28:14", "remaining_time": "0:00:43", "throughput": 1751.22, "total_tokens": 2966608}
|
| 2110 |
+
{"current_steps": 10455, "total_steps": 10720, "loss": 0.1943, "lr": 1.8743768304151366e-06, "epoch": 19.505597014925375, "percentage": 97.53, "elapsed_time": "0:28:14", "remaining_time": "0:00:42", "throughput": 1751.33, "total_tokens": 2968144}
|
| 2111 |
+
{"current_steps": 10460, "total_steps": 10720, "loss": 0.2104, "lr": 1.8046158239864996e-06, "epoch": 19.51492537313433, "percentage": 97.57, "elapsed_time": "0:28:15", "remaining_time": "0:00:42", "throughput": 1751.39, "total_tokens": 2969616}
|
| 2112 |
+
{"current_steps": 10465, "total_steps": 10720, "loss": 0.2493, "lr": 1.7361753977169215e-06, "epoch": 19.524253731343283, "percentage": 97.62, "elapsed_time": "0:28:16", "remaining_time": "0:00:41", "throughput": 1751.39, "total_tokens": 2970960}
|
| 2113 |
+
{"current_steps": 10470, "total_steps": 10720, "loss": 0.1702, "lr": 1.6690557330233947e-06, "epoch": 19.53358208955224, "percentage": 97.67, "elapsed_time": "0:28:17", "remaining_time": "0:00:40", "throughput": 1751.41, "total_tokens": 2972336}
|
| 2114 |
+
{"current_steps": 10475, "total_steps": 10720, "loss": 0.2257, "lr": 1.6032570078217678e-06, "epoch": 19.542910447761194, "percentage": 97.71, "elapsed_time": "0:28:17", "remaining_time": "0:00:39", "throughput": 1751.39, "total_tokens": 2973584}
|
| 2115 |
+
{"current_steps": 10480, "total_steps": 10720, "loss": 0.2677, "lr": 1.5387793965265794e-06, "epoch": 19.55223880597015, "percentage": 97.76, "elapsed_time": "0:28:18", "remaining_time": "0:00:38", "throughput": 1751.38, "total_tokens": 2974864}
|
| 2116 |
+
{"current_steps": 10485, "total_steps": 10720, "loss": 0.3634, "lr": 1.4756230700503914e-06, "epoch": 19.561567164179106, "percentage": 97.81, "elapsed_time": "0:28:19", "remaining_time": "0:00:38", "throughput": 1751.43, "total_tokens": 2976304}
|
| 2117 |
+
{"current_steps": 10490, "total_steps": 10720, "loss": 0.2683, "lr": 1.4137881958034006e-06, "epoch": 19.57089552238806, "percentage": 97.85, "elapsed_time": "0:28:20", "remaining_time": "0:00:37", "throughput": 1751.42, "total_tokens": 2977552}
|
| 2118 |
+
{"current_steps": 10495, "total_steps": 10720, "loss": 0.2025, "lr": 1.3532749376929944e-06, "epoch": 19.580223880597014, "percentage": 97.9, "elapsed_time": "0:28:20", "remaining_time": "0:00:36", "throughput": 1751.44, "total_tokens": 2978928}
|
| 2119 |
+
{"current_steps": 10500, "total_steps": 10720, "loss": 0.1937, "lr": 1.2940834561233627e-06, "epoch": 19.58955223880597, "percentage": 97.95, "elapsed_time": "0:28:21", "remaining_time": "0:00:35", "throughput": 1751.53, "total_tokens": 2980432}
|
| 2120 |
+
{"current_steps": 10505, "total_steps": 10720, "loss": 0.2243, "lr": 1.236213907994943e-06, "epoch": 19.598880597014926, "percentage": 97.99, "elapsed_time": "0:28:22", "remaining_time": "0:00:34", "throughput": 1751.58, "total_tokens": 2981872}
|
| 2121 |
+
{"current_steps": 10510, "total_steps": 10720, "loss": 0.2583, "lr": 1.1796664467041973e-06, "epoch": 19.60820895522388, "percentage": 98.04, "elapsed_time": "0:28:23", "remaining_time": "0:00:34", "throughput": 1751.58, "total_tokens": 2983152}
|
| 2122 |
+
{"current_steps": 10515, "total_steps": 10720, "loss": 0.3239, "lr": 1.1244412221429468e-06, "epoch": 19.617537313432837, "percentage": 98.09, "elapsed_time": "0:28:23", "remaining_time": "0:00:33", "throughput": 1751.55, "total_tokens": 2984368}
|
| 2123 |
+
{"current_steps": 10520, "total_steps": 10720, "loss": 0.2292, "lr": 1.0705383806982606e-06, "epoch": 19.62686567164179, "percentage": 98.13, "elapsed_time": "0:28:24", "remaining_time": "0:00:32", "throughput": 1751.55, "total_tokens": 2985680}
|
| 2124 |
+
{"current_steps": 10525, "total_steps": 10720, "loss": 0.1975, "lr": 1.017958065251845e-06, "epoch": 19.636194029850746, "percentage": 98.18, "elapsed_time": "0:28:25", "remaining_time": "0:00:31", "throughput": 1751.6, "total_tokens": 2987088}
|
| 2125 |
+
{"current_steps": 10530, "total_steps": 10720, "loss": 0.2282, "lr": 9.66700415179822e-07, "epoch": 19.645522388059703, "percentage": 98.23, "elapsed_time": "0:28:26", "remaining_time": "0:00:30", "throughput": 1751.72, "total_tokens": 2988656}
|
| 2126 |
+
{"current_steps": 10535, "total_steps": 10720, "loss": 0.2169, "lr": 9.16765566352229e-07, "epoch": 19.654850746268657, "percentage": 98.27, "elapsed_time": "0:28:26", "remaining_time": "0:00:29", "throughput": 1751.71, "total_tokens": 2989968}
|
| 2127 |
+
{"current_steps": 10540, "total_steps": 10720, "loss": 0.2998, "lr": 8.681536511327415e-07, "epoch": 19.66417910447761, "percentage": 98.32, "elapsed_time": "0:28:27", "remaining_time": "0:00:29", "throughput": 1751.75, "total_tokens": 2991344}
|
| 2128 |
+
{"current_steps": 10545, "total_steps": 10720, "loss": 0.2024, "lr": 8.208647983782846e-07, "epoch": 19.673507462686565, "percentage": 98.37, "elapsed_time": "0:28:28", "remaining_time": "0:00:28", "throughput": 1751.82, "total_tokens": 2992848}
|
| 2129 |
+
{"current_steps": 10550, "total_steps": 10720, "loss": 0.1701, "lr": 7.748991334387557e-07, "epoch": 19.682835820895523, "percentage": 98.41, "elapsed_time": "0:28:29", "remaining_time": "0:00:27", "throughput": 1751.85, "total_tokens": 2994224}
|
| 2130 |
+
{"current_steps": 10555, "total_steps": 10720, "loss": 0.2244, "lr": 7.302567781565794e-07, "epoch": 19.692164179104477, "percentage": 98.46, "elapsed_time": "0:28:29", "remaining_time": "0:00:26", "throughput": 1751.96, "total_tokens": 2995792}
|
| 2131 |
+
{"current_steps": 10560, "total_steps": 10720, "loss": 0.1793, "lr": 6.869378508664315e-07, "epoch": 19.701492537313435, "percentage": 98.51, "elapsed_time": "0:28:30", "remaining_time": "0:00:25", "throughput": 1752.01, "total_tokens": 2997232}
|
| 2132 |
+
{"current_steps": 10565, "total_steps": 10720, "loss": 0.1613, "lr": 6.449424663950155e-07, "epoch": 19.71082089552239, "percentage": 98.55, "elapsed_time": "0:28:31", "remaining_time": "0:00:25", "throughput": 1752.1, "total_tokens": 2998768}
|
| 2133 |
+
{"current_steps": 10570, "total_steps": 10720, "loss": 0.2498, "lr": 6.042707360606192e-07, "epoch": 19.720149253731343, "percentage": 98.6, "elapsed_time": "0:28:32", "remaining_time": "0:00:24", "throughput": 1752.16, "total_tokens": 3000304}
|
| 2134 |
+
{"current_steps": 10575, "total_steps": 10720, "loss": 0.2252, "lr": 5.64922767673004e-07, "epoch": 19.729477611940297, "percentage": 98.65, "elapsed_time": "0:28:33", "remaining_time": "0:00:23", "throughput": 1752.22, "total_tokens": 3001744}
|
| 2135 |
+
{"current_steps": 10580, "total_steps": 10720, "loss": 0.3587, "lr": 5.268986655327934e-07, "epoch": 19.738805970149254, "percentage": 98.69, "elapsed_time": "0:28:33", "remaining_time": "0:00:22", "throughput": 1752.31, "total_tokens": 3003312}
|
| 2136 |
+
{"current_steps": 10585, "total_steps": 10720, "loss": 0.315, "lr": 4.901985304315848e-07, "epoch": 19.74813432835821, "percentage": 98.74, "elapsed_time": "0:28:34", "remaining_time": "0:00:21", "throughput": 1752.37, "total_tokens": 3004752}
|
| 2137 |
+
{"current_steps": 10590, "total_steps": 10720, "loss": 0.1565, "lr": 4.548224596513939e-07, "epoch": 19.757462686567163, "percentage": 98.79, "elapsed_time": "0:28:35", "remaining_time": "0:00:21", "throughput": 1752.43, "total_tokens": 3006192}
|
| 2138 |
+
{"current_steps": 10595, "total_steps": 10720, "loss": 0.2047, "lr": 4.207705469645995e-07, "epoch": 19.76679104477612, "percentage": 98.83, "elapsed_time": "0:28:36", "remaining_time": "0:00:20", "throughput": 1752.45, "total_tokens": 3007536}
|
| 2139 |
+
{"current_steps": 10600, "total_steps": 10720, "loss": 0.2337, "lr": 3.8804288263349917e-07, "epoch": 19.776119402985074, "percentage": 98.88, "elapsed_time": "0:28:37", "remaining_time": "0:00:19", "throughput": 1752.61, "total_tokens": 3009264}
|
| 2140 |
+
{"current_steps": 10605, "total_steps": 10720, "loss": 0.2111, "lr": 3.56639553410143e-07, "epoch": 19.78544776119403, "percentage": 98.93, "elapsed_time": "0:28:37", "remaining_time": "0:00:18", "throughput": 1752.63, "total_tokens": 3010640}
|
| 2141 |
+
{"current_steps": 10610, "total_steps": 10720, "loss": 0.2297, "lr": 3.265606425363332e-07, "epoch": 19.794776119402986, "percentage": 98.97, "elapsed_time": "0:28:38", "remaining_time": "0:00:17", "throughput": 1752.69, "total_tokens": 3012144}
|
| 2142 |
+
{"current_steps": 10615, "total_steps": 10720, "loss": 0.2829, "lr": 2.97806229743014e-07, "epoch": 19.80410447761194, "percentage": 99.02, "elapsed_time": "0:28:39", "remaining_time": "0:00:17", "throughput": 1752.79, "total_tokens": 3013712}
|
| 2143 |
+
{"current_steps": 10620, "total_steps": 10720, "loss": 0.1461, "lr": 2.703763912502155e-07, "epoch": 19.813432835820894, "percentage": 99.07, "elapsed_time": "0:28:40", "remaining_time": "0:00:16", "throughput": 1752.78, "total_tokens": 3014992}
|
| 2144 |
+
{"current_steps": 10625, "total_steps": 10720, "loss": 0.1673, "lr": 2.4427119976705436e-07, "epoch": 19.82276119402985, "percentage": 99.11, "elapsed_time": "0:28:40", "remaining_time": "0:00:15", "throughput": 1752.89, "total_tokens": 3016528}
|
| 2145 |
+
{"current_steps": 10630, "total_steps": 10720, "loss": 0.3771, "lr": 2.1949072449123363e-07, "epoch": 19.832089552238806, "percentage": 99.16, "elapsed_time": "0:28:41", "remaining_time": "0:00:14", "throughput": 1752.87, "total_tokens": 3017744}
|
| 2146 |
+
{"current_steps": 10635, "total_steps": 10720, "loss": 0.171, "lr": 1.9603503110904308e-07, "epoch": 19.84141791044776, "percentage": 99.21, "elapsed_time": "0:28:42", "remaining_time": "0:00:13", "throughput": 1752.94, "total_tokens": 3019280}
|
| 2147 |
+
{"current_steps": 10640, "total_steps": 10720, "loss": 0.3043, "lr": 1.739041817951925e-07, "epoch": 19.850746268656717, "percentage": 99.25, "elapsed_time": "0:28:43", "remaining_time": "0:00:12", "throughput": 1753.01, "total_tokens": 3020720}
|
| 2148 |
+
{"current_steps": 10645, "total_steps": 10720, "loss": 0.3391, "lr": 1.5309823521242328e-07, "epoch": 19.86007462686567, "percentage": 99.3, "elapsed_time": "0:28:43", "remaining_time": "0:00:12", "throughput": 1753.01, "total_tokens": 3022032}
|
| 2149 |
+
{"current_steps": 10650, "total_steps": 10720, "loss": 0.1881, "lr": 1.3361724651167473e-07, "epoch": 19.869402985074625, "percentage": 99.35, "elapsed_time": "0:28:44", "remaining_time": "0:00:11", "throughput": 1753.03, "total_tokens": 3023408}
|
| 2150 |
+
{"current_steps": 10655, "total_steps": 10720, "loss": 0.1387, "lr": 1.1546126733180673e-07, "epoch": 19.878731343283583, "percentage": 99.39, "elapsed_time": "0:28:45", "remaining_time": "0:00:10", "throughput": 1753.13, "total_tokens": 3024944}
|
| 2151 |
+
{"current_steps": 10660, "total_steps": 10720, "loss": 0.2879, "lr": 9.863034579926655e-08, "epoch": 19.888059701492537, "percentage": 99.44, "elapsed_time": "0:28:46", "remaining_time": "0:00:09", "throughput": 1753.2, "total_tokens": 3026384}
|
| 2152 |
+
{"current_steps": 10665, "total_steps": 10720, "loss": 0.3061, "lr": 8.312452652831093e-08, "epoch": 19.89738805970149, "percentage": 99.49, "elapsed_time": "0:28:46", "remaining_time": "0:00:08", "throughput": 1753.27, "total_tokens": 3027824}
|
| 2153 |
+
{"current_steps": 10670, "total_steps": 10720, "loss": 0.335, "lr": 6.894385062056197e-08, "epoch": 19.90671641791045, "percentage": 99.53, "elapsed_time": "0:28:47", "remaining_time": "0:00:08", "throughput": 1753.32, "total_tokens": 3029264}
|
| 2154 |
+
{"current_steps": 10675, "total_steps": 10720, "loss": 0.2673, "lr": 5.6088355665229187e-08, "epoch": 19.916044776119403, "percentage": 99.58, "elapsed_time": "0:28:48", "remaining_time": "0:00:07", "throughput": 1753.39, "total_tokens": 3030768}
|
| 2155 |
+
{"current_steps": 10680, "total_steps": 10720, "loss": 0.3039, "lr": 4.4558075738609926e-08, "epoch": 19.925373134328357, "percentage": 99.63, "elapsed_time": "0:28:49", "remaining_time": "0:00:06", "throughput": 1753.48, "total_tokens": 3032272}
|
| 2156 |
+
{"current_steps": 10685, "total_steps": 10720, "loss": 0.2032, "lr": 3.4353041404477926e-08, "epoch": 19.934701492537314, "percentage": 99.67, "elapsed_time": "0:28:50", "remaining_time": "0:00:05", "throughput": 1753.6, "total_tokens": 3033904}
|
| 2157 |
+
{"current_steps": 10690, "total_steps": 10720, "loss": 0.3782, "lr": 2.5473279713472685e-08, "epoch": 19.94402985074627, "percentage": 99.72, "elapsed_time": "0:28:50", "remaining_time": "0:00:04", "throughput": 1753.63, "total_tokens": 3035344}
|
| 2158 |
+
{"current_steps": 10695, "total_steps": 10720, "loss": 0.1892, "lr": 1.7918814203432555e-08, "epoch": 19.953358208955223, "percentage": 99.77, "elapsed_time": "0:28:51", "remaining_time": "0:00:04", "throughput": 1753.59, "total_tokens": 3036528}
|
| 2159 |
+
{"current_steps": 10700, "total_steps": 10720, "loss": 0.2006, "lr": 1.1689664899283691e-08, "epoch": 19.96268656716418, "percentage": 99.81, "elapsed_time": "0:28:52", "remaining_time": "0:00:03", "throughput": 1753.69, "total_tokens": 3038096}
|
| 2160 |
+
{"current_steps": 10705, "total_steps": 10720, "loss": 0.2468, "lr": 6.78584831270701e-09, "epoch": 19.972014925373134, "percentage": 99.86, "elapsed_time": "0:28:53", "remaining_time": "0:00:02", "throughput": 1753.76, "total_tokens": 3039568}
|
| 2161 |
+
{"current_steps": 10710, "total_steps": 10720, "loss": 0.3524, "lr": 3.2073774424157263e-09, "epoch": 19.98134328358209, "percentage": 99.91, "elapsed_time": "0:28:53", "remaining_time": "0:00:01", "throughput": 1753.81, "total_tokens": 3040976}
|
| 2162 |
+
{"current_steps": 10715, "total_steps": 10720, "loss": 0.2741, "lr": 9.54261773933318e-10, "epoch": 19.990671641791046, "percentage": 99.95, "elapsed_time": "0:28:54", "remaining_time": "0:00:00", "throughput": 1753.79, "total_tokens": 3042224}
|
| 2163 |
+
{"current_steps": 10720, "total_steps": 10720, "loss": 0.1177, "lr": 2.650727970454625e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:28:55", "remaining_time": "0:00:00", "throughput": 1753.82, "total_tokens": 3043720}
|
| 2164 |
+
{"current_steps": 10720, "total_steps": 10720, "eval_loss": 0.8808238506317139, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:28:59", "remaining_time": "0:00:00", "throughput": 1749.57, "total_tokens": 3043720}
|
| 2165 |
+
{"current_steps": 10720, "total_steps": 10720, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:29:00", "remaining_time": "0:00:00", "throughput": 1748.71, "total_tokens": 3043720}
|