rbelanec commited on
Commit
b7d15ae
·
verified ·
1 Parent(s): 8a9da6b

Training in progress, step 10720

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +106 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1439551e243060a71d1923018e265632297cf28b5a9d541827cd964e50d7769
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6683d7e47df73ef5cedf256996daea978837fe98f5eef463898806079f4ecf06
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -2057,3 +2057,109 @@
2057
  {"current_steps": 10190, "total_steps": 10720, "loss": 0.1724, "lr": 7.455417381375451e-06, "epoch": 19.011194029850746, "percentage": 95.06, "elapsed_time": "0:28:27", "remaining_time": "0:01:28", "throughput": 1698.33, "total_tokens": 2899248}
2058
  {"current_steps": 10195, "total_steps": 10720, "loss": 0.1997, "lr": 7.316016870279441e-06, "epoch": 19.020522388059703, "percentage": 95.1, "elapsed_time": "0:28:27", "remaining_time": "0:01:27", "throughput": 1698.24, "total_tokens": 2900464}
2059
  {"current_steps": 10200, "total_steps": 10720, "loss": 0.1263, "lr": 7.177922330120712e-06, "epoch": 19.029850746268657, "percentage": 95.15, "elapsed_time": "0:28:28", "remaining_time": "0:01:27", "throughput": 1698.27, "total_tokens": 2901840}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2057
  {"current_steps": 10190, "total_steps": 10720, "loss": 0.1724, "lr": 7.455417381375451e-06, "epoch": 19.011194029850746, "percentage": 95.06, "elapsed_time": "0:28:27", "remaining_time": "0:01:28", "throughput": 1698.33, "total_tokens": 2899248}
2058
  {"current_steps": 10195, "total_steps": 10720, "loss": 0.1997, "lr": 7.316016870279441e-06, "epoch": 19.020522388059703, "percentage": 95.1, "elapsed_time": "0:28:27", "remaining_time": "0:01:27", "throughput": 1698.24, "total_tokens": 2900464}
2059
  {"current_steps": 10200, "total_steps": 10720, "loss": 0.1263, "lr": 7.177922330120712e-06, "epoch": 19.029850746268657, "percentage": 95.15, "elapsed_time": "0:28:28", "remaining_time": "0:01:27", "throughput": 1698.27, "total_tokens": 2901840}
2060
+ {"current_steps": 10205, "total_steps": 10720, "loss": 0.2547, "lr": 7.041134126950233e-06, "epoch": 19.03917910447761, "percentage": 95.2, "elapsed_time": "0:28:29", "remaining_time": "0:01:26", "throughput": 1698.32, "total_tokens": 2903248}
2061
+ {"current_steps": 10210, "total_steps": 10720, "loss": 0.3607, "lr": 6.9056526233562955e-06, "epoch": 19.04850746268657, "percentage": 95.24, "elapsed_time": "0:28:30", "remaining_time": "0:01:25", "throughput": 1698.47, "total_tokens": 2904944}
2062
+ {"current_steps": 10215, "total_steps": 10720, "loss": 0.2637, "lr": 6.771478178463353e-06, "epoch": 19.057835820895523, "percentage": 95.29, "elapsed_time": "0:28:31", "remaining_time": "0:01:24", "throughput": 1698.47, "total_tokens": 2906256}
2063
+ {"current_steps": 10220, "total_steps": 10720, "loss": 0.1724, "lr": 6.638611147931406e-06, "epoch": 19.067164179104477, "percentage": 95.34, "elapsed_time": "0:28:31", "remaining_time": "0:01:23", "throughput": 1698.55, "total_tokens": 2907760}
2064
+ {"current_steps": 10225, "total_steps": 10720, "loss": 0.2378, "lr": 6.507051883954618e-06, "epoch": 19.07649253731343, "percentage": 95.38, "elapsed_time": "0:28:32", "remaining_time": "0:01:22", "throughput": 1698.54, "total_tokens": 2909040}
2065
+ {"current_steps": 10230, "total_steps": 10720, "loss": 0.2593, "lr": 6.376800735260757e-06, "epoch": 19.08582089552239, "percentage": 95.43, "elapsed_time": "0:28:33", "remaining_time": "0:01:22", "throughput": 1698.6, "total_tokens": 2910448}
2066
+ {"current_steps": 10235, "total_steps": 10720, "loss": 0.1728, "lr": 6.247858047110145e-06, "epoch": 19.095149253731343, "percentage": 95.48, "elapsed_time": "0:28:34", "remaining_time": "0:01:21", "throughput": 1698.63, "total_tokens": 2911792}
2067
+ {"current_steps": 10240, "total_steps": 10720, "loss": 0.2249, "lr": 6.1202241612947075e-06, "epoch": 19.104477611940297, "percentage": 95.52, "elapsed_time": "0:28:34", "remaining_time": "0:01:20", "throughput": 1698.61, "total_tokens": 2913072}
2068
+ {"current_steps": 10245, "total_steps": 10720, "loss": 0.1709, "lr": 5.993899416137039e-06, "epoch": 19.113805970149254, "percentage": 95.57, "elapsed_time": "0:28:35", "remaining_time": "0:01:19", "throughput": 1698.64, "total_tokens": 2914480}
2069
+ {"current_steps": 10250, "total_steps": 10720, "loss": 0.5177, "lr": 5.868884146489617e-06, "epoch": 19.12313432835821, "percentage": 95.62, "elapsed_time": "0:28:36", "remaining_time": "0:01:18", "throughput": 1698.71, "total_tokens": 2915920}
2070
+ {"current_steps": 10255, "total_steps": 10720, "loss": 0.2387, "lr": 5.7451786837339205e-06, "epoch": 19.132462686567163, "percentage": 95.66, "elapsed_time": "0:28:37", "remaining_time": "0:01:17", "throughput": 1698.75, "total_tokens": 2917360}
2071
+ {"current_steps": 10260, "total_steps": 10720, "loss": 0.2359, "lr": 5.622783355779315e-06, "epoch": 19.14179104477612, "percentage": 95.71, "elapsed_time": "0:28:38", "remaining_time": "0:01:17", "throughput": 1698.85, "total_tokens": 2918896}
2072
+ {"current_steps": 10265, "total_steps": 10720, "loss": 0.1998, "lr": 5.501698487062445e-06, "epoch": 19.151119402985074, "percentage": 95.76, "elapsed_time": "0:28:38", "remaining_time": "0:01:16", "throughput": 1698.92, "total_tokens": 2920368}
2073
+ {"current_steps": 10270, "total_steps": 10720, "loss": 0.1661, "lr": 5.3819243985463454e-06, "epoch": 19.16044776119403, "percentage": 95.8, "elapsed_time": "0:28:39", "remaining_time": "0:01:15", "throughput": 1699.0, "total_tokens": 2921872}
2074
+ {"current_steps": 10275, "total_steps": 10720, "loss": 0.2888, "lr": 5.263461407719438e-06, "epoch": 19.169776119402986, "percentage": 95.85, "elapsed_time": "0:28:40", "remaining_time": "0:01:14", "throughput": 1699.06, "total_tokens": 2923312}
2075
+ {"current_steps": 10280, "total_steps": 10720, "loss": 0.3048, "lr": 5.146309828594875e-06, "epoch": 19.17910447761194, "percentage": 95.9, "elapsed_time": "0:28:41", "remaining_time": "0:01:13", "throughput": 1699.2, "total_tokens": 2924912}
2076
+ {"current_steps": 10285, "total_steps": 10720, "loss": 0.4523, "lr": 5.030469971709472e-06, "epoch": 19.188432835820894, "percentage": 95.94, "elapsed_time": "0:28:42", "remaining_time": "0:01:12", "throughput": 1699.28, "total_tokens": 2926416}
2077
+ {"current_steps": 10290, "total_steps": 10720, "loss": 0.1621, "lr": 4.91594214412322e-06, "epoch": 19.19776119402985, "percentage": 95.99, "elapsed_time": "0:28:42", "remaining_time": "0:01:11", "throughput": 1699.3, "total_tokens": 2927696}
2078
+ {"current_steps": 10295, "total_steps": 10720, "loss": 0.1128, "lr": 4.80272664941811e-06, "epoch": 19.207089552238806, "percentage": 96.04, "elapsed_time": "0:28:43", "remaining_time": "0:01:11", "throughput": 1699.39, "total_tokens": 2929232}
2079
+ {"current_steps": 10300, "total_steps": 10720, "loss": 0.2353, "lr": 4.690823787697473e-06, "epoch": 19.21641791044776, "percentage": 96.08, "elapsed_time": "0:28:44", "remaining_time": "0:01:10", "throughput": 1699.43, "total_tokens": 2930608}
2080
+ {"current_steps": 10305, "total_steps": 10720, "loss": 0.1663, "lr": 4.5802338555854254e-06, "epoch": 19.225746268656717, "percentage": 96.13, "elapsed_time": "0:28:45", "remaining_time": "0:01:09", "throughput": 1699.56, "total_tokens": 2932240}
2081
+ {"current_steps": 10310, "total_steps": 10720, "loss": 0.2029, "lr": 4.4709571462256956e-06, "epoch": 19.23507462686567, "percentage": 96.18, "elapsed_time": "0:28:46", "remaining_time": "0:01:08", "throughput": 1699.63, "total_tokens": 2933712}
2082
+ {"current_steps": 10315, "total_steps": 10720, "loss": 0.173, "lr": 4.36299394928108e-06, "epoch": 19.244402985074625, "percentage": 96.22, "elapsed_time": "0:28:46", "remaining_time": "0:01:07", "throughput": 1699.61, "total_tokens": 2934960}
2083
+ {"current_steps": 10320, "total_steps": 10720, "loss": 0.2052, "lr": 4.256344550932434e-06, "epoch": 19.253731343283583, "percentage": 96.27, "elapsed_time": "0:28:47", "remaining_time": "0:01:06", "throughput": 1699.65, "total_tokens": 2936368}
2084
+ {"current_steps": 10325, "total_steps": 10720, "loss": 0.1815, "lr": 4.1510092338784e-06, "epoch": 19.263059701492537, "percentage": 96.32, "elapsed_time": "0:28:48", "remaining_time": "0:01:06", "throughput": 1699.72, "total_tokens": 2937936}
2085
+ {"current_steps": 10330, "total_steps": 10720, "loss": 0.3622, "lr": 4.046988277334185e-06, "epoch": 19.27238805970149, "percentage": 96.36, "elapsed_time": "0:28:49", "remaining_time": "0:01:05", "throughput": 1699.72, "total_tokens": 2939248}
2086
+ {"current_steps": 10335, "total_steps": 10720, "loss": 0.1298, "lr": 3.944281957030893e-06, "epoch": 19.28171641791045, "percentage": 96.41, "elapsed_time": "0:28:50", "remaining_time": "0:01:04", "throughput": 1699.79, "total_tokens": 2940688}
2087
+ {"current_steps": 10340, "total_steps": 10720, "loss": 0.1903, "lr": 3.842890545215028e-06, "epoch": 19.291044776119403, "percentage": 96.46, "elapsed_time": "0:28:50", "remaining_time": "0:01:03", "throughput": 1699.79, "total_tokens": 2942032}
2088
+ {"current_steps": 10345, "total_steps": 10720, "loss": 0.2553, "lr": 3.742814310647602e-06, "epoch": 19.300373134328357, "percentage": 96.5, "elapsed_time": "0:28:51", "remaining_time": "0:01:02", "throughput": 1699.84, "total_tokens": 2943472}
2089
+ {"current_steps": 10350, "total_steps": 10720, "loss": 0.3324, "lr": 3.6440535186034184e-06, "epoch": 19.309701492537314, "percentage": 96.55, "elapsed_time": "0:28:52", "remaining_time": "0:01:01", "throughput": 1699.88, "total_tokens": 2944880}
2090
+ {"current_steps": 10355, "total_steps": 10720, "loss": 0.2659, "lr": 3.5466084308704017e-06, "epoch": 19.31902985074627, "percentage": 96.6, "elapsed_time": "0:28:53", "remaining_time": "0:01:01", "throughput": 1699.93, "total_tokens": 2946352}
2091
+ {"current_steps": 10360, "total_steps": 10720, "loss": 0.3356, "lr": 3.4504793057489326e-06, "epoch": 19.328358208955223, "percentage": 96.64, "elapsed_time": "0:28:53", "remaining_time": "0:01:00", "throughput": 1699.9, "total_tokens": 2947568}
2092
+ {"current_steps": 10365, "total_steps": 10720, "loss": 0.1609, "lr": 3.3556663980511826e-06, "epoch": 19.33768656716418, "percentage": 96.69, "elapsed_time": "0:28:54", "remaining_time": "0:00:59", "throughput": 1700.01, "total_tokens": 2949104}
2093
+ {"current_steps": 10370, "total_steps": 10720, "loss": 0.2441, "lr": 3.2621699591001695e-06, "epoch": 19.347014925373134, "percentage": 96.74, "elapsed_time": "0:28:55", "remaining_time": "0:00:58", "throughput": 1700.04, "total_tokens": 2950448}
2094
+ {"current_steps": 10375, "total_steps": 10720, "loss": 0.2596, "lr": 3.1699902367295917e-06, "epoch": 19.35634328358209, "percentage": 96.78, "elapsed_time": "0:28:56", "remaining_time": "0:00:57", "throughput": 1700.04, "total_tokens": 2951760}
2095
+ {"current_steps": 10380, "total_steps": 10720, "loss": 0.2539, "lr": 3.079127475282717e-06, "epoch": 19.365671641791046, "percentage": 96.83, "elapsed_time": "0:28:57", "remaining_time": "0:00:56", "throughput": 1700.09, "total_tokens": 2953200}
2096
+ {"current_steps": 10385, "total_steps": 10720, "loss": 0.1706, "lr": 2.9895819156119943e-06, "epoch": 19.375, "percentage": 96.88, "elapsed_time": "0:28:57", "remaining_time": "0:00:56", "throughput": 1700.1, "total_tokens": 2954512}
2097
+ {"current_steps": 10390, "total_steps": 10720, "loss": 0.238, "lr": 2.9013537950782765e-06, "epoch": 19.384328358208954, "percentage": 96.92, "elapsed_time": "0:28:58", "remaining_time": "0:00:55", "throughput": 1700.12, "total_tokens": 2955856}
2098
+ {"current_steps": 10395, "total_steps": 10720, "loss": 0.2206, "lr": 2.8144433475502105e-06, "epoch": 19.39365671641791, "percentage": 96.97, "elapsed_time": "0:28:59", "remaining_time": "0:00:54", "throughput": 1700.18, "total_tokens": 2957328}
2099
+ {"current_steps": 10400, "total_steps": 10720, "loss": 0.1399, "lr": 2.728850803403793e-06, "epoch": 19.402985074626866, "percentage": 97.01, "elapsed_time": "0:29:00", "remaining_time": "0:00:53", "throughput": 1700.25, "total_tokens": 2958768}
2100
+ {"current_steps": 10405, "total_steps": 10720, "loss": 0.3264, "lr": 2.644576389521425e-06, "epoch": 19.41231343283582, "percentage": 97.06, "elapsed_time": "0:29:00", "remaining_time": "0:00:52", "throughput": 1700.33, "total_tokens": 2960272}
2101
+ {"current_steps": 10410, "total_steps": 10720, "loss": 0.2346, "lr": 2.5616203292916916e-06, "epoch": 19.421641791044777, "percentage": 97.11, "elapsed_time": "0:29:01", "remaining_time": "0:00:51", "throughput": 1700.36, "total_tokens": 2961648}
2102
+ {"current_steps": 10415, "total_steps": 10720, "loss": 0.091, "lr": 2.479982842608475e-06, "epoch": 19.43097014925373, "percentage": 97.15, "elapsed_time": "0:29:02", "remaining_time": "0:00:51", "throughput": 1700.43, "total_tokens": 2963088}
2103
+ {"current_steps": 10420, "total_steps": 10720, "loss": 0.371, "lr": 2.3996641458704504e-06, "epoch": 19.440298507462686, "percentage": 97.2, "elapsed_time": "0:29:03", "remaining_time": "0:00:50", "throughput": 1700.44, "total_tokens": 2964432}
2104
+ {"current_steps": 10425, "total_steps": 10720, "loss": 0.2244, "lr": 2.320664451980592e-06, "epoch": 19.449626865671643, "percentage": 97.25, "elapsed_time": "0:29:04", "remaining_time": "0:00:49", "throughput": 1700.46, "total_tokens": 2965808}
2105
+ {"current_steps": 10430, "total_steps": 10720, "loss": 0.2795, "lr": 2.2429839703456136e-06, "epoch": 19.458955223880597, "percentage": 97.29, "elapsed_time": "0:29:04", "remaining_time": "0:00:48", "throughput": 1700.48, "total_tokens": 2967248}
2106
+ {"current_steps": 10435, "total_steps": 10720, "loss": 0.2252, "lr": 2.1666229068753594e-06, "epoch": 19.46828358208955, "percentage": 97.34, "elapsed_time": "0:29:05", "remaining_time": "0:00:47", "throughput": 1700.46, "total_tokens": 2968528}
2107
+ {"current_steps": 10440, "total_steps": 10720, "loss": 0.2318, "lr": 2.091581463981973e-06, "epoch": 19.47761194029851, "percentage": 97.39, "elapsed_time": "0:29:06", "remaining_time": "0:00:46", "throughput": 1700.52, "total_tokens": 2969968}
2108
+ {"current_steps": 10445, "total_steps": 10720, "loss": 0.2825, "lr": 2.0178598405800606e-06, "epoch": 19.486940298507463, "percentage": 97.43, "elapsed_time": "0:29:07", "remaining_time": "0:00:46", "throughput": 1700.6, "total_tokens": 2971472}
2109
+ {"current_steps": 10450, "total_steps": 10720, "loss": 0.228, "lr": 1.945458232085473e-06, "epoch": 19.496268656716417, "percentage": 97.48, "elapsed_time": "0:29:08", "remaining_time": "0:00:45", "throughput": 1700.65, "total_tokens": 2972880}
2110
+ {"current_steps": 10455, "total_steps": 10720, "loss": 0.434, "lr": 1.8743768304151366e-06, "epoch": 19.505597014925375, "percentage": 97.53, "elapsed_time": "0:29:08", "remaining_time": "0:00:44", "throughput": 1700.67, "total_tokens": 2974192}
2111
+ {"current_steps": 10460, "total_steps": 10720, "loss": 0.1921, "lr": 1.8046158239864996e-06, "epoch": 19.51492537313433, "percentage": 97.57, "elapsed_time": "0:29:09", "remaining_time": "0:00:43", "throughput": 1700.71, "total_tokens": 2975632}
2112
+ {"current_steps": 10465, "total_steps": 10720, "loss": 0.2097, "lr": 1.7361753977169215e-06, "epoch": 19.524253731343283, "percentage": 97.62, "elapsed_time": "0:29:10", "remaining_time": "0:00:42", "throughput": 1700.79, "total_tokens": 2977168}
2113
+ {"current_steps": 10470, "total_steps": 10720, "loss": 0.3264, "lr": 1.6690557330233947e-06, "epoch": 19.53358208955224, "percentage": 97.67, "elapsed_time": "0:29:11", "remaining_time": "0:00:41", "throughput": 1700.89, "total_tokens": 2978736}
2114
+ {"current_steps": 10475, "total_steps": 10720, "loss": 0.2519, "lr": 1.6032570078217678e-06, "epoch": 19.542910447761194, "percentage": 97.71, "elapsed_time": "0:29:12", "remaining_time": "0:00:40", "throughput": 1700.93, "total_tokens": 2980176}
2115
+ {"current_steps": 10480, "total_steps": 10720, "loss": 0.2272, "lr": 1.5387793965265794e-06, "epoch": 19.55223880597015, "percentage": 97.76, "elapsed_time": "0:29:12", "remaining_time": "0:00:40", "throughput": 1700.92, "total_tokens": 2981488}
2116
+ {"current_steps": 10485, "total_steps": 10720, "loss": 0.1393, "lr": 1.4756230700503914e-06, "epoch": 19.561567164179106, "percentage": 97.81, "elapsed_time": "0:29:13", "remaining_time": "0:00:39", "throughput": 1700.95, "total_tokens": 2982800}
2117
+ {"current_steps": 10490, "total_steps": 10720, "loss": 0.1739, "lr": 1.4137881958034006e-06, "epoch": 19.57089552238806, "percentage": 97.85, "elapsed_time": "0:29:14", "remaining_time": "0:00:38", "throughput": 1701.01, "total_tokens": 2984208}
2118
+ {"current_steps": 10495, "total_steps": 10720, "loss": 0.3847, "lr": 1.3532749376929944e-06, "epoch": 19.580223880597014, "percentage": 97.9, "elapsed_time": "0:29:15", "remaining_time": "0:00:37", "throughput": 1701.06, "total_tokens": 2985616}
2119
+ {"current_steps": 10500, "total_steps": 10720, "loss": 0.2892, "lr": 1.2940834561233627e-06, "epoch": 19.58955223880597, "percentage": 97.95, "elapsed_time": "0:29:15", "remaining_time": "0:00:36", "throughput": 1701.12, "total_tokens": 2987056}
2120
+ {"current_steps": 10505, "total_steps": 10720, "loss": 0.2513, "lr": 1.236213907994943e-06, "epoch": 19.598880597014926, "percentage": 97.99, "elapsed_time": "0:29:16", "remaining_time": "0:00:35", "throughput": 1701.22, "total_tokens": 2988592}
2121
+ {"current_steps": 10510, "total_steps": 10720, "loss": 0.225, "lr": 1.1796664467041973e-06, "epoch": 19.60820895522388, "percentage": 98.04, "elapsed_time": "0:29:17", "remaining_time": "0:00:35", "throughput": 1701.23, "total_tokens": 2989904}
2122
+ {"current_steps": 10515, "total_steps": 10720, "loss": 0.1427, "lr": 1.1244412221429468e-06, "epoch": 19.617537313432837, "percentage": 98.09, "elapsed_time": "0:29:18", "remaining_time": "0:00:34", "throughput": 1701.31, "total_tokens": 2991440}
2123
+ {"current_steps": 10520, "total_steps": 10720, "loss": 0.2393, "lr": 1.0705383806982606e-06, "epoch": 19.62686567164179, "percentage": 98.13, "elapsed_time": "0:29:19", "remaining_time": "0:00:33", "throughput": 1701.35, "total_tokens": 2992848}
2124
+ {"current_steps": 10525, "total_steps": 10720, "loss": 0.4173, "lr": 1.017958065251845e-06, "epoch": 19.636194029850746, "percentage": 98.18, "elapsed_time": "0:29:19", "remaining_time": "0:00:32", "throughput": 1701.36, "total_tokens": 2994096}
2125
+ {"current_steps": 10530, "total_steps": 10720, "loss": 0.2562, "lr": 9.66700415179822e-07, "epoch": 19.645522388059703, "percentage": 98.23, "elapsed_time": "0:29:20", "remaining_time": "0:00:31", "throughput": 1701.39, "total_tokens": 2995440}
2126
+ {"current_steps": 10535, "total_steps": 10720, "loss": 0.3594, "lr": 9.16765566352229e-07, "epoch": 19.654850746268657, "percentage": 98.27, "elapsed_time": "0:29:21", "remaining_time": "0:00:30", "throughput": 1701.43, "total_tokens": 2996816}
2127
+ {"current_steps": 10540, "total_steps": 10720, "loss": 0.2093, "lr": 8.681536511327415e-07, "epoch": 19.66417910447761, "percentage": 98.32, "elapsed_time": "0:29:22", "remaining_time": "0:00:30", "throughput": 1701.52, "total_tokens": 2998352}
2128
+ {"current_steps": 10545, "total_steps": 10720, "loss": 0.2758, "lr": 8.208647983782846e-07, "epoch": 19.673507462686565, "percentage": 98.37, "elapsed_time": "0:29:22", "remaining_time": "0:00:29", "throughput": 1701.57, "total_tokens": 2999792}
2129
+ {"current_steps": 10550, "total_steps": 10720, "loss": 0.1757, "lr": 7.748991334387557e-07, "epoch": 19.682835820895523, "percentage": 98.41, "elapsed_time": "0:29:23", "remaining_time": "0:00:28", "throughput": 1701.57, "total_tokens": 3001072}
2130
+ {"current_steps": 10555, "total_steps": 10720, "loss": 0.2236, "lr": 7.302567781565794e-07, "epoch": 19.692164179104477, "percentage": 98.46, "elapsed_time": "0:29:24", "remaining_time": "0:00:27", "throughput": 1701.64, "total_tokens": 3002576}
2131
+ {"current_steps": 10560, "total_steps": 10720, "loss": 0.1104, "lr": 6.869378508664315e-07, "epoch": 19.701492537313435, "percentage": 98.51, "elapsed_time": "0:29:25", "remaining_time": "0:00:26", "throughput": 1701.69, "total_tokens": 3003984}
2132
+ {"current_steps": 10565, "total_steps": 10720, "loss": 0.1164, "lr": 6.449424663950155e-07, "epoch": 19.71082089552239, "percentage": 98.55, "elapsed_time": "0:29:26", "remaining_time": "0:00:25", "throughput": 1701.74, "total_tokens": 3005392}
2133
+ {"current_steps": 10570, "total_steps": 10720, "loss": 0.3706, "lr": 6.042707360606192e-07, "epoch": 19.720149253731343, "percentage": 98.6, "elapsed_time": "0:29:26", "remaining_time": "0:00:25", "throughput": 1701.83, "total_tokens": 3006896}
2134
+ {"current_steps": 10575, "total_steps": 10720, "loss": 0.2673, "lr": 5.64922767673004e-07, "epoch": 19.729477611940297, "percentage": 98.65, "elapsed_time": "0:29:27", "remaining_time": "0:00:24", "throughput": 1701.85, "total_tokens": 3008240}
2135
+ {"current_steps": 10580, "total_steps": 10720, "loss": 0.2049, "lr": 5.268986655327934e-07, "epoch": 19.738805970149254, "percentage": 98.69, "elapsed_time": "0:29:28", "remaining_time": "0:00:23", "throughput": 1702.03, "total_tokens": 3010000}
2136
+ {"current_steps": 10585, "total_steps": 10720, "loss": 0.2348, "lr": 4.901985304315848e-07, "epoch": 19.74813432835821, "percentage": 98.74, "elapsed_time": "0:29:29", "remaining_time": "0:00:22", "throughput": 1702.05, "total_tokens": 3011344}
2137
+ {"current_steps": 10590, "total_steps": 10720, "loss": 0.3252, "lr": 4.548224596513939e-07, "epoch": 19.757462686567163, "percentage": 98.79, "elapsed_time": "0:29:30", "remaining_time": "0:00:21", "throughput": 1702.15, "total_tokens": 3012880}
2138
+ {"current_steps": 10595, "total_steps": 10720, "loss": 0.2089, "lr": 4.207705469645995e-07, "epoch": 19.76679104477612, "percentage": 98.83, "elapsed_time": "0:29:30", "remaining_time": "0:00:20", "throughput": 1702.27, "total_tokens": 3014448}
2139
+ {"current_steps": 10600, "total_steps": 10720, "loss": 0.1524, "lr": 3.8804288263349917e-07, "epoch": 19.776119402985074, "percentage": 98.88, "elapsed_time": "0:29:31", "remaining_time": "0:00:20", "throughput": 1702.36, "total_tokens": 3015984}
2140
+ {"current_steps": 10605, "total_steps": 10720, "loss": 0.1925, "lr": 3.56639553410143e-07, "epoch": 19.78544776119403, "percentage": 98.93, "elapsed_time": "0:29:32", "remaining_time": "0:00:19", "throughput": 1702.4, "total_tokens": 3017392}
2141
+ {"current_steps": 10610, "total_steps": 10720, "loss": 0.3039, "lr": 3.265606425363332e-07, "epoch": 19.794776119402986, "percentage": 98.97, "elapsed_time": "0:29:33", "remaining_time": "0:00:18", "throughput": 1702.5, "total_tokens": 3018896}
2142
+ {"current_steps": 10615, "total_steps": 10720, "loss": 0.0832, "lr": 2.97806229743014e-07, "epoch": 19.80410447761194, "percentage": 99.02, "elapsed_time": "0:29:34", "remaining_time": "0:00:17", "throughput": 1702.61, "total_tokens": 3020496}
2143
+ {"current_steps": 10620, "total_steps": 10720, "loss": 0.1898, "lr": 2.703763912502155e-07, "epoch": 19.813432835820894, "percentage": 99.07, "elapsed_time": "0:29:34", "remaining_time": "0:00:16", "throughput": 1702.62, "total_tokens": 3021840}
2144
+ {"current_steps": 10625, "total_steps": 10720, "loss": 0.2509, "lr": 2.4427119976705436e-07, "epoch": 19.82276119402985, "percentage": 99.11, "elapsed_time": "0:29:35", "remaining_time": "0:00:15", "throughput": 1702.66, "total_tokens": 3023216}
2145
+ {"current_steps": 10630, "total_steps": 10720, "loss": 0.1979, "lr": 2.1949072449123363e-07, "epoch": 19.832089552238806, "percentage": 99.16, "elapsed_time": "0:29:36", "remaining_time": "0:00:15", "throughput": 1702.74, "total_tokens": 3024720}
2146
+ {"current_steps": 10635, "total_steps": 10720, "loss": 0.268, "lr": 1.9603503110904308e-07, "epoch": 19.84141791044776, "percentage": 99.21, "elapsed_time": "0:29:37", "remaining_time": "0:00:14", "throughput": 1702.81, "total_tokens": 3026160}
2147
+ {"current_steps": 10640, "total_steps": 10720, "loss": 0.3326, "lr": 1.739041817951925e-07, "epoch": 19.850746268656717, "percentage": 99.25, "elapsed_time": "0:29:37", "remaining_time": "0:00:13", "throughput": 1702.88, "total_tokens": 3027632}
2148
+ {"current_steps": 10645, "total_steps": 10720, "loss": 0.1947, "lr": 1.5309823521242328e-07, "epoch": 19.86007462686567, "percentage": 99.3, "elapsed_time": "0:29:38", "remaining_time": "0:00:12", "throughput": 1702.85, "total_tokens": 3028848}
2149
+ {"current_steps": 10650, "total_steps": 10720, "loss": 0.2189, "lr": 1.3361724651167473e-07, "epoch": 19.869402985074625, "percentage": 99.35, "elapsed_time": "0:29:39", "remaining_time": "0:00:11", "throughput": 1702.92, "total_tokens": 3030288}
2150
+ {"current_steps": 10655, "total_steps": 10720, "loss": 0.1734, "lr": 1.1546126733180673e-07, "epoch": 19.878731343283583, "percentage": 99.39, "elapsed_time": "0:29:40", "remaining_time": "0:00:10", "throughput": 1702.94, "total_tokens": 3031696}
2151
+ {"current_steps": 10660, "total_steps": 10720, "loss": 0.1619, "lr": 9.863034579926655e-08, "epoch": 19.888059701492537, "percentage": 99.44, "elapsed_time": "0:29:41", "remaining_time": "0:00:10", "throughput": 1702.96, "total_tokens": 3032976}
2152
+ {"current_steps": 10665, "total_steps": 10720, "loss": 0.2341, "lr": 8.312452652831093e-08, "epoch": 19.89738805970149, "percentage": 99.49, "elapsed_time": "0:29:41", "remaining_time": "0:00:09", "throughput": 1703.06, "total_tokens": 3034512}
2153
+ {"current_steps": 10670, "total_steps": 10720, "loss": 0.1828, "lr": 6.894385062056197e-08, "epoch": 19.90671641791045, "percentage": 99.53, "elapsed_time": "0:29:42", "remaining_time": "0:00:08", "throughput": 1703.14, "total_tokens": 3036016}
2154
+ {"current_steps": 10675, "total_steps": 10720, "loss": 0.1543, "lr": 5.6088355665229187e-08, "epoch": 19.916044776119403, "percentage": 99.58, "elapsed_time": "0:29:43", "remaining_time": "0:00:07", "throughput": 1703.14, "total_tokens": 3037296}
2155
+ {"current_steps": 10680, "total_steps": 10720, "loss": 0.2943, "lr": 4.4558075738609926e-08, "epoch": 19.925373134328357, "percentage": 99.63, "elapsed_time": "0:29:44", "remaining_time": "0:00:06", "throughput": 1703.2, "total_tokens": 3038736}
2156
+ {"current_steps": 10685, "total_steps": 10720, "loss": 0.2967, "lr": 3.4353041404477926e-08, "epoch": 19.934701492537314, "percentage": 99.67, "elapsed_time": "0:29:44", "remaining_time": "0:00:05", "throughput": 1703.19, "total_tokens": 3040048}
2157
+ {"current_steps": 10690, "total_steps": 10720, "loss": 0.1819, "lr": 2.5473279713472685e-08, "epoch": 19.94402985074627, "percentage": 99.72, "elapsed_time": "0:29:45", "remaining_time": "0:00:05", "throughput": 1703.21, "total_tokens": 3041424}
2158
+ {"current_steps": 10695, "total_steps": 10720, "loss": 0.2043, "lr": 1.7918814203432555e-08, "epoch": 19.953358208955223, "percentage": 99.77, "elapsed_time": "0:29:46", "remaining_time": "0:00:04", "throughput": 1703.31, "total_tokens": 3042960}
2159
+ {"current_steps": 10700, "total_steps": 10720, "loss": 0.2257, "lr": 1.1689664899283691e-08, "epoch": 19.96268656716418, "percentage": 99.81, "elapsed_time": "0:29:47", "remaining_time": "0:00:03", "throughput": 1703.34, "total_tokens": 3044336}
2160
+ {"current_steps": 10705, "total_steps": 10720, "loss": 0.1333, "lr": 6.78584831270701e-09, "epoch": 19.972014925373134, "percentage": 99.86, "elapsed_time": "0:29:48", "remaining_time": "0:00:02", "throughput": 1703.44, "total_tokens": 3045904}
2161
+ {"current_steps": 10710, "total_steps": 10720, "loss": 0.5205, "lr": 3.2073774424157263e-09, "epoch": 19.98134328358209, "percentage": 99.91, "elapsed_time": "0:29:48", "remaining_time": "0:00:01", "throughput": 1703.45, "total_tokens": 3047248}
2162
+ {"current_steps": 10715, "total_steps": 10720, "loss": 0.2663, "lr": 9.54261773933318e-10, "epoch": 19.990671641791046, "percentage": 99.95, "elapsed_time": "0:29:49", "remaining_time": "0:00:00", "throughput": 1703.58, "total_tokens": 3048848}
2163
+ {"current_steps": 10720, "total_steps": 10720, "loss": 0.2325, "lr": 2.650727970454625e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:29:50", "remaining_time": "0:00:00", "throughput": 1703.5, "total_tokens": 3049984}
2164
+ {"current_steps": 10720, "total_steps": 10720, "eval_loss": 0.9178647398948669, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:29:54", "remaining_time": "0:00:00", "throughput": 1699.41, "total_tokens": 3049984}
2165
+ {"current_steps": 10720, "total_steps": 10720, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "0:29:55", "remaining_time": "0:00:00", "throughput": 1698.47, "total_tokens": 3049984}