rbelanec commited on
Commit
6a05fb7
·
verified ·
1 Parent(s): 38c9280

Training in progress, step 11160

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +113 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c26ed3b3d44623f86ab4fe200d5d2c19d4c9b7465a62aed7c316ae0393830a93
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:385f4c8ce73907902a1b27416653ec2e368afe4e6d192c2860468c4fa8ad0412
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -2138,3 +2138,116 @@
2138
  {"current_steps": 10600, "total_steps": 11160, "loss": 0.0003, "lr": 3.8389057823175754e-07, "epoch": 9.498207885304659, "percentage": 94.98, "elapsed_time": "0:37:19", "remaining_time": "0:01:58", "throughput": 1783.77, "total_tokens": 3994240}
2139
  {"current_steps": 10602, "total_steps": 11160, "eval_loss": 0.40608441829681396, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:37:30", "remaining_time": "0:01:58", "throughput": 1775.53, "total_tokens": 3994976}
2140
  {"current_steps": 10605, "total_steps": 11160, "loss": 0.1703, "lr": 3.7709528214530664e-07, "epoch": 9.50268817204301, "percentage": 95.03, "elapsed_time": "0:37:32", "remaining_time": "0:01:57", "throughput": 1774.25, "total_tokens": 3996192}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2138
  {"current_steps": 10600, "total_steps": 11160, "loss": 0.0003, "lr": 3.8389057823175754e-07, "epoch": 9.498207885304659, "percentage": 94.98, "elapsed_time": "0:37:19", "remaining_time": "0:01:58", "throughput": 1783.77, "total_tokens": 3994240}
2139
  {"current_steps": 10602, "total_steps": 11160, "eval_loss": 0.40608441829681396, "epoch": 9.5, "percentage": 95.0, "elapsed_time": "0:37:30", "remaining_time": "0:01:58", "throughput": 1775.53, "total_tokens": 3994976}
2140
  {"current_steps": 10605, "total_steps": 11160, "loss": 0.1703, "lr": 3.7709528214530664e-07, "epoch": 9.50268817204301, "percentage": 95.03, "elapsed_time": "0:37:32", "remaining_time": "0:01:57", "throughput": 1774.25, "total_tokens": 3996192}
2141
+ {"current_steps": 10610, "total_steps": 11160, "loss": 0.0859, "lr": 3.7036020949648974e-07, "epoch": 9.507168458781361, "percentage": 95.07, "elapsed_time": "0:37:33", "remaining_time": "0:01:56", "throughput": 1774.36, "total_tokens": 3998144}
2142
+ {"current_steps": 10615, "total_steps": 11160, "loss": 0.0074, "lr": 3.636853767581494e-07, "epoch": 9.511648745519713, "percentage": 95.12, "elapsed_time": "0:37:34", "remaining_time": "0:01:55", "throughput": 1774.5, "total_tokens": 4000160}
2143
+ {"current_steps": 10620, "total_steps": 11160, "loss": 0.0005, "lr": 3.5707080025579045e-07, "epoch": 9.516129032258064, "percentage": 95.16, "elapsed_time": "0:37:35", "remaining_time": "0:01:54", "throughput": 1774.54, "total_tokens": 4001888}
2144
+ {"current_steps": 10625, "total_steps": 11160, "loss": 0.0003, "lr": 3.5051649616754114e-07, "epoch": 9.520609318996415, "percentage": 95.21, "elapsed_time": "0:37:36", "remaining_time": "0:01:53", "throughput": 1774.59, "total_tokens": 4003680}
2145
+ {"current_steps": 10630, "total_steps": 11160, "loss": 0.0002, "lr": 3.440224805241171e-07, "epoch": 9.525089605734767, "percentage": 95.25, "elapsed_time": "0:37:37", "remaining_time": "0:01:52", "throughput": 1774.69, "total_tokens": 4005632}
2146
+ {"current_steps": 10635, "total_steps": 11160, "loss": 0.0004, "lr": 3.3758876920877147e-07, "epoch": 9.529569892473118, "percentage": 95.3, "elapsed_time": "0:37:37", "remaining_time": "0:01:51", "throughput": 1774.75, "total_tokens": 4007296}
2147
+ {"current_steps": 10640, "total_steps": 11160, "loss": 0.0023, "lr": 3.312153779572724e-07, "epoch": 9.53405017921147, "percentage": 95.34, "elapsed_time": "0:37:38", "remaining_time": "0:01:50", "throughput": 1774.81, "total_tokens": 4009120}
2148
+ {"current_steps": 10645, "total_steps": 11160, "loss": 0.0004, "lr": 3.249023223578479e-07, "epoch": 9.53853046594982, "percentage": 95.39, "elapsed_time": "0:37:39", "remaining_time": "0:01:49", "throughput": 1774.88, "total_tokens": 4010944}
2149
+ {"current_steps": 10650, "total_steps": 11160, "loss": 0.0002, "lr": 3.1864961785116054e-07, "epoch": 9.543010752688172, "percentage": 95.43, "elapsed_time": "0:37:40", "remaining_time": "0:01:48", "throughput": 1774.94, "total_tokens": 4012832}
2150
+ {"current_steps": 10655, "total_steps": 11160, "loss": 0.0002, "lr": 3.124572797302661e-07, "epoch": 9.547491039426523, "percentage": 95.47, "elapsed_time": "0:37:41", "remaining_time": "0:01:47", "throughput": 1775.01, "total_tokens": 4014688}
2151
+ {"current_steps": 10660, "total_steps": 11160, "loss": 0.0825, "lr": 3.063253231405605e-07, "epoch": 9.551971326164875, "percentage": 95.52, "elapsed_time": "0:37:42", "remaining_time": "0:01:46", "throughput": 1775.09, "total_tokens": 4016512}
2152
+ {"current_steps": 10665, "total_steps": 11160, "loss": 0.001, "lr": 3.002537630797747e-07, "epoch": 9.556451612903226, "percentage": 95.56, "elapsed_time": "0:37:43", "remaining_time": "0:01:45", "throughput": 1775.19, "total_tokens": 4018368}
2153
+ {"current_steps": 10670, "total_steps": 11160, "loss": 0.0005, "lr": 2.9424261439791323e-07, "epoch": 9.560931899641577, "percentage": 95.61, "elapsed_time": "0:37:44", "remaining_time": "0:01:43", "throughput": 1775.25, "total_tokens": 4020096}
2154
+ {"current_steps": 10675, "total_steps": 11160, "loss": 0.0008, "lr": 2.8829189179721547e-07, "epoch": 9.565412186379929, "percentage": 95.65, "elapsed_time": "0:37:45", "remaining_time": "0:01:42", "throughput": 1775.31, "total_tokens": 4022048}
2155
+ {"current_steps": 10680, "total_steps": 11160, "loss": 0.0005, "lr": 2.824016098321447e-07, "epoch": 9.56989247311828, "percentage": 95.7, "elapsed_time": "0:37:46", "remaining_time": "0:01:41", "throughput": 1775.39, "total_tokens": 4023936}
2156
+ {"current_steps": 10685, "total_steps": 11160, "loss": 0.0006, "lr": 2.7657178290932396e-07, "epoch": 9.574372759856631, "percentage": 95.74, "elapsed_time": "0:37:47", "remaining_time": "0:01:40", "throughput": 1775.5, "total_tokens": 4025984}
2157
+ {"current_steps": 10690, "total_steps": 11160, "loss": 0.0012, "lr": 2.7080242528751964e-07, "epoch": 9.578853046594983, "percentage": 95.79, "elapsed_time": "0:37:48", "remaining_time": "0:01:39", "throughput": 1775.56, "total_tokens": 4027808}
2158
+ {"current_steps": 10695, "total_steps": 11160, "loss": 0.0004, "lr": 2.650935510776026e-07, "epoch": 9.583333333333334, "percentage": 95.83, "elapsed_time": "0:37:49", "remaining_time": "0:01:38", "throughput": 1775.63, "total_tokens": 4029632}
2159
+ {"current_steps": 10700, "total_steps": 11160, "loss": 0.0003, "lr": 2.594451742425036e-07, "epoch": 9.587813620071685, "percentage": 95.88, "elapsed_time": "0:37:50", "remaining_time": "0:01:37", "throughput": 1775.7, "total_tokens": 4031520}
2160
+ {"current_steps": 10705, "total_steps": 11160, "loss": 0.0104, "lr": 2.538573085971968e-07, "epoch": 9.592293906810037, "percentage": 95.92, "elapsed_time": "0:37:51", "remaining_time": "0:01:36", "throughput": 1775.83, "total_tokens": 4033568}
2161
+ {"current_steps": 10710, "total_steps": 11160, "loss": 0.0005, "lr": 2.4832996780864704e-07, "epoch": 9.596774193548388, "percentage": 95.97, "elapsed_time": "0:37:52", "remaining_time": "0:01:35", "throughput": 1775.9, "total_tokens": 4035424}
2162
+ {"current_steps": 10715, "total_steps": 11160, "loss": 0.001, "lr": 2.42863165395793e-07, "epoch": 9.601254480286737, "percentage": 96.01, "elapsed_time": "0:37:53", "remaining_time": "0:01:34", "throughput": 1776.01, "total_tokens": 4037376}
2163
+ {"current_steps": 10720, "total_steps": 11160, "loss": 0.0004, "lr": 2.3745691472950026e-07, "epoch": 9.60573476702509, "percentage": 96.06, "elapsed_time": "0:37:54", "remaining_time": "0:01:33", "throughput": 1776.09, "total_tokens": 4039264}
2164
+ {"current_steps": 10725, "total_steps": 11160, "loss": 0.0003, "lr": 2.3211122903254167e-07, "epoch": 9.61021505376344, "percentage": 96.1, "elapsed_time": "0:37:55", "remaining_time": "0:01:32", "throughput": 1776.13, "total_tokens": 4040992}
2165
+ {"current_steps": 10730, "total_steps": 11160, "loss": 0.145, "lr": 2.2682612137955307e-07, "epoch": 9.614695340501791, "percentage": 96.15, "elapsed_time": "0:37:56", "remaining_time": "0:01:31", "throughput": 1776.22, "total_tokens": 4042848}
2166
+ {"current_steps": 10735, "total_steps": 11160, "loss": 0.0009, "lr": 2.2160160469701097e-07, "epoch": 9.619175627240143, "percentage": 96.19, "elapsed_time": "0:37:56", "remaining_time": "0:01:30", "throughput": 1776.29, "total_tokens": 4044608}
2167
+ {"current_steps": 10740, "total_steps": 11160, "loss": 0.0004, "lr": 2.1643769176319385e-07, "epoch": 9.623655913978494, "percentage": 96.24, "elapsed_time": "0:37:57", "remaining_time": "0:01:29", "throughput": 1776.36, "total_tokens": 4046528}
2168
+ {"current_steps": 10745, "total_steps": 11160, "loss": 0.0005, "lr": 2.1133439520815423e-07, "epoch": 9.628136200716845, "percentage": 96.28, "elapsed_time": "0:37:58", "remaining_time": "0:01:28", "throughput": 1776.46, "total_tokens": 4048448}
2169
+ {"current_steps": 10750, "total_steps": 11160, "loss": 0.0033, "lr": 2.062917275136883e-07, "epoch": 9.632616487455197, "percentage": 96.33, "elapsed_time": "0:37:59", "remaining_time": "0:01:26", "throughput": 1776.53, "total_tokens": 4050304}
2170
+ {"current_steps": 10755, "total_steps": 11160, "loss": 0.0011, "lr": 2.0130970101330527e-07, "epoch": 9.637096774193548, "percentage": 96.37, "elapsed_time": "0:38:00", "remaining_time": "0:01:25", "throughput": 1776.62, "total_tokens": 4052224}
2171
+ {"current_steps": 10760, "total_steps": 11160, "loss": 0.176, "lr": 1.963883278921913e-07, "epoch": 9.6415770609319, "percentage": 96.42, "elapsed_time": "0:38:01", "remaining_time": "0:01:24", "throughput": 1776.71, "total_tokens": 4054208}
2172
+ {"current_steps": 10765, "total_steps": 11160, "loss": 0.0942, "lr": 1.9152762018719017e-07, "epoch": 9.64605734767025, "percentage": 96.46, "elapsed_time": "0:38:02", "remaining_time": "0:01:23", "throughput": 1776.81, "total_tokens": 4056192}
2173
+ {"current_steps": 10770, "total_steps": 11160, "loss": 0.0005, "lr": 1.867275897867643e-07, "epoch": 9.650537634408602, "percentage": 96.51, "elapsed_time": "0:38:03", "remaining_time": "0:01:22", "throughput": 1776.92, "total_tokens": 4058208}
2174
+ {"current_steps": 10775, "total_steps": 11160, "loss": 0.1099, "lr": 1.819882484309754e-07, "epoch": 9.655017921146953, "percentage": 96.55, "elapsed_time": "0:38:04", "remaining_time": "0:01:21", "throughput": 1777.03, "total_tokens": 4060096}
2175
+ {"current_steps": 10780, "total_steps": 11160, "loss": 0.0314, "lr": 1.773096077114428e-07, "epoch": 9.659498207885305, "percentage": 96.59, "elapsed_time": "0:38:05", "remaining_time": "0:01:20", "throughput": 1777.12, "total_tokens": 4062016}
2176
+ {"current_steps": 10785, "total_steps": 11160, "loss": 0.0001, "lr": 1.7269167907132954e-07, "epoch": 9.663978494623656, "percentage": 96.64, "elapsed_time": "0:38:06", "remaining_time": "0:01:19", "throughput": 1777.19, "total_tokens": 4063808}
2177
+ {"current_steps": 10790, "total_steps": 11160, "loss": 0.0002, "lr": 1.681344738053009e-07, "epoch": 9.668458781362007, "percentage": 96.68, "elapsed_time": "0:38:07", "remaining_time": "0:01:18", "throughput": 1777.25, "total_tokens": 4065600}
2178
+ {"current_steps": 10795, "total_steps": 11160, "loss": 0.0003, "lr": 1.636380030595075e-07, "epoch": 9.672939068100359, "percentage": 96.73, "elapsed_time": "0:38:08", "remaining_time": "0:01:17", "throughput": 1777.35, "total_tokens": 4067488}
2179
+ {"current_steps": 10800, "total_steps": 11160, "loss": 0.0003, "lr": 1.5920227783155217e-07, "epoch": 9.67741935483871, "percentage": 96.77, "elapsed_time": "0:38:09", "remaining_time": "0:01:16", "throughput": 1777.42, "total_tokens": 4069312}
2180
+ {"current_steps": 10805, "total_steps": 11160, "loss": 0.0005, "lr": 1.5482730897046216e-07, "epoch": 9.681899641577061, "percentage": 96.82, "elapsed_time": "0:38:10", "remaining_time": "0:01:15", "throughput": 1777.49, "total_tokens": 4071104}
2181
+ {"current_steps": 10810, "total_steps": 11160, "loss": 0.0002, "lr": 1.5051310717666967e-07, "epoch": 9.686379928315413, "percentage": 96.86, "elapsed_time": "0:38:11", "remaining_time": "0:01:14", "throughput": 1777.6, "total_tokens": 4073184}
2182
+ {"current_steps": 10815, "total_steps": 11160, "loss": 0.0014, "lr": 1.4625968300197857e-07, "epoch": 9.690860215053764, "percentage": 96.91, "elapsed_time": "0:38:12", "remaining_time": "0:01:13", "throughput": 1777.7, "total_tokens": 4075072}
2183
+ {"current_steps": 10820, "total_steps": 11160, "loss": 0.011, "lr": 1.4206704684953943e-07, "epoch": 9.695340501792115, "percentage": 96.95, "elapsed_time": "0:38:13", "remaining_time": "0:01:12", "throughput": 1777.79, "total_tokens": 4077024}
2184
+ {"current_steps": 10825, "total_steps": 11160, "loss": 0.0005, "lr": 1.3793520897383006e-07, "epoch": 9.699820788530467, "percentage": 97.0, "elapsed_time": "0:38:14", "remaining_time": "0:01:11", "throughput": 1777.87, "total_tokens": 4078944}
2185
+ {"current_steps": 10830, "total_steps": 11160, "loss": 0.003, "lr": 1.3386417948061947e-07, "epoch": 9.704301075268818, "percentage": 97.04, "elapsed_time": "0:38:15", "remaining_time": "0:01:09", "throughput": 1777.92, "total_tokens": 4080704}
2186
+ {"current_steps": 10835, "total_steps": 11160, "loss": 0.0014, "lr": 1.2985396832695674e-07, "epoch": 9.70878136200717, "percentage": 97.09, "elapsed_time": "0:38:16", "remaining_time": "0:01:08", "throughput": 1777.95, "total_tokens": 4082432}
2187
+ {"current_steps": 10840, "total_steps": 11160, "loss": 0.0003, "lr": 1.259045853211349e-07, "epoch": 9.713261648745519, "percentage": 97.13, "elapsed_time": "0:38:17", "remaining_time": "0:01:07", "throughput": 1778.06, "total_tokens": 4084320}
2188
+ {"current_steps": 10845, "total_steps": 11160, "loss": 0.0712, "lr": 1.2201604012267442e-07, "epoch": 9.717741935483872, "percentage": 97.18, "elapsed_time": "0:38:18", "remaining_time": "0:01:06", "throughput": 1778.15, "total_tokens": 4086240}
2189
+ {"current_steps": 10850, "total_steps": 11160, "loss": 0.0027, "lr": 1.1818834224229525e-07, "epoch": 9.722222222222221, "percentage": 97.22, "elapsed_time": "0:38:18", "remaining_time": "0:01:05", "throughput": 1778.23, "total_tokens": 4088096}
2190
+ {"current_steps": 10855, "total_steps": 11160, "loss": 0.0033, "lr": 1.1442150104189198e-07, "epoch": 9.726702508960573, "percentage": 97.27, "elapsed_time": "0:38:19", "remaining_time": "0:01:04", "throughput": 1778.32, "total_tokens": 4089888}
2191
+ {"current_steps": 10860, "total_steps": 11160, "loss": 0.0007, "lr": 1.1071552573452271e-07, "epoch": 9.731182795698924, "percentage": 97.31, "elapsed_time": "0:38:20", "remaining_time": "0:01:03", "throughput": 1778.39, "total_tokens": 4091744}
2192
+ {"current_steps": 10865, "total_steps": 11160, "loss": 0.1363, "lr": 1.0707042538437018e-07, "epoch": 9.735663082437275, "percentage": 97.36, "elapsed_time": "0:38:21", "remaining_time": "0:01:02", "throughput": 1778.44, "total_tokens": 4093408}
2193
+ {"current_steps": 10870, "total_steps": 11160, "loss": 0.0002, "lr": 1.0348620890673067e-07, "epoch": 9.740143369175627, "percentage": 97.4, "elapsed_time": "0:38:22", "remaining_time": "0:01:01", "throughput": 1778.54, "total_tokens": 4095392}
2194
+ {"current_steps": 10875, "total_steps": 11160, "loss": 0.0005, "lr": 9.9962885067989e-08, "epoch": 9.744623655913978, "percentage": 97.45, "elapsed_time": "0:38:23", "remaining_time": "0:01:00", "throughput": 1778.62, "total_tokens": 4097280}
2195
+ {"current_steps": 10880, "total_steps": 11160, "loss": 0.0006, "lr": 9.650046248559363e-08, "epoch": 9.74910394265233, "percentage": 97.49, "elapsed_time": "0:38:24", "remaining_time": "0:00:59", "throughput": 1778.76, "total_tokens": 4099360}
2196
+ {"current_steps": 10885, "total_steps": 11160, "loss": 0.0059, "lr": 9.309894962804267e-08, "epoch": 9.75358422939068, "percentage": 97.54, "elapsed_time": "0:38:25", "remaining_time": "0:00:58", "throughput": 1778.89, "total_tokens": 4101376}
2197
+ {"current_steps": 10890, "total_steps": 11160, "loss": 0.0029, "lr": 8.975835481485895e-08, "epoch": 9.758064516129032, "percentage": 97.58, "elapsed_time": "0:38:26", "remaining_time": "0:00:57", "throughput": 1779.01, "total_tokens": 4103296}
2198
+ {"current_steps": 10895, "total_steps": 11160, "loss": 0.0003, "lr": 8.647868621656785e-08, "epoch": 9.762544802867383, "percentage": 97.63, "elapsed_time": "0:38:27", "remaining_time": "0:00:56", "throughput": 1779.11, "total_tokens": 4105248}
2199
+ {"current_steps": 10900, "total_steps": 11160, "loss": 0.0003, "lr": 8.325995185468339e-08, "epoch": 9.767025089605735, "percentage": 97.67, "elapsed_time": "0:38:28", "remaining_time": "0:00:55", "throughput": 1779.19, "total_tokens": 4107072}
2200
+ {"current_steps": 10905, "total_steps": 11160, "loss": 0.0005, "lr": 8.010215960168044e-08, "epoch": 9.771505376344086, "percentage": 97.72, "elapsed_time": "0:38:29", "remaining_time": "0:00:53", "throughput": 1779.25, "total_tokens": 4108768}
2201
+ {"current_steps": 10910, "total_steps": 11160, "loss": 0.0057, "lr": 7.700531718098092e-08, "epoch": 9.775985663082437, "percentage": 97.76, "elapsed_time": "0:38:30", "remaining_time": "0:00:52", "throughput": 1779.34, "total_tokens": 4110624}
2202
+ {"current_steps": 10915, "total_steps": 11160, "loss": 0.0004, "lr": 7.396943216693708e-08, "epoch": 9.780465949820789, "percentage": 97.8, "elapsed_time": "0:38:31", "remaining_time": "0:00:51", "throughput": 1779.4, "total_tokens": 4112352}
2203
+ {"current_steps": 10920, "total_steps": 11160, "loss": 0.0007, "lr": 7.099451198480378e-08, "epoch": 9.78494623655914, "percentage": 97.85, "elapsed_time": "0:38:32", "remaining_time": "0:00:50", "throughput": 1779.46, "total_tokens": 4114144}
2204
+ {"current_steps": 10925, "total_steps": 11160, "loss": 0.0005, "lr": 6.808056391073569e-08, "epoch": 9.789426523297491, "percentage": 97.89, "elapsed_time": "0:38:32", "remaining_time": "0:00:49", "throughput": 1779.52, "total_tokens": 4115872}
2205
+ {"current_steps": 10930, "total_steps": 11160, "loss": 0.0725, "lr": 6.522759507175124e-08, "epoch": 9.793906810035843, "percentage": 97.94, "elapsed_time": "0:38:33", "remaining_time": "0:00:48", "throughput": 1779.65, "total_tokens": 4117984}
2206
+ {"current_steps": 10935, "total_steps": 11160, "loss": 0.1506, "lr": 6.243561244572427e-08, "epoch": 9.798387096774194, "percentage": 97.98, "elapsed_time": "0:38:34", "remaining_time": "0:00:47", "throughput": 1779.76, "total_tokens": 4119968}
2207
+ {"current_steps": 10940, "total_steps": 11160, "loss": 0.0004, "lr": 5.970462286137291e-08, "epoch": 9.802867383512545, "percentage": 98.03, "elapsed_time": "0:38:35", "remaining_time": "0:00:46", "throughput": 1779.88, "total_tokens": 4122048}
2208
+ {"current_steps": 10945, "total_steps": 11160, "loss": 0.0689, "lr": 5.7034632998231865e-08, "epoch": 9.807347670250897, "percentage": 98.07, "elapsed_time": "0:38:36", "remaining_time": "0:00:45", "throughput": 1779.96, "total_tokens": 4124032}
2209
+ {"current_steps": 10950, "total_steps": 11160, "loss": 0.0005, "lr": 5.4425649386644075e-08, "epoch": 9.811827956989248, "percentage": 98.12, "elapsed_time": "0:38:37", "remaining_time": "0:00:44", "throughput": 1780.07, "total_tokens": 4125984}
2210
+ {"current_steps": 10955, "total_steps": 11160, "loss": 0.0009, "lr": 5.187767840773849e-08, "epoch": 9.8163082437276, "percentage": 98.16, "elapsed_time": "0:38:38", "remaining_time": "0:00:43", "throughput": 1780.16, "total_tokens": 4128032}
2211
+ {"current_steps": 10960, "total_steps": 11160, "loss": 0.0018, "lr": 4.939072629341901e-08, "epoch": 9.82078853046595, "percentage": 98.21, "elapsed_time": "0:38:39", "remaining_time": "0:00:42", "throughput": 1780.23, "total_tokens": 4129792}
2212
+ {"current_steps": 10965, "total_steps": 11160, "loss": 0.0005, "lr": 4.696479912634499e-08, "epoch": 9.825268817204302, "percentage": 98.25, "elapsed_time": "0:38:40", "remaining_time": "0:00:41", "throughput": 1780.34, "total_tokens": 4131808}
2213
+ {"current_steps": 10970, "total_steps": 11160, "loss": 0.0039, "lr": 4.459990283992577e-08, "epoch": 9.829749103942653, "percentage": 98.3, "elapsed_time": "0:38:41", "remaining_time": "0:00:40", "throughput": 1780.42, "total_tokens": 4133696}
2214
+ {"current_steps": 10975, "total_steps": 11160, "loss": 0.0008, "lr": 4.229604321829561e-08, "epoch": 9.834229390681003, "percentage": 98.34, "elapsed_time": "0:38:42", "remaining_time": "0:00:39", "throughput": 1780.49, "total_tokens": 4135616}
2215
+ {"current_steps": 10980, "total_steps": 11160, "loss": 0.0018, "lr": 4.0053225896299894e-08, "epoch": 9.838709677419354, "percentage": 98.39, "elapsed_time": "0:38:43", "remaining_time": "0:00:38", "throughput": 1780.57, "total_tokens": 4137472}
2216
+ {"current_steps": 10985, "total_steps": 11160, "loss": 0.0002, "lr": 3.787145635948952e-08, "epoch": 9.843189964157705, "percentage": 98.43, "elapsed_time": "0:38:44", "remaining_time": "0:00:37", "throughput": 1780.64, "total_tokens": 4139328}
2217
+ {"current_steps": 10990, "total_steps": 11160, "loss": 0.0026, "lr": 3.575073994410427e-08, "epoch": 9.847670250896057, "percentage": 98.48, "elapsed_time": "0:38:45", "remaining_time": "0:00:35", "throughput": 1780.73, "total_tokens": 4141216}
2218
+ {"current_steps": 10995, "total_steps": 11160, "loss": 0.0054, "lr": 3.369108183705339e-08, "epoch": 9.852150537634408, "percentage": 98.52, "elapsed_time": "0:38:46", "remaining_time": "0:00:34", "throughput": 1780.8, "total_tokens": 4142976}
2219
+ {"current_steps": 11000, "total_steps": 11160, "loss": 0.0008, "lr": 3.169248707590999e-08, "epoch": 9.85663082437276, "percentage": 98.57, "elapsed_time": "0:38:47", "remaining_time": "0:00:33", "throughput": 1780.87, "total_tokens": 4144672}
2220
+ {"current_steps": 11005, "total_steps": 11160, "loss": 0.0004, "lr": 2.975496054889726e-08, "epoch": 9.86111111111111, "percentage": 98.61, "elapsed_time": "0:38:48", "remaining_time": "0:00:32", "throughput": 1780.92, "total_tokens": 4146496}
2221
+ {"current_steps": 11010, "total_steps": 11160, "loss": 0.0001, "lr": 2.7878506994877263e-08, "epoch": 9.865591397849462, "percentage": 98.66, "elapsed_time": "0:38:49", "remaining_time": "0:00:31", "throughput": 1780.97, "total_tokens": 4148320}
2222
+ {"current_steps": 11015, "total_steps": 11160, "loss": 0.0003, "lr": 2.6063131003337126e-08, "epoch": 9.870071684587813, "percentage": 98.7, "elapsed_time": "0:38:50", "remaining_time": "0:00:30", "throughput": 1781.03, "total_tokens": 4150176}
2223
+ {"current_steps": 11020, "total_steps": 11160, "loss": 0.033, "lr": 2.4308837014372366e-08, "epoch": 9.874551971326165, "percentage": 98.75, "elapsed_time": "0:38:51", "remaining_time": "0:00:29", "throughput": 1781.1, "total_tokens": 4152000}
2224
+ {"current_steps": 11025, "total_steps": 11160, "loss": 0.0897, "lr": 2.2615629318692434e-08, "epoch": 9.879032258064516, "percentage": 98.79, "elapsed_time": "0:38:52", "remaining_time": "0:00:28", "throughput": 1781.21, "total_tokens": 4153984}
2225
+ {"current_steps": 11030, "total_steps": 11160, "loss": 0.1747, "lr": 2.0983512057595743e-08, "epoch": 9.883512544802867, "percentage": 98.84, "elapsed_time": "0:38:53", "remaining_time": "0:00:27", "throughput": 1781.26, "total_tokens": 4155904}
2226
+ {"current_steps": 11035, "total_steps": 11160, "loss": 0.0002, "lr": 1.941248922296135e-08, "epoch": 9.887992831541219, "percentage": 98.88, "elapsed_time": "0:38:54", "remaining_time": "0:00:26", "throughput": 1781.33, "total_tokens": 4157760}
2227
+ {"current_steps": 11040, "total_steps": 11160, "loss": 0.0002, "lr": 1.7902564657246158e-08, "epoch": 9.89247311827957, "percentage": 98.92, "elapsed_time": "0:38:55", "remaining_time": "0:00:25", "throughput": 1781.38, "total_tokens": 4159584}
2228
+ {"current_steps": 11045, "total_steps": 11160, "loss": 0.0003, "lr": 1.6453742053465504e-08, "epoch": 9.896953405017921, "percentage": 98.97, "elapsed_time": "0:38:55", "remaining_time": "0:00:24", "throughput": 1781.45, "total_tokens": 4161472}
2229
+ {"current_steps": 11050, "total_steps": 11160, "loss": 0.0244, "lr": 1.506602495519316e-08, "epoch": 9.901433691756273, "percentage": 99.01, "elapsed_time": "0:38:56", "remaining_time": "0:00:23", "throughput": 1781.52, "total_tokens": 4163328}
2230
+ {"current_steps": 11055, "total_steps": 11160, "loss": 0.0002, "lr": 1.3739416756555768e-08, "epoch": 9.905913978494624, "percentage": 99.06, "elapsed_time": "0:38:57", "remaining_time": "0:00:22", "throughput": 1781.66, "total_tokens": 4165376}
2231
+ {"current_steps": 11060, "total_steps": 11160, "loss": 0.0014, "lr": 1.2473920702202325e-08, "epoch": 9.910394265232975, "percentage": 99.1, "elapsed_time": "0:38:58", "remaining_time": "0:00:21", "throughput": 1781.74, "total_tokens": 4167168}
2232
+ {"current_steps": 11065, "total_steps": 11160, "loss": 0.2259, "lr": 1.126953988732915e-08, "epoch": 9.914874551971327, "percentage": 99.15, "elapsed_time": "0:38:59", "remaining_time": "0:00:20", "throughput": 1781.82, "total_tokens": 4169056}
2233
+ {"current_steps": 11070, "total_steps": 11160, "loss": 0.3541, "lr": 1.0126277257641037e-08, "epoch": 9.919354838709678, "percentage": 99.19, "elapsed_time": "0:39:00", "remaining_time": "0:00:19", "throughput": 1781.89, "total_tokens": 4170976}
2234
+ {"current_steps": 11075, "total_steps": 11160, "loss": 0.0005, "lr": 9.044135609365124e-09, "epoch": 9.92383512544803, "percentage": 99.24, "elapsed_time": "0:39:01", "remaining_time": "0:00:17", "throughput": 1781.92, "total_tokens": 4172704}
2235
+ {"current_steps": 11080, "total_steps": 11160, "loss": 0.0002, "lr": 8.023117589237017e-09, "epoch": 9.92831541218638, "percentage": 99.28, "elapsed_time": "0:39:02", "remaining_time": "0:00:16", "throughput": 1782.02, "total_tokens": 4174688}
2236
+ {"current_steps": 11085, "total_steps": 11160, "loss": 0.0007, "lr": 7.06322569449247e-09, "epoch": 9.932795698924732, "percentage": 99.33, "elapsed_time": "0:39:03", "remaining_time": "0:00:15", "throughput": 1782.08, "total_tokens": 4176480}
2237
+ {"current_steps": 11090, "total_steps": 11160, "loss": 0.1785, "lr": 6.164462272864602e-09, "epoch": 9.937275985663083, "percentage": 99.37, "elapsed_time": "0:39:04", "remaining_time": "0:00:14", "throughput": 1782.18, "total_tokens": 4178432}
2238
+ {"current_steps": 11095, "total_steps": 11160, "loss": 0.0007, "lr": 5.326829522578347e-09, "epoch": 9.941756272401435, "percentage": 99.42, "elapsed_time": "0:39:05", "remaining_time": "0:00:13", "throughput": 1782.25, "total_tokens": 4180256}
2239
+ {"current_steps": 11100, "total_steps": 11160, "loss": 0.0002, "lr": 4.5503294923338044e-09, "epoch": 9.946236559139784, "percentage": 99.46, "elapsed_time": "0:39:06", "remaining_time": "0:00:12", "throughput": 1782.31, "total_tokens": 4182144}
2240
+ {"current_steps": 11105, "total_steps": 11160, "loss": 0.0901, "lr": 3.834964081325665e-09, "epoch": 9.950716845878135, "percentage": 99.51, "elapsed_time": "0:39:07", "remaining_time": "0:00:11", "throughput": 1782.39, "total_tokens": 4184064}
2241
+ {"current_steps": 11110, "total_steps": 11160, "loss": 0.1075, "lr": 3.1807350392099033e-09, "epoch": 9.955197132616487, "percentage": 99.55, "elapsed_time": "0:39:08", "remaining_time": "0:00:10", "throughput": 1782.46, "total_tokens": 4185888}
2242
+ {"current_steps": 11115, "total_steps": 11160, "loss": 0.2287, "lr": 2.58764396612321e-09, "epoch": 9.959677419354838, "percentage": 99.6, "elapsed_time": "0:39:09", "remaining_time": "0:00:09", "throughput": 1782.57, "total_tokens": 4187808}
2243
+ {"current_steps": 11120, "total_steps": 11160, "loss": 0.0006, "lr": 2.0556923126663353e-09, "epoch": 9.96415770609319, "percentage": 99.64, "elapsed_time": "0:39:10", "remaining_time": "0:00:08", "throughput": 1782.61, "total_tokens": 4189440}
2244
+ {"current_steps": 11125, "total_steps": 11160, "loss": 0.0006, "lr": 1.5848813798985396e-09, "epoch": 9.96863799283154, "percentage": 99.69, "elapsed_time": "0:39:11", "remaining_time": "0:00:07", "throughput": 1782.69, "total_tokens": 4191328}
2245
+ {"current_steps": 11130, "total_steps": 11160, "loss": 0.0181, "lr": 1.1752123193459197e-09, "epoch": 9.973118279569892, "percentage": 99.73, "elapsed_time": "0:39:12", "remaining_time": "0:00:06", "throughput": 1782.75, "total_tokens": 4193120}
2246
+ {"current_steps": 11135, "total_steps": 11160, "loss": 0.0009, "lr": 8.266861329903064e-10, "epoch": 9.977598566308243, "percentage": 99.78, "elapsed_time": "0:39:13", "remaining_time": "0:00:05", "throughput": 1782.86, "total_tokens": 4195200}
2247
+ {"current_steps": 11140, "total_steps": 11160, "loss": 0.0002, "lr": 5.393036732637136e-10, "epoch": 9.982078853046595, "percentage": 99.82, "elapsed_time": "0:39:13", "remaining_time": "0:00:04", "throughput": 1782.94, "total_tokens": 4197024}
2248
+ {"current_steps": 11145, "total_steps": 11160, "loss": 0.0002, "lr": 3.130656430594403e-10, "epoch": 9.986559139784946, "percentage": 99.87, "elapsed_time": "0:39:14", "remaining_time": "0:00:03", "throughput": 1783.02, "total_tokens": 4198976}
2249
+ {"current_steps": 11150, "total_steps": 11160, "loss": 0.0009, "lr": 1.4797259571541767e-10, "epoch": 9.991039426523297, "percentage": 99.91, "elapsed_time": "0:39:15", "remaining_time": "0:00:02", "throughput": 1783.1, "total_tokens": 4200832}
2250
+ {"current_steps": 11155, "total_steps": 11160, "loss": 0.0009, "lr": 4.402493501975968e-11, "epoch": 9.995519713261649, "percentage": 99.96, "elapsed_time": "0:39:16", "remaining_time": "0:00:01", "throughput": 1783.19, "total_tokens": 4202656}
2251
+ {"current_steps": 11160, "total_steps": 11160, "loss": 0.1073, "lr": 1.2229152107634533e-12, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:39:17", "remaining_time": "0:00:00", "throughput": 1783.11, "total_tokens": 4204168}
2252
+ {"current_steps": 11160, "total_steps": 11160, "eval_loss": 0.40339407324790955, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:39:28", "remaining_time": "0:00:00", "throughput": 1775.26, "total_tokens": 4204168}
2253
+ {"current_steps": 11160, "total_steps": 11160, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:39:29", "remaining_time": "0:00:00", "throughput": 1773.99, "total_tokens": 4204168}