rbelanec commited on
Commit
c890bca
·
verified ·
1 Parent(s): 8e72ca5

Training in progress, step 32436

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +383 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2fd691a69779af2453a731e09d16729c689f39bc213fb6b154a67de4839a7c03
3
  size 798032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80b39d9dd0c94109cec5bf7ad52df3f1f3116644285f394235ee9e10ba82f910
3
  size 798032
trainer_log.jsonl CHANGED
@@ -6122,3 +6122,386 @@
6122
  {"current_steps": 30530, "total_steps": 38160, "loss": 0.5205, "lr": 5.8474190388155216e-06, "epoch": 16.00104821802935, "percentage": 80.01, "elapsed_time": "1:32:18", "remaining_time": "0:23:04", "throughput": 3596.3, "total_tokens": 19917032}
6123
  {"current_steps": 30535, "total_steps": 38160, "loss": 0.6343, "lr": 5.840072026912205e-06, "epoch": 16.003668763102727, "percentage": 80.02, "elapsed_time": "1:32:19", "remaining_time": "0:23:03", "throughput": 3596.29, "total_tokens": 19919976}
6124
  {"current_steps": 30540, "total_steps": 38160, "loss": 0.4998, "lr": 5.83272902304724e-06, "epoch": 16.0062893081761, "percentage": 80.03, "elapsed_time": "1:32:19", "remaining_time": "0:23:02", "throughput": 3596.09, "total_tokens": 19922312}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6122
  {"current_steps": 30530, "total_steps": 38160, "loss": 0.5205, "lr": 5.8474190388155216e-06, "epoch": 16.00104821802935, "percentage": 80.01, "elapsed_time": "1:32:18", "remaining_time": "0:23:04", "throughput": 3596.3, "total_tokens": 19917032}
6123
  {"current_steps": 30535, "total_steps": 38160, "loss": 0.6343, "lr": 5.840072026912205e-06, "epoch": 16.003668763102727, "percentage": 80.02, "elapsed_time": "1:32:19", "remaining_time": "0:23:03", "throughput": 3596.29, "total_tokens": 19919976}
6124
  {"current_steps": 30540, "total_steps": 38160, "loss": 0.4998, "lr": 5.83272902304724e-06, "epoch": 16.0062893081761, "percentage": 80.03, "elapsed_time": "1:32:19", "remaining_time": "0:23:02", "throughput": 3596.09, "total_tokens": 19922312}
6125
+ {"current_steps": 30545, "total_steps": 38160, "loss": 0.6338, "lr": 5.825390028756683e-06, "epoch": 16.008909853249477, "percentage": 80.04, "elapsed_time": "1:32:20", "remaining_time": "0:23:01", "throughput": 3595.9, "total_tokens": 19924392}
6126
+ {"current_steps": 30550, "total_steps": 38160, "loss": 0.4768, "lr": 5.818055045575776e-06, "epoch": 16.01153039832285, "percentage": 80.06, "elapsed_time": "1:32:21", "remaining_time": "0:23:00", "throughput": 3595.88, "total_tokens": 19927528}
6127
+ {"current_steps": 30555, "total_steps": 38160, "loss": 0.4616, "lr": 5.810724075038917e-06, "epoch": 16.014150943396228, "percentage": 80.07, "elapsed_time": "1:32:22", "remaining_time": "0:22:59", "throughput": 3595.93, "total_tokens": 19930920}
6128
+ {"current_steps": 30560, "total_steps": 38160, "loss": 0.4171, "lr": 5.803397118679665e-06, "epoch": 16.0167714884696, "percentage": 80.08, "elapsed_time": "1:32:23", "remaining_time": "0:22:58", "throughput": 3595.95, "total_tokens": 19934152}
6129
+ {"current_steps": 30565, "total_steps": 38160, "loss": 0.3803, "lr": 5.796074178030727e-06, "epoch": 16.019392033542978, "percentage": 80.1, "elapsed_time": "1:32:24", "remaining_time": "0:22:57", "throughput": 3596.07, "total_tokens": 19938216}
6130
+ {"current_steps": 30570, "total_steps": 38160, "loss": 0.5628, "lr": 5.788755254623976e-06, "epoch": 16.02201257861635, "percentage": 80.11, "elapsed_time": "1:32:25", "remaining_time": "0:22:56", "throughput": 3596.19, "total_tokens": 19942088}
6131
+ {"current_steps": 30575, "total_steps": 38160, "loss": 0.8004, "lr": 5.7814403499904525e-06, "epoch": 16.02463312368973, "percentage": 80.12, "elapsed_time": "1:32:26", "remaining_time": "0:22:55", "throughput": 3596.14, "total_tokens": 19944744}
6132
+ {"current_steps": 30580, "total_steps": 38160, "loss": 0.475, "lr": 5.774129465660352e-06, "epoch": 16.0272536687631, "percentage": 80.14, "elapsed_time": "1:32:27", "remaining_time": "0:22:54", "throughput": 3596.23, "total_tokens": 19948360}
6133
+ {"current_steps": 30585, "total_steps": 38160, "loss": 0.5115, "lr": 5.7668226031630246e-06, "epoch": 16.02987421383648, "percentage": 80.15, "elapsed_time": "1:32:27", "remaining_time": "0:22:54", "throughput": 3596.15, "total_tokens": 19951016}
6134
+ {"current_steps": 30590, "total_steps": 38160, "loss": 0.4516, "lr": 5.759519764026999e-06, "epoch": 16.032494758909852, "percentage": 80.16, "elapsed_time": "1:32:28", "remaining_time": "0:22:53", "throughput": 3596.2, "total_tokens": 19954376}
6135
+ {"current_steps": 30595, "total_steps": 38160, "loss": 0.4949, "lr": 5.7522209497799185e-06, "epoch": 16.03511530398323, "percentage": 80.18, "elapsed_time": "1:32:29", "remaining_time": "0:22:52", "throughput": 3596.25, "total_tokens": 19957864}
6136
+ {"current_steps": 30600, "total_steps": 38160, "loss": 0.4387, "lr": 5.7449261619486246e-06, "epoch": 16.037735849056602, "percentage": 80.19, "elapsed_time": "1:32:30", "remaining_time": "0:22:51", "throughput": 3596.31, "total_tokens": 19961256}
6137
+ {"current_steps": 30605, "total_steps": 38160, "loss": 0.4223, "lr": 5.737635402059105e-06, "epoch": 16.04035639412998, "percentage": 80.2, "elapsed_time": "1:32:31", "remaining_time": "0:22:50", "throughput": 3596.38, "total_tokens": 19964744}
6138
+ {"current_steps": 30610, "total_steps": 38160, "loss": 0.5127, "lr": 5.730348671636501e-06, "epoch": 16.042976939203353, "percentage": 80.21, "elapsed_time": "1:32:32", "remaining_time": "0:22:49", "throughput": 3596.44, "total_tokens": 19968232}
6139
+ {"current_steps": 30615, "total_steps": 38160, "loss": 0.4557, "lr": 5.723065972205124e-06, "epoch": 16.04559748427673, "percentage": 80.23, "elapsed_time": "1:32:33", "remaining_time": "0:22:48", "throughput": 3596.45, "total_tokens": 19971176}
6140
+ {"current_steps": 30620, "total_steps": 38160, "loss": 0.488, "lr": 5.715787305288423e-06, "epoch": 16.048218029350103, "percentage": 80.24, "elapsed_time": "1:32:33", "remaining_time": "0:22:47", "throughput": 3596.53, "total_tokens": 19974888}
6141
+ {"current_steps": 30625, "total_steps": 38160, "loss": 0.418, "lr": 5.7085126724090024e-06, "epoch": 16.05083857442348, "percentage": 80.25, "elapsed_time": "1:32:34", "remaining_time": "0:22:46", "throughput": 3596.54, "total_tokens": 19977864}
6142
+ {"current_steps": 30630, "total_steps": 38160, "loss": 0.5577, "lr": 5.7012420750886445e-06, "epoch": 16.053459119496857, "percentage": 80.27, "elapsed_time": "1:32:35", "remaining_time": "0:22:45", "throughput": 3596.61, "total_tokens": 19981640}
6143
+ {"current_steps": 30635, "total_steps": 38160, "loss": 0.5527, "lr": 5.693975514848271e-06, "epoch": 16.05607966457023, "percentage": 80.28, "elapsed_time": "1:32:36", "remaining_time": "0:22:44", "throughput": 3596.75, "total_tokens": 19985640}
6144
+ {"current_steps": 30640, "total_steps": 38160, "loss": 0.3741, "lr": 5.686712993207971e-06, "epoch": 16.058700209643607, "percentage": 80.29, "elapsed_time": "1:32:37", "remaining_time": "0:22:43", "throughput": 3596.79, "total_tokens": 19988776}
6145
+ {"current_steps": 30645, "total_steps": 38160, "loss": 0.5101, "lr": 5.679454511686977e-06, "epoch": 16.06132075471698, "percentage": 80.31, "elapsed_time": "1:32:38", "remaining_time": "0:22:43", "throughput": 3596.78, "total_tokens": 19991976}
6146
+ {"current_steps": 30650, "total_steps": 38160, "loss": 0.4725, "lr": 5.672200071803671e-06, "epoch": 16.063941299790358, "percentage": 80.32, "elapsed_time": "1:32:39", "remaining_time": "0:22:42", "throughput": 3596.79, "total_tokens": 19995048}
6147
+ {"current_steps": 30655, "total_steps": 38160, "loss": 0.5106, "lr": 5.664949675075607e-06, "epoch": 16.06656184486373, "percentage": 80.33, "elapsed_time": "1:32:39", "remaining_time": "0:22:41", "throughput": 3596.81, "total_tokens": 19997992}
6148
+ {"current_steps": 30660, "total_steps": 38160, "loss": 0.407, "lr": 5.657703323019486e-06, "epoch": 16.069182389937108, "percentage": 80.35, "elapsed_time": "1:32:40", "remaining_time": "0:22:40", "throughput": 3596.74, "total_tokens": 20000520}
6149
+ {"current_steps": 30665, "total_steps": 38160, "loss": 0.6781, "lr": 5.65046101715116e-06, "epoch": 16.07180293501048, "percentage": 80.36, "elapsed_time": "1:32:41", "remaining_time": "0:22:39", "throughput": 3596.71, "total_tokens": 20003176}
6150
+ {"current_steps": 30670, "total_steps": 38160, "loss": 0.5539, "lr": 5.643222758985644e-06, "epoch": 16.07442348008386, "percentage": 80.37, "elapsed_time": "1:32:42", "remaining_time": "0:22:38", "throughput": 3596.79, "total_tokens": 20006664}
6151
+ {"current_steps": 30675, "total_steps": 38160, "loss": 0.5, "lr": 5.635988550037094e-06, "epoch": 16.07704402515723, "percentage": 80.39, "elapsed_time": "1:32:43", "remaining_time": "0:22:37", "throughput": 3596.76, "total_tokens": 20009320}
6152
+ {"current_steps": 30680, "total_steps": 38160, "loss": 0.402, "lr": 5.628758391818817e-06, "epoch": 16.07966457023061, "percentage": 80.4, "elapsed_time": "1:32:43", "remaining_time": "0:22:36", "throughput": 3596.65, "total_tokens": 20011656}
6153
+ {"current_steps": 30685, "total_steps": 38160, "loss": 0.4861, "lr": 5.621532285843287e-06, "epoch": 16.082285115303982, "percentage": 80.41, "elapsed_time": "1:32:44", "remaining_time": "0:22:35", "throughput": 3596.74, "total_tokens": 20015304}
6154
+ {"current_steps": 30690, "total_steps": 38160, "loss": 0.4286, "lr": 5.614310233622119e-06, "epoch": 16.08490566037736, "percentage": 80.42, "elapsed_time": "1:32:45", "remaining_time": "0:22:34", "throughput": 3596.8, "total_tokens": 20018504}
6155
+ {"current_steps": 30695, "total_steps": 38160, "loss": 0.4169, "lr": 5.607092236666093e-06, "epoch": 16.087526205450732, "percentage": 80.44, "elapsed_time": "1:32:46", "remaining_time": "0:22:33", "throughput": 3596.76, "total_tokens": 20021096}
6156
+ {"current_steps": 30700, "total_steps": 38160, "loss": 0.5036, "lr": 5.599878296485125e-06, "epoch": 16.09014675052411, "percentage": 80.45, "elapsed_time": "1:32:47", "remaining_time": "0:22:32", "throughput": 3596.77, "total_tokens": 20023912}
6157
+ {"current_steps": 30705, "total_steps": 38160, "loss": 0.4983, "lr": 5.592668414588284e-06, "epoch": 16.092767295597483, "percentage": 80.46, "elapsed_time": "1:32:47", "remaining_time": "0:22:31", "throughput": 3596.77, "total_tokens": 20026792}
6158
+ {"current_steps": 30710, "total_steps": 38160, "loss": 0.4848, "lr": 5.585462592483801e-06, "epoch": 16.09538784067086, "percentage": 80.48, "elapsed_time": "1:32:48", "remaining_time": "0:22:30", "throughput": 3596.64, "total_tokens": 20028840}
6159
+ {"current_steps": 30715, "total_steps": 38160, "loss": 0.4701, "lr": 5.578260831679047e-06, "epoch": 16.098008385744233, "percentage": 80.49, "elapsed_time": "1:32:49", "remaining_time": "0:22:30", "throughput": 3596.67, "total_tokens": 20032072}
6160
+ {"current_steps": 30720, "total_steps": 38160, "loss": 0.4221, "lr": 5.571063133680562e-06, "epoch": 16.10062893081761, "percentage": 80.5, "elapsed_time": "1:32:50", "remaining_time": "0:22:29", "throughput": 3596.67, "total_tokens": 20034824}
6161
+ {"current_steps": 30725, "total_steps": 38160, "loss": 0.3562, "lr": 5.563869499994004e-06, "epoch": 16.103249475890987, "percentage": 80.52, "elapsed_time": "1:32:51", "remaining_time": "0:22:28", "throughput": 3596.8, "total_tokens": 20038792}
6162
+ {"current_steps": 30730, "total_steps": 38160, "loss": 0.3739, "lr": 5.556679932124215e-06, "epoch": 16.10587002096436, "percentage": 80.53, "elapsed_time": "1:32:52", "remaining_time": "0:22:27", "throughput": 3596.86, "total_tokens": 20042024}
6163
+ {"current_steps": 30735, "total_steps": 38160, "loss": 0.5995, "lr": 5.549494431575159e-06, "epoch": 16.108490566037737, "percentage": 80.54, "elapsed_time": "1:32:52", "remaining_time": "0:22:26", "throughput": 3596.92, "total_tokens": 20045576}
6164
+ {"current_steps": 30740, "total_steps": 38160, "loss": 0.3925, "lr": 5.542312999849961e-06, "epoch": 16.11111111111111, "percentage": 80.56, "elapsed_time": "1:32:53", "remaining_time": "0:22:25", "throughput": 3597.01, "total_tokens": 20048872}
6165
+ {"current_steps": 30745, "total_steps": 38160, "loss": 0.5174, "lr": 5.5351356384509e-06, "epoch": 16.113731656184488, "percentage": 80.57, "elapsed_time": "1:32:54", "remaining_time": "0:22:24", "throughput": 3597.16, "total_tokens": 20053096}
6166
+ {"current_steps": 30750, "total_steps": 38160, "loss": 0.4987, "lr": 5.527962348879401e-06, "epoch": 16.11635220125786, "percentage": 80.58, "elapsed_time": "1:32:55", "remaining_time": "0:22:23", "throughput": 3597.2, "total_tokens": 20056296}
6167
+ {"current_steps": 30755, "total_steps": 38160, "loss": 0.564, "lr": 5.520793132636027e-06, "epoch": 16.118972746331238, "percentage": 80.59, "elapsed_time": "1:32:56", "remaining_time": "0:22:22", "throughput": 3597.21, "total_tokens": 20059368}
6168
+ {"current_steps": 30760, "total_steps": 38160, "loss": 0.3555, "lr": 5.513627991220502e-06, "epoch": 16.12159329140461, "percentage": 80.61, "elapsed_time": "1:32:57", "remaining_time": "0:22:21", "throughput": 3597.21, "total_tokens": 20062472}
6169
+ {"current_steps": 30765, "total_steps": 38160, "loss": 0.4051, "lr": 5.506466926131684e-06, "epoch": 16.12421383647799, "percentage": 80.62, "elapsed_time": "1:32:58", "remaining_time": "0:22:20", "throughput": 3597.32, "total_tokens": 20066024}
6170
+ {"current_steps": 30770, "total_steps": 38160, "loss": 0.4242, "lr": 5.4993099388675916e-06, "epoch": 16.12683438155136, "percentage": 80.63, "elapsed_time": "1:32:58", "remaining_time": "0:22:19", "throughput": 3597.35, "total_tokens": 20068968}
6171
+ {"current_steps": 30775, "total_steps": 38160, "loss": 0.482, "lr": 5.49215703092539e-06, "epoch": 16.12945492662474, "percentage": 80.65, "elapsed_time": "1:32:59", "remaining_time": "0:22:18", "throughput": 3597.33, "total_tokens": 20071720}
6172
+ {"current_steps": 30780, "total_steps": 38160, "loss": 0.712, "lr": 5.4850082038013755e-06, "epoch": 16.132075471698112, "percentage": 80.66, "elapsed_time": "1:33:00", "remaining_time": "0:22:17", "throughput": 3597.4, "total_tokens": 20074984}
6173
+ {"current_steps": 30785, "total_steps": 38160, "loss": 0.5959, "lr": 5.4778634589910115e-06, "epoch": 16.13469601677149, "percentage": 80.67, "elapsed_time": "1:33:01", "remaining_time": "0:22:17", "throughput": 3597.51, "total_tokens": 20078760}
6174
+ {"current_steps": 30790, "total_steps": 38160, "loss": 0.4599, "lr": 5.4707227979888834e-06, "epoch": 16.137316561844862, "percentage": 80.69, "elapsed_time": "1:33:02", "remaining_time": "0:22:16", "throughput": 3597.47, "total_tokens": 20081480}
6175
+ {"current_steps": 30795, "total_steps": 38160, "loss": 0.4938, "lr": 5.463586222288747e-06, "epoch": 16.13993710691824, "percentage": 80.7, "elapsed_time": "1:33:02", "remaining_time": "0:22:15", "throughput": 3597.42, "total_tokens": 20084072}
6176
+ {"current_steps": 30800, "total_steps": 38160, "loss": 0.583, "lr": 5.456453733383493e-06, "epoch": 16.142557651991613, "percentage": 80.71, "elapsed_time": "1:33:03", "remaining_time": "0:22:14", "throughput": 3597.54, "total_tokens": 20088488}
6177
+ {"current_steps": 30805, "total_steps": 38160, "loss": 0.4614, "lr": 5.449325332765151e-06, "epoch": 16.14517819706499, "percentage": 80.73, "elapsed_time": "1:33:04", "remaining_time": "0:22:13", "throughput": 3597.61, "total_tokens": 20092008}
6178
+ {"current_steps": 30810, "total_steps": 38160, "loss": 0.5731, "lr": 5.4422010219249e-06, "epoch": 16.147798742138363, "percentage": 80.74, "elapsed_time": "1:33:05", "remaining_time": "0:22:12", "throughput": 3597.54, "total_tokens": 20094664}
6179
+ {"current_steps": 30815, "total_steps": 38160, "loss": 0.4233, "lr": 5.435080802353077e-06, "epoch": 16.15041928721174, "percentage": 80.75, "elapsed_time": "1:33:07", "remaining_time": "0:22:11", "throughput": 3597.71, "total_tokens": 20101640}
6180
+ {"current_steps": 30820, "total_steps": 38160, "loss": 0.5023, "lr": 5.4279646755391325e-06, "epoch": 16.153039832285117, "percentage": 80.77, "elapsed_time": "1:33:08", "remaining_time": "0:22:10", "throughput": 3597.78, "total_tokens": 20104840}
6181
+ {"current_steps": 30825, "total_steps": 38160, "loss": 0.5477, "lr": 5.420852642971691e-06, "epoch": 16.15566037735849, "percentage": 80.78, "elapsed_time": "1:33:09", "remaining_time": "0:22:09", "throughput": 3597.83, "total_tokens": 20108456}
6182
+ {"current_steps": 30830, "total_steps": 38160, "loss": 0.5964, "lr": 5.41374470613851e-06, "epoch": 16.158280922431867, "percentage": 80.79, "elapsed_time": "1:33:09", "remaining_time": "0:22:09", "throughput": 3597.75, "total_tokens": 20110792}
6183
+ {"current_steps": 30835, "total_steps": 38160, "loss": 0.3779, "lr": 5.40664086652648e-06, "epoch": 16.16090146750524, "percentage": 80.8, "elapsed_time": "1:33:10", "remaining_time": "0:22:08", "throughput": 3597.69, "total_tokens": 20113256}
6184
+ {"current_steps": 30840, "total_steps": 38160, "loss": 0.3923, "lr": 5.399541125621646e-06, "epoch": 16.163522012578618, "percentage": 80.82, "elapsed_time": "1:33:11", "remaining_time": "0:22:07", "throughput": 3597.66, "total_tokens": 20116424}
6185
+ {"current_steps": 30845, "total_steps": 38160, "loss": 0.5131, "lr": 5.392445484909203e-06, "epoch": 16.16614255765199, "percentage": 80.83, "elapsed_time": "1:33:12", "remaining_time": "0:22:06", "throughput": 3597.75, "total_tokens": 20120200}
6186
+ {"current_steps": 30850, "total_steps": 38160, "loss": 0.5433, "lr": 5.385353945873464e-06, "epoch": 16.168763102725368, "percentage": 80.84, "elapsed_time": "1:33:13", "remaining_time": "0:22:05", "throughput": 3597.67, "total_tokens": 20122664}
6187
+ {"current_steps": 30855, "total_steps": 38160, "loss": 0.5605, "lr": 5.378266509997909e-06, "epoch": 16.17138364779874, "percentage": 80.86, "elapsed_time": "1:33:14", "remaining_time": "0:22:04", "throughput": 3597.6, "total_tokens": 20125224}
6188
+ {"current_steps": 30860, "total_steps": 38160, "loss": 0.4582, "lr": 5.371183178765135e-06, "epoch": 16.17400419287212, "percentage": 80.87, "elapsed_time": "1:33:14", "remaining_time": "0:22:03", "throughput": 3597.67, "total_tokens": 20128968}
6189
+ {"current_steps": 30865, "total_steps": 38160, "loss": 0.446, "lr": 5.364103953656904e-06, "epoch": 16.17662473794549, "percentage": 80.88, "elapsed_time": "1:33:15", "remaining_time": "0:22:02", "throughput": 3597.59, "total_tokens": 20131656}
6190
+ {"current_steps": 30870, "total_steps": 38160, "loss": 0.514, "lr": 5.35702883615411e-06, "epoch": 16.17924528301887, "percentage": 80.9, "elapsed_time": "1:33:16", "remaining_time": "0:22:01", "throughput": 3597.56, "total_tokens": 20134696}
6191
+ {"current_steps": 30875, "total_steps": 38160, "loss": 0.5825, "lr": 5.34995782773679e-06, "epoch": 16.181865828092242, "percentage": 80.91, "elapsed_time": "1:33:17", "remaining_time": "0:22:00", "throughput": 3597.51, "total_tokens": 20137192}
6192
+ {"current_steps": 30880, "total_steps": 38160, "loss": 0.3501, "lr": 5.3428909298841115e-06, "epoch": 16.18448637316562, "percentage": 80.92, "elapsed_time": "1:33:18", "remaining_time": "0:21:59", "throughput": 3597.56, "total_tokens": 20140712}
6193
+ {"current_steps": 30885, "total_steps": 38160, "loss": 0.6075, "lr": 5.335828144074384e-06, "epoch": 16.187106918238992, "percentage": 80.94, "elapsed_time": "1:33:19", "remaining_time": "0:21:58", "throughput": 3597.52, "total_tokens": 20143496}
6194
+ {"current_steps": 30890, "total_steps": 38160, "loss": 0.3903, "lr": 5.3287694717850695e-06, "epoch": 16.18972746331237, "percentage": 80.95, "elapsed_time": "1:33:20", "remaining_time": "0:21:58", "throughput": 3597.75, "total_tokens": 20148296}
6195
+ {"current_steps": 30895, "total_steps": 38160, "loss": 0.4439, "lr": 5.321714914492759e-06, "epoch": 16.192348008385743, "percentage": 80.96, "elapsed_time": "1:33:21", "remaining_time": "0:21:57", "throughput": 3597.85, "total_tokens": 20152168}
6196
+ {"current_steps": 30900, "total_steps": 38160, "loss": 0.5419, "lr": 5.314664473673195e-06, "epoch": 16.19496855345912, "percentage": 80.97, "elapsed_time": "1:33:22", "remaining_time": "0:21:56", "throughput": 3597.95, "total_tokens": 20155784}
6197
+ {"current_steps": 30905, "total_steps": 38160, "loss": 0.5938, "lr": 5.307618150801239e-06, "epoch": 16.197589098532493, "percentage": 80.99, "elapsed_time": "1:33:22", "remaining_time": "0:21:55", "throughput": 3598.02, "total_tokens": 20159112}
6198
+ {"current_steps": 30910, "total_steps": 38160, "loss": 0.4309, "lr": 5.300575947350897e-06, "epoch": 16.20020964360587, "percentage": 81.0, "elapsed_time": "1:33:23", "remaining_time": "0:21:54", "throughput": 3598.06, "total_tokens": 20162152}
6199
+ {"current_steps": 30915, "total_steps": 38160, "loss": 0.5679, "lr": 5.293537864795325e-06, "epoch": 16.202830188679247, "percentage": 81.01, "elapsed_time": "1:33:24", "remaining_time": "0:21:53", "throughput": 3598.07, "total_tokens": 20165032}
6200
+ {"current_steps": 30920, "total_steps": 38160, "loss": 0.4919, "lr": 5.2865039046068075e-06, "epoch": 16.20545073375262, "percentage": 81.03, "elapsed_time": "1:33:25", "remaining_time": "0:21:52", "throughput": 3598.12, "total_tokens": 20168136}
6201
+ {"current_steps": 30925, "total_steps": 38160, "loss": 0.405, "lr": 5.279474068256767e-06, "epoch": 16.208071278825997, "percentage": 81.04, "elapsed_time": "1:33:25", "remaining_time": "0:21:51", "throughput": 3598.18, "total_tokens": 20171368}
6202
+ {"current_steps": 30930, "total_steps": 38160, "loss": 0.2853, "lr": 5.272448357215779e-06, "epoch": 16.21069182389937, "percentage": 81.05, "elapsed_time": "1:33:26", "remaining_time": "0:21:50", "throughput": 3598.19, "total_tokens": 20174664}
6203
+ {"current_steps": 30935, "total_steps": 38160, "loss": 0.6859, "lr": 5.26542677295353e-06, "epoch": 16.213312368972748, "percentage": 81.07, "elapsed_time": "1:33:27", "remaining_time": "0:21:49", "throughput": 3598.15, "total_tokens": 20177224}
6204
+ {"current_steps": 30940, "total_steps": 38160, "loss": 0.4557, "lr": 5.258409316938848e-06, "epoch": 16.21593291404612, "percentage": 81.08, "elapsed_time": "1:33:28", "remaining_time": "0:21:48", "throughput": 3598.15, "total_tokens": 20180200}
6205
+ {"current_steps": 30945, "total_steps": 38160, "loss": 0.4092, "lr": 5.251395990639713e-06, "epoch": 16.218553459119498, "percentage": 81.09, "elapsed_time": "1:33:29", "remaining_time": "0:21:47", "throughput": 3598.26, "total_tokens": 20183592}
6206
+ {"current_steps": 30950, "total_steps": 38160, "loss": 0.4971, "lr": 5.244386795523235e-06, "epoch": 16.22117400419287, "percentage": 81.11, "elapsed_time": "1:33:30", "remaining_time": "0:21:46", "throughput": 3598.32, "total_tokens": 20186760}
6207
+ {"current_steps": 30955, "total_steps": 38160, "loss": 0.4444, "lr": 5.23738173305566e-06, "epoch": 16.22379454926625, "percentage": 81.12, "elapsed_time": "1:33:30", "remaining_time": "0:21:45", "throughput": 3598.37, "total_tokens": 20189864}
6208
+ {"current_steps": 30960, "total_steps": 38160, "loss": 0.5155, "lr": 5.230380804702362e-06, "epoch": 16.22641509433962, "percentage": 81.13, "elapsed_time": "1:33:31", "remaining_time": "0:21:45", "throughput": 3598.38, "total_tokens": 20193096}
6209
+ {"current_steps": 30965, "total_steps": 38160, "loss": 0.4646, "lr": 5.223384011927851e-06, "epoch": 16.229035639413, "percentage": 81.15, "elapsed_time": "1:33:32", "remaining_time": "0:21:44", "throughput": 3598.39, "total_tokens": 20196136}
6210
+ {"current_steps": 30970, "total_steps": 38160, "loss": 0.4972, "lr": 5.216391356195779e-06, "epoch": 16.231656184486372, "percentage": 81.16, "elapsed_time": "1:33:33", "remaining_time": "0:21:43", "throughput": 3598.49, "total_tokens": 20199880}
6211
+ {"current_steps": 30975, "total_steps": 38160, "loss": 0.4858, "lr": 5.209402838968935e-06, "epoch": 16.23427672955975, "percentage": 81.17, "elapsed_time": "1:33:34", "remaining_time": "0:21:42", "throughput": 3598.59, "total_tokens": 20203752}
6212
+ {"current_steps": 30980, "total_steps": 38160, "loss": 0.4086, "lr": 5.202418461709233e-06, "epoch": 16.236897274633122, "percentage": 81.18, "elapsed_time": "1:33:35", "remaining_time": "0:21:41", "throughput": 3598.63, "total_tokens": 20206760}
6213
+ {"current_steps": 30985, "total_steps": 38160, "loss": 0.5242, "lr": 5.195438225877738e-06, "epoch": 16.2395178197065, "percentage": 81.2, "elapsed_time": "1:33:35", "remaining_time": "0:21:40", "throughput": 3598.54, "total_tokens": 20209096}
6214
+ {"current_steps": 30990, "total_steps": 38160, "loss": 0.3368, "lr": 5.18846213293461e-06, "epoch": 16.242138364779873, "percentage": 81.21, "elapsed_time": "1:33:36", "remaining_time": "0:21:39", "throughput": 3598.61, "total_tokens": 20212520}
6215
+ {"current_steps": 30995, "total_steps": 38160, "loss": 0.4223, "lr": 5.181490184339185e-06, "epoch": 16.24475890985325, "percentage": 81.22, "elapsed_time": "1:33:37", "remaining_time": "0:21:38", "throughput": 3598.64, "total_tokens": 20215720}
6216
+ {"current_steps": 31000, "total_steps": 38160, "loss": 0.369, "lr": 5.17452238154991e-06, "epoch": 16.247379454926623, "percentage": 81.24, "elapsed_time": "1:33:38", "remaining_time": "0:21:37", "throughput": 3598.67, "total_tokens": 20219048}
6217
+ {"current_steps": 31005, "total_steps": 38160, "loss": 0.6478, "lr": 5.1675587260243715e-06, "epoch": 16.25, "percentage": 81.25, "elapsed_time": "1:33:39", "remaining_time": "0:21:36", "throughput": 3598.75, "total_tokens": 20222440}
6218
+ {"current_steps": 31010, "total_steps": 38160, "loss": 0.5014, "lr": 5.160599219219295e-06, "epoch": 16.252620545073377, "percentage": 81.26, "elapsed_time": "1:33:40", "remaining_time": "0:21:35", "throughput": 3598.71, "total_tokens": 20225224}
6219
+ {"current_steps": 31015, "total_steps": 38160, "loss": 0.5133, "lr": 5.1536438625905185e-06, "epoch": 16.25524109014675, "percentage": 81.28, "elapsed_time": "1:33:40", "remaining_time": "0:21:34", "throughput": 3598.8, "total_tokens": 20228680}
6220
+ {"current_steps": 31020, "total_steps": 38160, "loss": 0.6487, "lr": 5.146692657593025e-06, "epoch": 16.257861635220127, "percentage": 81.29, "elapsed_time": "1:33:41", "remaining_time": "0:21:33", "throughput": 3598.68, "total_tokens": 20231176}
6221
+ {"current_steps": 31025, "total_steps": 38160, "loss": 0.5265, "lr": 5.139745605680926e-06, "epoch": 16.2604821802935, "percentage": 81.3, "elapsed_time": "1:33:42", "remaining_time": "0:21:33", "throughput": 3598.62, "total_tokens": 20233672}
6222
+ {"current_steps": 31030, "total_steps": 38160, "loss": 0.3803, "lr": 5.132802708307469e-06, "epoch": 16.263102725366878, "percentage": 81.32, "elapsed_time": "1:33:43", "remaining_time": "0:21:32", "throughput": 3598.57, "total_tokens": 20236424}
6223
+ {"current_steps": 31035, "total_steps": 38160, "loss": 0.5042, "lr": 5.125863966925035e-06, "epoch": 16.26572327044025, "percentage": 81.33, "elapsed_time": "1:33:44", "remaining_time": "0:21:31", "throughput": 3598.53, "total_tokens": 20239336}
6224
+ {"current_steps": 31040, "total_steps": 38160, "loss": 0.5035, "lr": 5.118929382985116e-06, "epoch": 16.268343815513628, "percentage": 81.34, "elapsed_time": "1:33:45", "remaining_time": "0:21:30", "throughput": 3598.49, "total_tokens": 20242056}
6225
+ {"current_steps": 31045, "total_steps": 38160, "loss": 0.4319, "lr": 5.111998957938363e-06, "epoch": 16.270964360587, "percentage": 81.35, "elapsed_time": "1:33:45", "remaining_time": "0:21:29", "throughput": 3598.55, "total_tokens": 20245384}
6226
+ {"current_steps": 31050, "total_steps": 38160, "loss": 0.4901, "lr": 5.105072693234525e-06, "epoch": 16.27358490566038, "percentage": 81.37, "elapsed_time": "1:33:46", "remaining_time": "0:21:28", "throughput": 3598.63, "total_tokens": 20248936}
6227
+ {"current_steps": 31055, "total_steps": 38160, "loss": 0.4163, "lr": 5.098150590322506e-06, "epoch": 16.27620545073375, "percentage": 81.38, "elapsed_time": "1:33:47", "remaining_time": "0:21:27", "throughput": 3598.82, "total_tokens": 20253416}
6228
+ {"current_steps": 31060, "total_steps": 38160, "loss": 0.5198, "lr": 5.091232650650335e-06, "epoch": 16.27882599580713, "percentage": 81.39, "elapsed_time": "1:33:48", "remaining_time": "0:21:26", "throughput": 3598.91, "total_tokens": 20256968}
6229
+ {"current_steps": 31065, "total_steps": 38160, "loss": 0.6212, "lr": 5.0843188756651645e-06, "epoch": 16.281446540880502, "percentage": 81.41, "elapsed_time": "1:33:49", "remaining_time": "0:21:25", "throughput": 3598.87, "total_tokens": 20259752}
6230
+ {"current_steps": 31070, "total_steps": 38160, "loss": 0.5233, "lr": 5.077409266813276e-06, "epoch": 16.28406708595388, "percentage": 81.42, "elapsed_time": "1:33:50", "remaining_time": "0:21:24", "throughput": 3598.84, "total_tokens": 20262600}
6231
+ {"current_steps": 31075, "total_steps": 38160, "loss": 0.4228, "lr": 5.0705038255400765e-06, "epoch": 16.286687631027252, "percentage": 81.43, "elapsed_time": "1:33:51", "remaining_time": "0:21:23", "throughput": 3598.89, "total_tokens": 20265896}
6232
+ {"current_steps": 31080, "total_steps": 38160, "loss": 0.5611, "lr": 5.063602553290106e-06, "epoch": 16.28930817610063, "percentage": 81.45, "elapsed_time": "1:33:51", "remaining_time": "0:21:22", "throughput": 3598.94, "total_tokens": 20269224}
6233
+ {"current_steps": 31085, "total_steps": 38160, "loss": 0.3681, "lr": 5.056705451507035e-06, "epoch": 16.291928721174003, "percentage": 81.46, "elapsed_time": "1:33:52", "remaining_time": "0:21:22", "throughput": 3598.92, "total_tokens": 20272072}
6234
+ {"current_steps": 31090, "total_steps": 38160, "loss": 0.5048, "lr": 5.049812521633668e-06, "epoch": 16.29454926624738, "percentage": 81.47, "elapsed_time": "1:33:53", "remaining_time": "0:21:21", "throughput": 3598.95, "total_tokens": 20275400}
6235
+ {"current_steps": 31095, "total_steps": 38160, "loss": 0.4522, "lr": 5.042923765111907e-06, "epoch": 16.297169811320753, "percentage": 81.49, "elapsed_time": "1:33:54", "remaining_time": "0:21:20", "throughput": 3599.1, "total_tokens": 20279560}
6236
+ {"current_steps": 31100, "total_steps": 38160, "loss": 0.5117, "lr": 5.036039183382821e-06, "epoch": 16.29979035639413, "percentage": 81.5, "elapsed_time": "1:33:55", "remaining_time": "0:21:19", "throughput": 3599.13, "total_tokens": 20282728}
6237
+ {"current_steps": 31105, "total_steps": 38160, "loss": 0.5788, "lr": 5.02915877788657e-06, "epoch": 16.302410901467507, "percentage": 81.51, "elapsed_time": "1:33:56", "remaining_time": "0:21:18", "throughput": 3599.07, "total_tokens": 20285224}
6238
+ {"current_steps": 31110, "total_steps": 38160, "loss": 0.4189, "lr": 5.0222825500624625e-06, "epoch": 16.30503144654088, "percentage": 81.53, "elapsed_time": "1:33:57", "remaining_time": "0:21:17", "throughput": 3599.08, "total_tokens": 20288552}
6239
+ {"current_steps": 31115, "total_steps": 38160, "loss": 0.4791, "lr": 5.015410501348935e-06, "epoch": 16.307651991614257, "percentage": 81.54, "elapsed_time": "1:33:58", "remaining_time": "0:21:16", "throughput": 3599.21, "total_tokens": 20292776}
6240
+ {"current_steps": 31120, "total_steps": 38160, "loss": 0.6379, "lr": 5.008542633183527e-06, "epoch": 16.31027253668763, "percentage": 81.55, "elapsed_time": "1:33:58", "remaining_time": "0:21:15", "throughput": 3599.13, "total_tokens": 20295208}
6241
+ {"current_steps": 31125, "total_steps": 38160, "loss": 0.4862, "lr": 5.001678947002927e-06, "epoch": 16.312893081761008, "percentage": 81.56, "elapsed_time": "1:33:59", "remaining_time": "0:21:14", "throughput": 3599.11, "total_tokens": 20297960}
6242
+ {"current_steps": 31130, "total_steps": 38160, "loss": 0.584, "lr": 4.994819444242943e-06, "epoch": 16.31551362683438, "percentage": 81.58, "elapsed_time": "1:34:00", "remaining_time": "0:21:13", "throughput": 3599.1, "total_tokens": 20300968}
6243
+ {"current_steps": 31135, "total_steps": 38160, "loss": 0.5539, "lr": 4.9879641263384956e-06, "epoch": 16.318134171907758, "percentage": 81.59, "elapsed_time": "1:34:01", "remaining_time": "0:21:12", "throughput": 3599.19, "total_tokens": 20304648}
6244
+ {"current_steps": 31140, "total_steps": 38160, "loss": 0.5532, "lr": 4.981112994723644e-06, "epoch": 16.32075471698113, "percentage": 81.6, "elapsed_time": "1:34:02", "remaining_time": "0:21:11", "throughput": 3599.29, "total_tokens": 20308488}
6245
+ {"current_steps": 31145, "total_steps": 38160, "loss": 0.5114, "lr": 4.974266050831572e-06, "epoch": 16.32337526205451, "percentage": 81.62, "elapsed_time": "1:34:03", "remaining_time": "0:21:11", "throughput": 3599.33, "total_tokens": 20311560}
6246
+ {"current_steps": 31150, "total_steps": 38160, "loss": 0.4439, "lr": 4.967423296094567e-06, "epoch": 16.32599580712788, "percentage": 81.63, "elapsed_time": "1:34:04", "remaining_time": "0:21:10", "throughput": 3599.5, "total_tokens": 20315592}
6247
+ {"current_steps": 31155, "total_steps": 38160, "loss": 0.5198, "lr": 4.960584731944076e-06, "epoch": 16.32861635220126, "percentage": 81.64, "elapsed_time": "1:34:04", "remaining_time": "0:21:09", "throughput": 3599.59, "total_tokens": 20319304}
6248
+ {"current_steps": 31160, "total_steps": 38160, "loss": 0.4246, "lr": 4.953750359810627e-06, "epoch": 16.331236897274632, "percentage": 81.66, "elapsed_time": "1:34:05", "remaining_time": "0:21:08", "throughput": 3599.79, "total_tokens": 20324264}
6249
+ {"current_steps": 31165, "total_steps": 38160, "loss": 0.3985, "lr": 4.946920181123904e-06, "epoch": 16.33385744234801, "percentage": 81.67, "elapsed_time": "1:34:06", "remaining_time": "0:21:07", "throughput": 3599.66, "total_tokens": 20326696}
6250
+ {"current_steps": 31170, "total_steps": 38160, "loss": 0.568, "lr": 4.940094197312709e-06, "epoch": 16.336477987421382, "percentage": 81.68, "elapsed_time": "1:34:07", "remaining_time": "0:21:06", "throughput": 3599.72, "total_tokens": 20330248}
6251
+ {"current_steps": 31175, "total_steps": 38160, "loss": 0.3723, "lr": 4.933272409804946e-06, "epoch": 16.33909853249476, "percentage": 81.7, "elapsed_time": "1:34:08", "remaining_time": "0:21:05", "throughput": 3599.62, "total_tokens": 20332552}
6252
+ {"current_steps": 31180, "total_steps": 38160, "loss": 0.5429, "lr": 4.926454820027663e-06, "epoch": 16.341719077568133, "percentage": 81.71, "elapsed_time": "1:34:09", "remaining_time": "0:21:04", "throughput": 3599.59, "total_tokens": 20335304}
6253
+ {"current_steps": 31185, "total_steps": 38160, "loss": 0.5766, "lr": 4.919641429407024e-06, "epoch": 16.34433962264151, "percentage": 81.72, "elapsed_time": "1:34:10", "remaining_time": "0:21:03", "throughput": 3599.48, "total_tokens": 20337992}
6254
+ {"current_steps": 31190, "total_steps": 38160, "loss": 0.472, "lr": 4.912832239368304e-06, "epoch": 16.346960167714883, "percentage": 81.73, "elapsed_time": "1:34:11", "remaining_time": "0:21:02", "throughput": 3599.41, "total_tokens": 20340680}
6255
+ {"current_steps": 31195, "total_steps": 38160, "loss": 0.4561, "lr": 4.906027251335918e-06, "epoch": 16.34958071278826, "percentage": 81.75, "elapsed_time": "1:34:11", "remaining_time": "0:21:01", "throughput": 3599.41, "total_tokens": 20343848}
6256
+ {"current_steps": 31200, "total_steps": 38160, "loss": 0.4216, "lr": 4.899226466733381e-06, "epoch": 16.352201257861637, "percentage": 81.76, "elapsed_time": "1:34:12", "remaining_time": "0:21:01", "throughput": 3599.36, "total_tokens": 20346792}
6257
+ {"current_steps": 31205, "total_steps": 38160, "loss": 0.4659, "lr": 4.892429886983346e-06, "epoch": 16.35482180293501, "percentage": 81.77, "elapsed_time": "1:34:13", "remaining_time": "0:21:00", "throughput": 3599.31, "total_tokens": 20349544}
6258
+ {"current_steps": 31210, "total_steps": 38160, "loss": 0.5272, "lr": 4.88563751350758e-06, "epoch": 16.357442348008387, "percentage": 81.79, "elapsed_time": "1:34:14", "remaining_time": "0:20:59", "throughput": 3599.39, "total_tokens": 20352904}
6259
+ {"current_steps": 31215, "total_steps": 38160, "loss": 0.3617, "lr": 4.878849347726977e-06, "epoch": 16.36006289308176, "percentage": 81.8, "elapsed_time": "1:34:15", "remaining_time": "0:20:58", "throughput": 3599.47, "total_tokens": 20356424}
6260
+ {"current_steps": 31220, "total_steps": 38160, "loss": 0.4575, "lr": 4.872065391061526e-06, "epoch": 16.362683438155138, "percentage": 81.81, "elapsed_time": "1:34:16", "remaining_time": "0:20:57", "throughput": 3599.57, "total_tokens": 20360264}
6261
+ {"current_steps": 31225, "total_steps": 38160, "loss": 0.4745, "lr": 4.865285644930373e-06, "epoch": 16.36530398322851, "percentage": 81.83, "elapsed_time": "1:34:17", "remaining_time": "0:20:56", "throughput": 3599.66, "total_tokens": 20363848}
6262
+ {"current_steps": 31230, "total_steps": 38160, "loss": 0.4573, "lr": 4.858510110751746e-06, "epoch": 16.367924528301888, "percentage": 81.84, "elapsed_time": "1:34:17", "remaining_time": "0:20:55", "throughput": 3599.56, "total_tokens": 20366280}
6263
+ {"current_steps": 31235, "total_steps": 38160, "loss": 0.318, "lr": 4.851738789943019e-06, "epoch": 16.37054507337526, "percentage": 81.85, "elapsed_time": "1:34:18", "remaining_time": "0:20:54", "throughput": 3599.7, "total_tokens": 20370568}
6264
+ {"current_steps": 31240, "total_steps": 38160, "loss": 0.444, "lr": 4.844971683920674e-06, "epoch": 16.37316561844864, "percentage": 81.87, "elapsed_time": "1:34:19", "remaining_time": "0:20:53", "throughput": 3599.77, "total_tokens": 20373928}
6265
+ {"current_steps": 31245, "total_steps": 38160, "loss": 0.5077, "lr": 4.838208794100316e-06, "epoch": 16.37578616352201, "percentage": 81.88, "elapsed_time": "1:34:20", "remaining_time": "0:20:52", "throughput": 3599.68, "total_tokens": 20376488}
6266
+ {"current_steps": 31250, "total_steps": 38160, "loss": 0.4396, "lr": 4.831450121896661e-06, "epoch": 16.37840670859539, "percentage": 81.89, "elapsed_time": "1:34:21", "remaining_time": "0:20:51", "throughput": 3599.66, "total_tokens": 20379176}
6267
+ {"current_steps": 31255, "total_steps": 38160, "loss": 0.5614, "lr": 4.824695668723542e-06, "epoch": 16.381027253668762, "percentage": 81.91, "elapsed_time": "1:34:22", "remaining_time": "0:20:50", "throughput": 3599.6, "total_tokens": 20381736}
6268
+ {"current_steps": 31260, "total_steps": 38160, "loss": 0.4176, "lr": 4.817945435993915e-06, "epoch": 16.38364779874214, "percentage": 81.92, "elapsed_time": "1:34:23", "remaining_time": "0:20:50", "throughput": 3599.65, "total_tokens": 20385064}
6269
+ {"current_steps": 31265, "total_steps": 38160, "loss": 0.5263, "lr": 4.811199425119853e-06, "epoch": 16.386268343815512, "percentage": 81.93, "elapsed_time": "1:34:23", "remaining_time": "0:20:49", "throughput": 3599.66, "total_tokens": 20388072}
6270
+ {"current_steps": 31270, "total_steps": 38160, "loss": 0.5988, "lr": 4.804457637512552e-06, "epoch": 16.38888888888889, "percentage": 81.94, "elapsed_time": "1:34:24", "remaining_time": "0:20:48", "throughput": 3599.66, "total_tokens": 20391080}
6271
+ {"current_steps": 31275, "total_steps": 38160, "loss": 0.5208, "lr": 4.79772007458231e-06, "epoch": 16.391509433962263, "percentage": 81.96, "elapsed_time": "1:34:25", "remaining_time": "0:20:47", "throughput": 3599.64, "total_tokens": 20394024}
6272
+ {"current_steps": 31280, "total_steps": 38160, "loss": 0.4098, "lr": 4.7909867377385424e-06, "epoch": 16.39412997903564, "percentage": 81.97, "elapsed_time": "1:34:26", "remaining_time": "0:20:46", "throughput": 3599.67, "total_tokens": 20397672}
6273
+ {"current_steps": 31285, "total_steps": 38160, "loss": 0.5734, "lr": 4.784257628389794e-06, "epoch": 16.396750524109013, "percentage": 81.98, "elapsed_time": "1:34:27", "remaining_time": "0:20:45", "throughput": 3599.74, "total_tokens": 20401352}
6274
+ {"current_steps": 31290, "total_steps": 38160, "loss": 0.4369, "lr": 4.7775327479437166e-06, "epoch": 16.39937106918239, "percentage": 82.0, "elapsed_time": "1:34:28", "remaining_time": "0:20:44", "throughput": 3599.7, "total_tokens": 20404008}
6275
+ {"current_steps": 31295, "total_steps": 38160, "loss": 0.6482, "lr": 4.770812097807076e-06, "epoch": 16.401991614255767, "percentage": 82.01, "elapsed_time": "1:34:29", "remaining_time": "0:20:43", "throughput": 3599.77, "total_tokens": 20407496}
6276
+ {"current_steps": 31300, "total_steps": 38160, "loss": 0.4702, "lr": 4.764095679385766e-06, "epoch": 16.40461215932914, "percentage": 82.02, "elapsed_time": "1:34:29", "remaining_time": "0:20:42", "throughput": 3599.79, "total_tokens": 20410696}
6277
+ {"current_steps": 31305, "total_steps": 38160, "loss": 0.524, "lr": 4.7573834940847745e-06, "epoch": 16.407232704402517, "percentage": 82.04, "elapsed_time": "1:34:30", "remaining_time": "0:20:41", "throughput": 3599.8, "total_tokens": 20413736}
6278
+ {"current_steps": 31310, "total_steps": 38160, "loss": 0.442, "lr": 4.750675543308212e-06, "epoch": 16.40985324947589, "percentage": 82.05, "elapsed_time": "1:34:31", "remaining_time": "0:20:40", "throughput": 3599.85, "total_tokens": 20417160}
6279
+ {"current_steps": 31315, "total_steps": 38160, "loss": 0.4676, "lr": 4.743971828459307e-06, "epoch": 16.412473794549268, "percentage": 82.06, "elapsed_time": "1:34:32", "remaining_time": "0:20:39", "throughput": 3599.91, "total_tokens": 20420520}
6280
+ {"current_steps": 31320, "total_steps": 38160, "loss": 0.4598, "lr": 4.7372723509404025e-06, "epoch": 16.41509433962264, "percentage": 82.08, "elapsed_time": "1:34:33", "remaining_time": "0:20:38", "throughput": 3599.9, "total_tokens": 20423272}
6281
+ {"current_steps": 31325, "total_steps": 38160, "loss": 0.4749, "lr": 4.7305771121529615e-06, "epoch": 16.417714884696018, "percentage": 82.09, "elapsed_time": "1:34:34", "remaining_time": "0:20:38", "throughput": 3599.91, "total_tokens": 20426216}
6282
+ {"current_steps": 31330, "total_steps": 38160, "loss": 0.5001, "lr": 4.723886113497542e-06, "epoch": 16.42033542976939, "percentage": 82.1, "elapsed_time": "1:34:35", "remaining_time": "0:20:37", "throughput": 3599.78, "total_tokens": 20429512}
6283
+ {"current_steps": 31335, "total_steps": 38160, "loss": 0.535, "lr": 4.7171993563738195e-06, "epoch": 16.42295597484277, "percentage": 82.11, "elapsed_time": "1:34:36", "remaining_time": "0:20:36", "throughput": 3599.83, "total_tokens": 20433064}
6284
+ {"current_steps": 31340, "total_steps": 38160, "loss": 0.5611, "lr": 4.7105168421805924e-06, "epoch": 16.42557651991614, "percentage": 82.13, "elapsed_time": "1:34:36", "remaining_time": "0:20:35", "throughput": 3599.83, "total_tokens": 20436232}
6285
+ {"current_steps": 31345, "total_steps": 38160, "loss": 0.5302, "lr": 4.703838572315767e-06, "epoch": 16.42819706498952, "percentage": 82.14, "elapsed_time": "1:34:37", "remaining_time": "0:20:34", "throughput": 3599.82, "total_tokens": 20439688}
6286
+ {"current_steps": 31350, "total_steps": 38160, "loss": 0.6978, "lr": 4.697164548176367e-06, "epoch": 16.430817610062892, "percentage": 82.15, "elapsed_time": "1:34:38", "remaining_time": "0:20:33", "throughput": 3599.83, "total_tokens": 20442888}
6287
+ {"current_steps": 31355, "total_steps": 38160, "loss": 0.561, "lr": 4.690494771158519e-06, "epoch": 16.43343815513627, "percentage": 82.17, "elapsed_time": "1:34:39", "remaining_time": "0:20:32", "throughput": 3599.81, "total_tokens": 20445928}
6288
+ {"current_steps": 31360, "total_steps": 38160, "loss": 0.3856, "lr": 4.683829242657453e-06, "epoch": 16.436058700209642, "percentage": 82.18, "elapsed_time": "1:34:40", "remaining_time": "0:20:31", "throughput": 3599.74, "total_tokens": 20448552}
6289
+ {"current_steps": 31365, "total_steps": 38160, "loss": 0.5011, "lr": 4.677167964067533e-06, "epoch": 16.43867924528302, "percentage": 82.19, "elapsed_time": "1:34:41", "remaining_time": "0:20:30", "throughput": 3599.64, "total_tokens": 20451304}
6290
+ {"current_steps": 31370, "total_steps": 38160, "loss": 0.4976, "lr": 4.670510936782216e-06, "epoch": 16.441299790356393, "percentage": 82.21, "elapsed_time": "1:34:42", "remaining_time": "0:20:29", "throughput": 3599.66, "total_tokens": 20454408}
6291
+ {"current_steps": 31375, "total_steps": 38160, "loss": 0.4206, "lr": 4.6638581621940815e-06, "epoch": 16.44392033542977, "percentage": 82.22, "elapsed_time": "1:34:43", "remaining_time": "0:20:29", "throughput": 3599.63, "total_tokens": 20457384}
6292
+ {"current_steps": 31380, "total_steps": 38160, "loss": 0.4561, "lr": 4.657209641694821e-06, "epoch": 16.446540880503143, "percentage": 82.23, "elapsed_time": "1:34:44", "remaining_time": "0:20:28", "throughput": 3599.62, "total_tokens": 20460456}
6293
+ {"current_steps": 31385, "total_steps": 38160, "loss": 0.471, "lr": 4.65056537667522e-06, "epoch": 16.44916142557652, "percentage": 82.25, "elapsed_time": "1:34:44", "remaining_time": "0:20:27", "throughput": 3599.57, "total_tokens": 20463336}
6294
+ {"current_steps": 31390, "total_steps": 38160, "loss": 0.539, "lr": 4.643925368525176e-06, "epoch": 16.451781970649897, "percentage": 82.26, "elapsed_time": "1:34:45", "remaining_time": "0:20:26", "throughput": 3599.48, "total_tokens": 20465928}
6295
+ {"current_steps": 31395, "total_steps": 38160, "loss": 0.4484, "lr": 4.637289618633711e-06, "epoch": 16.45440251572327, "percentage": 82.27, "elapsed_time": "1:34:46", "remaining_time": "0:20:25", "throughput": 3599.38, "total_tokens": 20468584}
6296
+ {"current_steps": 31400, "total_steps": 38160, "loss": 0.5058, "lr": 4.630658128388948e-06, "epoch": 16.457023060796647, "percentage": 82.29, "elapsed_time": "1:34:47", "remaining_time": "0:20:24", "throughput": 3599.28, "total_tokens": 20471336}
6297
+ {"current_steps": 31405, "total_steps": 38160, "loss": 0.437, "lr": 4.624030899178125e-06, "epoch": 16.45964360587002, "percentage": 82.3, "elapsed_time": "1:34:48", "remaining_time": "0:20:23", "throughput": 3599.26, "total_tokens": 20474696}
6298
+ {"current_steps": 31410, "total_steps": 38160, "loss": 0.494, "lr": 4.617407932387569e-06, "epoch": 16.462264150943398, "percentage": 82.31, "elapsed_time": "1:34:49", "remaining_time": "0:20:22", "throughput": 3599.27, "total_tokens": 20477832}
6299
+ {"current_steps": 31415, "total_steps": 38160, "loss": 0.5637, "lr": 4.610789229402743e-06, "epoch": 16.46488469601677, "percentage": 82.32, "elapsed_time": "1:34:50", "remaining_time": "0:20:21", "throughput": 3599.29, "total_tokens": 20481160}
6300
+ {"current_steps": 31420, "total_steps": 38160, "loss": 0.4914, "lr": 4.604174791608193e-06, "epoch": 16.467505241090148, "percentage": 82.34, "elapsed_time": "1:34:51", "remaining_time": "0:20:20", "throughput": 3599.35, "total_tokens": 20484648}
6301
+ {"current_steps": 31425, "total_steps": 38160, "loss": 0.3074, "lr": 4.597564620387587e-06, "epoch": 16.47012578616352, "percentage": 82.35, "elapsed_time": "1:34:52", "remaining_time": "0:20:19", "throughput": 3599.13, "total_tokens": 20486696}
6302
+ {"current_steps": 31430, "total_steps": 38160, "loss": 0.4718, "lr": 4.590958717123708e-06, "epoch": 16.4727463312369, "percentage": 82.36, "elapsed_time": "1:34:52", "remaining_time": "0:20:19", "throughput": 3599.12, "total_tokens": 20489640}
6303
+ {"current_steps": 31435, "total_steps": 38160, "loss": 0.6078, "lr": 4.5843570831984174e-06, "epoch": 16.47536687631027, "percentage": 82.38, "elapsed_time": "1:34:53", "remaining_time": "0:20:18", "throughput": 3599.07, "total_tokens": 20492392}
6304
+ {"current_steps": 31440, "total_steps": 38160, "loss": 0.4301, "lr": 4.577759719992719e-06, "epoch": 16.47798742138365, "percentage": 82.39, "elapsed_time": "1:34:54", "remaining_time": "0:20:17", "throughput": 3599.01, "total_tokens": 20494984}
6305
+ {"current_steps": 31445, "total_steps": 38160, "loss": 0.4828, "lr": 4.571166628886692e-06, "epoch": 16.480607966457022, "percentage": 82.4, "elapsed_time": "1:34:55", "remaining_time": "0:20:16", "throughput": 3598.99, "total_tokens": 20497832}
6306
+ {"current_steps": 31450, "total_steps": 38160, "loss": 0.4014, "lr": 4.564577811259541e-06, "epoch": 16.4832285115304, "percentage": 82.42, "elapsed_time": "1:34:56", "remaining_time": "0:20:15", "throughput": 3598.93, "total_tokens": 20500488}
6307
+ {"current_steps": 31455, "total_steps": 38160, "loss": 0.5122, "lr": 4.557993268489574e-06, "epoch": 16.485849056603772, "percentage": 82.43, "elapsed_time": "1:34:57", "remaining_time": "0:20:14", "throughput": 3598.96, "total_tokens": 20503688}
6308
+ {"current_steps": 31460, "total_steps": 38160, "loss": 0.3941, "lr": 4.551413001954205e-06, "epoch": 16.48846960167715, "percentage": 82.44, "elapsed_time": "1:34:58", "remaining_time": "0:20:13", "throughput": 3599.09, "total_tokens": 20507816}
6309
+ {"current_steps": 31465, "total_steps": 38160, "loss": 0.603, "lr": 4.54483701302994e-06, "epoch": 16.491090146750523, "percentage": 82.46, "elapsed_time": "1:34:58", "remaining_time": "0:20:12", "throughput": 3599.1, "total_tokens": 20510920}
6310
+ {"current_steps": 31470, "total_steps": 38160, "loss": 0.4447, "lr": 4.538265303092415e-06, "epoch": 16.4937106918239, "percentage": 82.47, "elapsed_time": "1:34:59", "remaining_time": "0:20:11", "throughput": 3599.15, "total_tokens": 20514408}
6311
+ {"current_steps": 31475, "total_steps": 38160, "loss": 0.43, "lr": 4.531697873516338e-06, "epoch": 16.496331236897273, "percentage": 82.48, "elapsed_time": "1:35:00", "remaining_time": "0:20:10", "throughput": 3599.28, "total_tokens": 20518472}
6312
+ {"current_steps": 31480, "total_steps": 38160, "loss": 0.601, "lr": 4.525134725675556e-06, "epoch": 16.49895178197065, "percentage": 82.49, "elapsed_time": "1:35:01", "remaining_time": "0:20:09", "throughput": 3599.17, "total_tokens": 20521064}
6313
+ {"current_steps": 31485, "total_steps": 38160, "loss": 0.5395, "lr": 4.518575860943003e-06, "epoch": 16.501572327044027, "percentage": 82.51, "elapsed_time": "1:35:02", "remaining_time": "0:20:08", "throughput": 3599.09, "total_tokens": 20523784}
6314
+ {"current_steps": 31490, "total_steps": 38160, "loss": 0.5134, "lr": 4.512021280690712e-06, "epoch": 16.5041928721174, "percentage": 82.52, "elapsed_time": "1:35:03", "remaining_time": "0:20:08", "throughput": 3599.25, "total_tokens": 20527976}
6315
+ {"current_steps": 31495, "total_steps": 38160, "loss": 0.5806, "lr": 4.505470986289828e-06, "epoch": 16.506813417190777, "percentage": 82.53, "elapsed_time": "1:35:04", "remaining_time": "0:20:07", "throughput": 3599.33, "total_tokens": 20531784}
6316
+ {"current_steps": 31500, "total_steps": 38160, "loss": 0.5234, "lr": 4.498924979110605e-06, "epoch": 16.50943396226415, "percentage": 82.55, "elapsed_time": "1:35:05", "remaining_time": "0:20:06", "throughput": 3599.33, "total_tokens": 20535048}
6317
+ {"current_steps": 31505, "total_steps": 38160, "loss": 0.587, "lr": 4.492383260522384e-06, "epoch": 16.512054507337528, "percentage": 82.56, "elapsed_time": "1:35:06", "remaining_time": "0:20:05", "throughput": 3599.3, "total_tokens": 20537672}
6318
+ {"current_steps": 31510, "total_steps": 38160, "loss": 0.3998, "lr": 4.485845831893626e-06, "epoch": 16.5146750524109, "percentage": 82.57, "elapsed_time": "1:35:06", "remaining_time": "0:20:04", "throughput": 3599.26, "total_tokens": 20540264}
6319
+ {"current_steps": 31515, "total_steps": 38160, "loss": 0.4128, "lr": 4.479312694591878e-06, "epoch": 16.517295597484278, "percentage": 82.59, "elapsed_time": "1:35:07", "remaining_time": "0:20:03", "throughput": 3599.27, "total_tokens": 20543208}
6320
+ {"current_steps": 31520, "total_steps": 38160, "loss": 0.4569, "lr": 4.4727838499838e-06, "epoch": 16.51991614255765, "percentage": 82.6, "elapsed_time": "1:35:08", "remaining_time": "0:20:02", "throughput": 3599.32, "total_tokens": 20546632}
6321
+ {"current_steps": 31525, "total_steps": 38160, "loss": 0.6067, "lr": 4.4662592994351545e-06, "epoch": 16.52253668763103, "percentage": 82.61, "elapsed_time": "1:35:09", "remaining_time": "0:20:01", "throughput": 3599.25, "total_tokens": 20549256}
6322
+ {"current_steps": 31530, "total_steps": 38160, "loss": 0.5909, "lr": 4.4597390443108065e-06, "epoch": 16.5251572327044, "percentage": 82.63, "elapsed_time": "1:35:10", "remaining_time": "0:20:00", "throughput": 3599.19, "total_tokens": 20551880}
6323
+ {"current_steps": 31535, "total_steps": 38160, "loss": 0.459, "lr": 4.453223085974712e-06, "epoch": 16.52777777777778, "percentage": 82.64, "elapsed_time": "1:35:10", "remaining_time": "0:19:59", "throughput": 3599.27, "total_tokens": 20555368}
6324
+ {"current_steps": 31540, "total_steps": 38160, "loss": 0.3561, "lr": 4.4467114257899425e-06, "epoch": 16.530398322851152, "percentage": 82.65, "elapsed_time": "1:35:11", "remaining_time": "0:19:58", "throughput": 3599.21, "total_tokens": 20557864}
6325
+ {"current_steps": 31545, "total_steps": 38160, "loss": 0.4259, "lr": 4.44020406511865e-06, "epoch": 16.53301886792453, "percentage": 82.67, "elapsed_time": "1:35:12", "remaining_time": "0:19:57", "throughput": 3599.34, "total_tokens": 20561800}
6326
+ {"current_steps": 31550, "total_steps": 38160, "loss": 0.5478, "lr": 4.43370100532211e-06, "epoch": 16.535639412997902, "percentage": 82.68, "elapsed_time": "1:35:13", "remaining_time": "0:19:57", "throughput": 3599.36, "total_tokens": 20565128}
6327
+ {"current_steps": 31555, "total_steps": 38160, "loss": 0.4067, "lr": 4.427202247760695e-06, "epoch": 16.53825995807128, "percentage": 82.69, "elapsed_time": "1:35:14", "remaining_time": "0:19:56", "throughput": 3599.44, "total_tokens": 20568648}
6328
+ {"current_steps": 31560, "total_steps": 38160, "loss": 0.4426, "lr": 4.420707793793854e-06, "epoch": 16.540880503144653, "percentage": 82.7, "elapsed_time": "1:35:15", "remaining_time": "0:19:55", "throughput": 3599.45, "total_tokens": 20571816}
6329
+ {"current_steps": 31565, "total_steps": 38160, "loss": 0.5734, "lr": 4.414217644780172e-06, "epoch": 16.54350104821803, "percentage": 82.72, "elapsed_time": "1:35:16", "remaining_time": "0:19:54", "throughput": 3599.59, "total_tokens": 20576136}
6330
+ {"current_steps": 31570, "total_steps": 38160, "loss": 0.544, "lr": 4.407731802077294e-06, "epoch": 16.546121593291403, "percentage": 82.73, "elapsed_time": "1:35:17", "remaining_time": "0:19:53", "throughput": 3599.49, "total_tokens": 20578472}
6331
+ {"current_steps": 31575, "total_steps": 38160, "loss": 0.5688, "lr": 4.401250267041996e-06, "epoch": 16.54874213836478, "percentage": 82.74, "elapsed_time": "1:35:17", "remaining_time": "0:19:52", "throughput": 3599.55, "total_tokens": 20581800}
6332
+ {"current_steps": 31580, "total_steps": 38160, "loss": 0.6797, "lr": 4.394773041030137e-06, "epoch": 16.551362683438157, "percentage": 82.76, "elapsed_time": "1:35:18", "remaining_time": "0:19:51", "throughput": 3599.41, "total_tokens": 20584296}
6333
+ {"current_steps": 31585, "total_steps": 38160, "loss": 0.4675, "lr": 4.388300125396685e-06, "epoch": 16.55398322851153, "percentage": 82.77, "elapsed_time": "1:35:19", "remaining_time": "0:19:50", "throughput": 3599.43, "total_tokens": 20587368}
6334
+ {"current_steps": 31590, "total_steps": 38160, "loss": 0.572, "lr": 4.381831521495699e-06, "epoch": 16.556603773584907, "percentage": 82.78, "elapsed_time": "1:35:20", "remaining_time": "0:19:49", "throughput": 3599.45, "total_tokens": 20590504}
6335
+ {"current_steps": 31595, "total_steps": 38160, "loss": 0.5152, "lr": 4.37536723068033e-06, "epoch": 16.55922431865828, "percentage": 82.8, "elapsed_time": "1:35:21", "remaining_time": "0:19:48", "throughput": 3599.42, "total_tokens": 20593448}
6336
+ {"current_steps": 31600, "total_steps": 38160, "loss": 0.4797, "lr": 4.368907254302837e-06, "epoch": 16.561844863731658, "percentage": 82.81, "elapsed_time": "1:35:22", "remaining_time": "0:19:47", "throughput": 3599.45, "total_tokens": 20596904}
6337
+ {"current_steps": 31605, "total_steps": 38160, "loss": 0.4928, "lr": 4.3624515937145725e-06, "epoch": 16.56446540880503, "percentage": 82.82, "elapsed_time": "1:35:23", "remaining_time": "0:19:46", "throughput": 3599.48, "total_tokens": 20600040}
6338
+ {"current_steps": 31610, "total_steps": 38160, "loss": 0.53, "lr": 4.3560002502659905e-06, "epoch": 16.567085953878408, "percentage": 82.84, "elapsed_time": "1:35:23", "remaining_time": "0:19:46", "throughput": 3599.52, "total_tokens": 20603112}
6339
+ {"current_steps": 31615, "total_steps": 38160, "loss": 0.4788, "lr": 4.349553225306644e-06, "epoch": 16.56970649895178, "percentage": 82.85, "elapsed_time": "1:35:24", "remaining_time": "0:19:45", "throughput": 3599.61, "total_tokens": 20606760}
6340
+ {"current_steps": 31620, "total_steps": 38160, "loss": 0.4764, "lr": 4.343110520185167e-06, "epoch": 16.572327044025158, "percentage": 82.86, "elapsed_time": "1:35:25", "remaining_time": "0:19:44", "throughput": 3599.61, "total_tokens": 20609896}
6341
+ {"current_steps": 31625, "total_steps": 38160, "loss": 0.5529, "lr": 4.336672136249298e-06, "epoch": 16.57494758909853, "percentage": 82.87, "elapsed_time": "1:35:26", "remaining_time": "0:19:43", "throughput": 3599.57, "total_tokens": 20612712}
6342
+ {"current_steps": 31630, "total_steps": 38160, "loss": 0.7489, "lr": 4.33023807484588e-06, "epoch": 16.57756813417191, "percentage": 82.89, "elapsed_time": "1:35:27", "remaining_time": "0:19:42", "throughput": 3599.63, "total_tokens": 20616488}
6343
+ {"current_steps": 31635, "total_steps": 38160, "loss": 0.5996, "lr": 4.323808337320842e-06, "epoch": 16.580188679245282, "percentage": 82.9, "elapsed_time": "1:35:28", "remaining_time": "0:19:41", "throughput": 3599.71, "total_tokens": 20619944}
6344
+ {"current_steps": 31640, "total_steps": 38160, "loss": 0.4943, "lr": 4.31738292501922e-06, "epoch": 16.58280922431866, "percentage": 82.91, "elapsed_time": "1:35:29", "remaining_time": "0:19:40", "throughput": 3599.72, "total_tokens": 20622952}
6345
+ {"current_steps": 31645, "total_steps": 38160, "loss": 0.38, "lr": 4.31096183928513e-06, "epoch": 16.585429769392032, "percentage": 82.93, "elapsed_time": "1:35:29", "remaining_time": "0:19:39", "throughput": 3599.79, "total_tokens": 20626248}
6346
+ {"current_steps": 31650, "total_steps": 38160, "loss": 0.3859, "lr": 4.304545081461783e-06, "epoch": 16.58805031446541, "percentage": 82.94, "elapsed_time": "1:35:30", "remaining_time": "0:19:38", "throughput": 3599.8, "total_tokens": 20630088}
6347
+ {"current_steps": 31655, "total_steps": 38160, "loss": 0.5118, "lr": 4.298132652891501e-06, "epoch": 16.590670859538783, "percentage": 82.95, "elapsed_time": "1:35:31", "remaining_time": "0:19:37", "throughput": 3599.75, "total_tokens": 20632840}
6348
+ {"current_steps": 31660, "total_steps": 38160, "loss": 0.4575, "lr": 4.291724554915688e-06, "epoch": 16.59329140461216, "percentage": 82.97, "elapsed_time": "1:35:32", "remaining_time": "0:19:36", "throughput": 3599.73, "total_tokens": 20635720}
6349
+ {"current_steps": 31665, "total_steps": 38160, "loss": 0.3942, "lr": 4.285320788874853e-06, "epoch": 16.595911949685533, "percentage": 82.98, "elapsed_time": "1:35:33", "remaining_time": "0:19:36", "throughput": 3599.77, "total_tokens": 20639400}
6350
+ {"current_steps": 31670, "total_steps": 38160, "loss": 0.4463, "lr": 4.278921356108581e-06, "epoch": 16.59853249475891, "percentage": 82.99, "elapsed_time": "1:35:34", "remaining_time": "0:19:35", "throughput": 3599.88, "total_tokens": 20643656}
6351
+ {"current_steps": 31675, "total_steps": 38160, "loss": 0.4484, "lr": 4.272526257955556e-06, "epoch": 16.601153039832283, "percentage": 83.01, "elapsed_time": "1:35:35", "remaining_time": "0:19:34", "throughput": 3599.86, "total_tokens": 20646504}
6352
+ {"current_steps": 31680, "total_steps": 38160, "loss": 0.5581, "lr": 4.266135495753567e-06, "epoch": 16.60377358490566, "percentage": 83.02, "elapsed_time": "1:35:36", "remaining_time": "0:19:33", "throughput": 3599.93, "total_tokens": 20650120}
6353
+ {"current_steps": 31685, "total_steps": 38160, "loss": 0.4209, "lr": 4.259749070839491e-06, "epoch": 16.606394129979037, "percentage": 83.03, "elapsed_time": "1:35:37", "remaining_time": "0:19:32", "throughput": 3599.89, "total_tokens": 20653064}
6354
+ {"current_steps": 31690, "total_steps": 38160, "loss": 0.4589, "lr": 4.253366984549289e-06, "epoch": 16.60901467505241, "percentage": 83.05, "elapsed_time": "1:35:38", "remaining_time": "0:19:31", "throughput": 3600.0, "total_tokens": 20657160}
6355
+ {"current_steps": 31695, "total_steps": 38160, "loss": 0.3705, "lr": 4.24698923821803e-06, "epoch": 16.611635220125788, "percentage": 83.06, "elapsed_time": "1:35:38", "remaining_time": "0:19:30", "throughput": 3599.96, "total_tokens": 20659912}
6356
+ {"current_steps": 31700, "total_steps": 38160, "loss": 0.3702, "lr": 4.240615833179861e-06, "epoch": 16.61425576519916, "percentage": 83.07, "elapsed_time": "1:35:39", "remaining_time": "0:19:29", "throughput": 3599.89, "total_tokens": 20662312}
6357
+ {"current_steps": 31705, "total_steps": 38160, "loss": 0.4458, "lr": 4.234246770768016e-06, "epoch": 16.616876310272538, "percentage": 83.08, "elapsed_time": "1:35:40", "remaining_time": "0:19:28", "throughput": 3599.97, "total_tokens": 20666248}
6358
+ {"current_steps": 31710, "total_steps": 38160, "loss": 0.4558, "lr": 4.227882052314841e-06, "epoch": 16.61949685534591, "percentage": 83.1, "elapsed_time": "1:35:41", "remaining_time": "0:19:27", "throughput": 3600.12, "total_tokens": 20670888}
6359
+ {"current_steps": 31715, "total_steps": 38160, "loss": 0.4604, "lr": 4.2215216791517575e-06, "epoch": 16.622117400419288, "percentage": 83.11, "elapsed_time": "1:35:42", "remaining_time": "0:19:26", "throughput": 3600.04, "total_tokens": 20673320}
6360
+ {"current_steps": 31720, "total_steps": 38160, "loss": 0.3571, "lr": 4.215165652609293e-06, "epoch": 16.62473794549266, "percentage": 83.12, "elapsed_time": "1:35:43", "remaining_time": "0:19:26", "throughput": 3600.08, "total_tokens": 20676552}
6361
+ {"current_steps": 31725, "total_steps": 38160, "loss": 0.4818, "lr": 4.208813974017048e-06, "epoch": 16.62735849056604, "percentage": 83.14, "elapsed_time": "1:35:44", "remaining_time": "0:19:25", "throughput": 3599.94, "total_tokens": 20678792}
6362
+ {"current_steps": 31730, "total_steps": 38160, "loss": 0.3948, "lr": 4.202466644703712e-06, "epoch": 16.629979035639412, "percentage": 83.15, "elapsed_time": "1:35:45", "remaining_time": "0:19:24", "throughput": 3600.06, "total_tokens": 20683240}
6363
+ {"current_steps": 31735, "total_steps": 38160, "loss": 0.5781, "lr": 4.196123665997087e-06, "epoch": 16.63259958071279, "percentage": 83.16, "elapsed_time": "1:35:46", "remaining_time": "0:19:23", "throughput": 3600.08, "total_tokens": 20686536}
6364
+ {"current_steps": 31740, "total_steps": 38160, "loss": 0.5214, "lr": 4.189785039224045e-06, "epoch": 16.635220125786162, "percentage": 83.18, "elapsed_time": "1:35:47", "remaining_time": "0:19:22", "throughput": 3600.09, "total_tokens": 20689832}
6365
+ {"current_steps": 31745, "total_steps": 38160, "loss": 0.554, "lr": 4.183450765710567e-06, "epoch": 16.63784067085954, "percentage": 83.19, "elapsed_time": "1:35:47", "remaining_time": "0:19:21", "throughput": 3600.09, "total_tokens": 20692648}
6366
+ {"current_steps": 31750, "total_steps": 38160, "loss": 0.4978, "lr": 4.177120846781693e-06, "epoch": 16.640461215932913, "percentage": 83.2, "elapsed_time": "1:35:48", "remaining_time": "0:19:20", "throughput": 3600.01, "total_tokens": 20695048}
6367
+ {"current_steps": 31755, "total_steps": 38160, "loss": 0.4326, "lr": 4.170795283761583e-06, "epoch": 16.64308176100629, "percentage": 83.22, "elapsed_time": "1:35:49", "remaining_time": "0:19:19", "throughput": 3600.06, "total_tokens": 20698216}
6368
+ {"current_steps": 31760, "total_steps": 38160, "loss": 0.5203, "lr": 4.164474077973462e-06, "epoch": 16.645702306079663, "percentage": 83.23, "elapsed_time": "1:35:50", "remaining_time": "0:19:18", "throughput": 3600.15, "total_tokens": 20702024}
6369
+ {"current_steps": 31765, "total_steps": 38160, "loss": 0.3613, "lr": 4.158157230739659e-06, "epoch": 16.64832285115304, "percentage": 83.24, "elapsed_time": "1:35:51", "remaining_time": "0:19:17", "throughput": 3600.14, "total_tokens": 20705032}
6370
+ {"current_steps": 31770, "total_steps": 38160, "loss": 0.4001, "lr": 4.151844743381589e-06, "epoch": 16.650943396226417, "percentage": 83.25, "elapsed_time": "1:35:52", "remaining_time": "0:19:16", "throughput": 3600.15, "total_tokens": 20708072}
6371
+ {"current_steps": 31775, "total_steps": 38160, "loss": 0.4747, "lr": 4.145536617219756e-06, "epoch": 16.65356394129979, "percentage": 83.27, "elapsed_time": "1:35:52", "remaining_time": "0:19:15", "throughput": 3600.22, "total_tokens": 20711432}
6372
+ {"current_steps": 31780, "total_steps": 38160, "loss": 0.5317, "lr": 4.139232853573735e-06, "epoch": 16.656184486373167, "percentage": 83.28, "elapsed_time": "1:35:53", "remaining_time": "0:19:15", "throughput": 3600.32, "total_tokens": 20715112}
6373
+ {"current_steps": 31785, "total_steps": 38160, "loss": 0.4594, "lr": 4.132933453762214e-06, "epoch": 16.65880503144654, "percentage": 83.29, "elapsed_time": "1:35:54", "remaining_time": "0:19:14", "throughput": 3600.29, "total_tokens": 20718024}
6374
+ {"current_steps": 31790, "total_steps": 38160, "loss": 0.448, "lr": 4.126638419102946e-06, "epoch": 16.661425576519918, "percentage": 83.31, "elapsed_time": "1:35:55", "remaining_time": "0:19:13", "throughput": 3600.32, "total_tokens": 20721192}
6375
+ {"current_steps": 31795, "total_steps": 38160, "loss": 0.411, "lr": 4.120347750912784e-06, "epoch": 16.66404612159329, "percentage": 83.32, "elapsed_time": "1:35:56", "remaining_time": "0:19:12", "throughput": 3600.22, "total_tokens": 20723656}
6376
+ {"current_steps": 31800, "total_steps": 38160, "loss": 0.3969, "lr": 4.1140614505076724e-06, "epoch": 16.666666666666668, "percentage": 83.33, "elapsed_time": "1:35:57", "remaining_time": "0:19:11", "throughput": 3600.11, "total_tokens": 20726088}
6377
+ {"current_steps": 31805, "total_steps": 38160, "loss": 0.4148, "lr": 4.10777951920262e-06, "epoch": 16.66928721174004, "percentage": 83.35, "elapsed_time": "1:35:57", "remaining_time": "0:19:10", "throughput": 3600.08, "total_tokens": 20728840}
6378
+ {"current_steps": 31810, "total_steps": 38160, "loss": 0.3664, "lr": 4.1015019583117455e-06, "epoch": 16.671907756813418, "percentage": 83.36, "elapsed_time": "1:35:58", "remaining_time": "0:19:09", "throughput": 3600.12, "total_tokens": 20732424}
6379
+ {"current_steps": 31815, "total_steps": 38160, "loss": 0.3617, "lr": 4.095228769148232e-06, "epoch": 16.67452830188679, "percentage": 83.37, "elapsed_time": "1:36:00", "remaining_time": "0:19:08", "throughput": 3600.37, "total_tokens": 20738184}
6380
+ {"current_steps": 31820, "total_steps": 38160, "loss": 0.3885, "lr": 4.088959953024368e-06, "epoch": 16.67714884696017, "percentage": 83.39, "elapsed_time": "1:36:01", "remaining_time": "0:19:07", "throughput": 3600.58, "total_tokens": 20743464}
6381
+ {"current_steps": 31825, "total_steps": 38160, "loss": 0.4491, "lr": 4.082695511251522e-06, "epoch": 16.679769392033542, "percentage": 83.4, "elapsed_time": "1:36:02", "remaining_time": "0:19:06", "throughput": 3600.57, "total_tokens": 20746536}
6382
+ {"current_steps": 31830, "total_steps": 38160, "loss": 0.5639, "lr": 4.076435445140131e-06, "epoch": 16.68238993710692, "percentage": 83.41, "elapsed_time": "1:36:02", "remaining_time": "0:19:06", "throughput": 3600.6, "total_tokens": 20749896}
6383
+ {"current_steps": 31835, "total_steps": 38160, "loss": 0.4646, "lr": 4.070179755999737e-06, "epoch": 16.685010482180292, "percentage": 83.43, "elapsed_time": "1:36:03", "remaining_time": "0:19:05", "throughput": 3600.69, "total_tokens": 20753448}
6384
+ {"current_steps": 31840, "total_steps": 38160, "loss": 0.5061, "lr": 4.063928445138962e-06, "epoch": 16.68763102725367, "percentage": 83.44, "elapsed_time": "1:36:04", "remaining_time": "0:19:04", "throughput": 3600.73, "total_tokens": 20757032}
6385
+ {"current_steps": 31845, "total_steps": 38160, "loss": 0.374, "lr": 4.057681513865499e-06, "epoch": 16.690251572327043, "percentage": 83.45, "elapsed_time": "1:36:05", "remaining_time": "0:19:03", "throughput": 3600.75, "total_tokens": 20760136}
6386
+ {"current_steps": 31850, "total_steps": 38160, "loss": 0.5534, "lr": 4.0514389634861426e-06, "epoch": 16.69287211740042, "percentage": 83.46, "elapsed_time": "1:36:06", "remaining_time": "0:19:02", "throughput": 3600.76, "total_tokens": 20763144}
6387
+ {"current_steps": 31855, "total_steps": 38160, "loss": 0.5072, "lr": 4.045200795306767e-06, "epoch": 16.695492662473793, "percentage": 83.48, "elapsed_time": "1:36:07", "remaining_time": "0:19:01", "throughput": 3600.8, "total_tokens": 20766248}
6388
+ {"current_steps": 31860, "total_steps": 38160, "loss": 0.5404, "lr": 4.038967010632314e-06, "epoch": 16.69811320754717, "percentage": 83.49, "elapsed_time": "1:36:07", "remaining_time": "0:19:00", "throughput": 3600.82, "total_tokens": 20769160}
6389
+ {"current_steps": 31865, "total_steps": 38160, "loss": 0.4173, "lr": 4.032737610766826e-06, "epoch": 16.700733752620543, "percentage": 83.5, "elapsed_time": "1:36:08", "remaining_time": "0:18:59", "throughput": 3600.96, "total_tokens": 20773384}
6390
+ {"current_steps": 31870, "total_steps": 38160, "loss": 0.5919, "lr": 4.026512597013429e-06, "epoch": 16.70335429769392, "percentage": 83.52, "elapsed_time": "1:36:09", "remaining_time": "0:18:58", "throughput": 3600.99, "total_tokens": 20777416}
6391
+ {"current_steps": 31875, "total_steps": 38160, "loss": 0.5756, "lr": 4.020291970674312e-06, "epoch": 16.705974842767297, "percentage": 83.53, "elapsed_time": "1:36:10", "remaining_time": "0:18:57", "throughput": 3601.05, "total_tokens": 20780808}
6392
+ {"current_steps": 31880, "total_steps": 38160, "loss": 0.3318, "lr": 4.0140757330507705e-06, "epoch": 16.70859538784067, "percentage": 83.54, "elapsed_time": "1:36:11", "remaining_time": "0:18:56", "throughput": 3601.05, "total_tokens": 20784040}
6393
+ {"current_steps": 31885, "total_steps": 38160, "loss": 0.4676, "lr": 4.007863885443161e-06, "epoch": 16.711215932914047, "percentage": 83.56, "elapsed_time": "1:36:12", "remaining_time": "0:18:56", "throughput": 3601.05, "total_tokens": 20787144}
6394
+ {"current_steps": 31890, "total_steps": 38160, "loss": 0.455, "lr": 4.001656429150935e-06, "epoch": 16.71383647798742, "percentage": 83.57, "elapsed_time": "1:36:13", "remaining_time": "0:18:55", "throughput": 3601.08, "total_tokens": 20790408}
6395
+ {"current_steps": 31895, "total_steps": 38160, "loss": 0.5084, "lr": 3.9954533654726235e-06, "epoch": 16.716457023060798, "percentage": 83.58, "elapsed_time": "1:36:14", "remaining_time": "0:18:54", "throughput": 3601.1, "total_tokens": 20793768}
6396
+ {"current_steps": 31900, "total_steps": 38160, "loss": 0.4323, "lr": 3.989254695705841e-06, "epoch": 16.71907756813417, "percentage": 83.6, "elapsed_time": "1:36:15", "remaining_time": "0:18:53", "throughput": 3601.25, "total_tokens": 20799624}
6397
+ {"current_steps": 31905, "total_steps": 38160, "loss": 0.5124, "lr": 3.983060421147272e-06, "epoch": 16.721698113207548, "percentage": 83.61, "elapsed_time": "1:36:16", "remaining_time": "0:18:52", "throughput": 3601.19, "total_tokens": 20802152}
6398
+ {"current_steps": 31910, "total_steps": 38160, "loss": 0.488, "lr": 3.976870543092684e-06, "epoch": 16.72431865828092, "percentage": 83.62, "elapsed_time": "1:36:17", "remaining_time": "0:18:51", "throughput": 3601.14, "total_tokens": 20804776}
6399
+ {"current_steps": 31915, "total_steps": 38160, "loss": 0.5178, "lr": 3.970685062836932e-06, "epoch": 16.7269392033543, "percentage": 83.63, "elapsed_time": "1:36:18", "remaining_time": "0:18:50", "throughput": 3601.12, "total_tokens": 20807880}
6400
+ {"current_steps": 31920, "total_steps": 38160, "loss": 0.4799, "lr": 3.96450398167395e-06, "epoch": 16.729559748427672, "percentage": 83.65, "elapsed_time": "1:36:18", "remaining_time": "0:18:49", "throughput": 3601.08, "total_tokens": 20810568}
6401
+ {"current_steps": 31925, "total_steps": 38160, "loss": 0.4796, "lr": 3.958327300896758e-06, "epoch": 16.73218029350105, "percentage": 83.66, "elapsed_time": "1:36:20", "remaining_time": "0:18:48", "throughput": 3601.25, "total_tokens": 20815368}
6402
+ {"current_steps": 31930, "total_steps": 38160, "loss": 0.5717, "lr": 3.952155021797429e-06, "epoch": 16.734800838574422, "percentage": 83.67, "elapsed_time": "1:36:20", "remaining_time": "0:18:47", "throughput": 3601.29, "total_tokens": 20818472}
6403
+ {"current_steps": 31935, "total_steps": 38160, "loss": 0.4808, "lr": 3.945987145667151e-06, "epoch": 16.7374213836478, "percentage": 83.69, "elapsed_time": "1:36:21", "remaining_time": "0:18:47", "throughput": 3601.38, "total_tokens": 20822120}
6404
+ {"current_steps": 31940, "total_steps": 38160, "loss": 0.4669, "lr": 3.939823673796162e-06, "epoch": 16.740041928721173, "percentage": 83.7, "elapsed_time": "1:36:22", "remaining_time": "0:18:46", "throughput": 3601.41, "total_tokens": 20825160}
6405
+ {"current_steps": 31945, "total_steps": 38160, "loss": 0.6465, "lr": 3.933664607473789e-06, "epoch": 16.74266247379455, "percentage": 83.71, "elapsed_time": "1:36:23", "remaining_time": "0:18:45", "throughput": 3601.56, "total_tokens": 20830056}
6406
+ {"current_steps": 31950, "total_steps": 38160, "loss": 0.5273, "lr": 3.927509947988445e-06, "epoch": 16.745283018867923, "percentage": 83.73, "elapsed_time": "1:36:24", "remaining_time": "0:18:44", "throughput": 3601.55, "total_tokens": 20833032}
6407
+ {"current_steps": 31955, "total_steps": 38160, "loss": 0.4649, "lr": 3.9213596966276164e-06, "epoch": 16.7479035639413, "percentage": 83.74, "elapsed_time": "1:36:25", "remaining_time": "0:18:43", "throughput": 3601.58, "total_tokens": 20836200}
6408
+ {"current_steps": 31960, "total_steps": 38160, "loss": 0.4861, "lr": 3.9152138546778625e-06, "epoch": 16.750524109014677, "percentage": 83.75, "elapsed_time": "1:36:26", "remaining_time": "0:18:42", "throughput": 3601.63, "total_tokens": 20839784}
6409
+ {"current_steps": 31965, "total_steps": 38160, "loss": 0.3931, "lr": 3.9090724234248175e-06, "epoch": 16.75314465408805, "percentage": 83.77, "elapsed_time": "1:36:27", "remaining_time": "0:18:41", "throughput": 3601.66, "total_tokens": 20842952}
6410
+ {"current_steps": 31970, "total_steps": 38160, "loss": 0.5345, "lr": 3.902935404153202e-06, "epoch": 16.755765199161427, "percentage": 83.78, "elapsed_time": "1:36:27", "remaining_time": "0:18:40", "throughput": 3601.7, "total_tokens": 20846344}
6411
+ {"current_steps": 31975, "total_steps": 38160, "loss": 0.4254, "lr": 3.896802798146809e-06, "epoch": 16.7583857442348, "percentage": 83.79, "elapsed_time": "1:36:28", "remaining_time": "0:18:39", "throughput": 3601.71, "total_tokens": 20849512}
6412
+ {"current_steps": 31980, "total_steps": 38160, "loss": 0.6267, "lr": 3.890674606688524e-06, "epoch": 16.761006289308177, "percentage": 83.81, "elapsed_time": "1:36:29", "remaining_time": "0:18:38", "throughput": 3601.73, "total_tokens": 20852744}
6413
+ {"current_steps": 31985, "total_steps": 38160, "loss": 0.5332, "lr": 3.88455083106028e-06, "epoch": 16.76362683438155, "percentage": 83.82, "elapsed_time": "1:36:30", "remaining_time": "0:18:37", "throughput": 3601.8, "total_tokens": 20856392}
6414
+ {"current_steps": 31990, "total_steps": 38160, "loss": 0.4882, "lr": 3.878431472543098e-06, "epoch": 16.766247379454928, "percentage": 83.83, "elapsed_time": "1:36:31", "remaining_time": "0:18:36", "throughput": 3601.74, "total_tokens": 20859048}
6415
+ {"current_steps": 31995, "total_steps": 38160, "loss": 0.4607, "lr": 3.872316532417086e-06, "epoch": 16.7688679245283, "percentage": 83.84, "elapsed_time": "1:36:32", "remaining_time": "0:18:36", "throughput": 3601.74, "total_tokens": 20861992}
6416
+ {"current_steps": 32000, "total_steps": 38160, "loss": 0.5172, "lr": 3.866206011961418e-06, "epoch": 16.771488469601678, "percentage": 83.86, "elapsed_time": "1:36:33", "remaining_time": "0:18:35", "throughput": 3601.69, "total_tokens": 20864904}
6417
+ {"current_steps": 32005, "total_steps": 38160, "loss": 0.5016, "lr": 3.860099912454346e-06, "epoch": 16.77410901467505, "percentage": 83.87, "elapsed_time": "1:36:33", "remaining_time": "0:18:34", "throughput": 3601.69, "total_tokens": 20867976}
6418
+ {"current_steps": 32010, "total_steps": 38160, "loss": 0.4949, "lr": 3.853998235173201e-06, "epoch": 16.77672955974843, "percentage": 83.88, "elapsed_time": "1:36:34", "remaining_time": "0:18:33", "throughput": 3601.73, "total_tokens": 20871464}
6419
+ {"current_steps": 32015, "total_steps": 38160, "loss": 0.5167, "lr": 3.847900981394379e-06, "epoch": 16.779350104821802, "percentage": 83.9, "elapsed_time": "1:36:35", "remaining_time": "0:18:32", "throughput": 3601.74, "total_tokens": 20874984}
6420
+ {"current_steps": 32020, "total_steps": 38160, "loss": 0.3827, "lr": 3.841808152393348e-06, "epoch": 16.78197064989518, "percentage": 83.91, "elapsed_time": "1:36:36", "remaining_time": "0:18:31", "throughput": 3601.71, "total_tokens": 20877832}
6421
+ {"current_steps": 32025, "total_steps": 38160, "loss": 0.6136, "lr": 3.835719749444666e-06, "epoch": 16.784591194968552, "percentage": 83.92, "elapsed_time": "1:36:37", "remaining_time": "0:18:30", "throughput": 3601.74, "total_tokens": 20881096}
6422
+ {"current_steps": 32030, "total_steps": 38160, "loss": 0.5217, "lr": 3.8296357738219606e-06, "epoch": 16.78721174004193, "percentage": 83.94, "elapsed_time": "1:36:38", "remaining_time": "0:18:29", "throughput": 3601.83, "total_tokens": 20884680}
6423
+ {"current_steps": 32035, "total_steps": 38160, "loss": 0.6125, "lr": 3.823556226797931e-06, "epoch": 16.789832285115303, "percentage": 83.95, "elapsed_time": "1:36:39", "remaining_time": "0:18:28", "throughput": 3601.88, "total_tokens": 20888008}
6424
+ {"current_steps": 32040, "total_steps": 38160, "loss": 0.4494, "lr": 3.817481109644344e-06, "epoch": 16.79245283018868, "percentage": 83.96, "elapsed_time": "1:36:40", "remaining_time": "0:18:27", "throughput": 3601.93, "total_tokens": 20891240}
6425
+ {"current_steps": 32045, "total_steps": 38160, "loss": 0.3849, "lr": 3.811410423632042e-06, "epoch": 16.795073375262053, "percentage": 83.98, "elapsed_time": "1:36:40", "remaining_time": "0:18:26", "throughput": 3602.01, "total_tokens": 20894664}
6426
+ {"current_steps": 32050, "total_steps": 38160, "loss": 0.5257, "lr": 3.8053441700309473e-06, "epoch": 16.79769392033543, "percentage": 83.99, "elapsed_time": "1:36:41", "remaining_time": "0:18:26", "throughput": 3602.02, "total_tokens": 20897608}
6427
+ {"current_steps": 32055, "total_steps": 38160, "loss": 0.4725, "lr": 3.7992823501100507e-06, "epoch": 16.800314465408803, "percentage": 84.0, "elapsed_time": "1:36:42", "remaining_time": "0:18:25", "throughput": 3602.1, "total_tokens": 20901096}
6428
+ {"current_steps": 32060, "total_steps": 38160, "loss": 0.4408, "lr": 3.7932249651374207e-06, "epoch": 16.80293501048218, "percentage": 84.01, "elapsed_time": "1:36:43", "remaining_time": "0:18:24", "throughput": 3602.15, "total_tokens": 20904520}
6429
+ {"current_steps": 32065, "total_steps": 38160, "loss": 0.4151, "lr": 3.787172016380183e-06, "epoch": 16.805555555555557, "percentage": 84.03, "elapsed_time": "1:36:44", "remaining_time": "0:18:23", "throughput": 3602.18, "total_tokens": 20907592}
6430
+ {"current_steps": 32070, "total_steps": 38160, "loss": 0.4431, "lr": 3.7811235051045558e-06, "epoch": 16.80817610062893, "percentage": 84.04, "elapsed_time": "1:36:45", "remaining_time": "0:18:22", "throughput": 3602.31, "total_tokens": 20911752}
6431
+ {"current_steps": 32075, "total_steps": 38160, "loss": 0.4977, "lr": 3.775079432575809e-06, "epoch": 16.810796645702307, "percentage": 84.05, "elapsed_time": "1:36:45", "remaining_time": "0:18:21", "throughput": 3602.3, "total_tokens": 20914728}
6432
+ {"current_steps": 32080, "total_steps": 38160, "loss": 0.4527, "lr": 3.7690398000582964e-06, "epoch": 16.81341719077568, "percentage": 84.07, "elapsed_time": "1:36:46", "remaining_time": "0:18:20", "throughput": 3602.32, "total_tokens": 20917704}
6433
+ {"current_steps": 32085, "total_steps": 38160, "loss": 0.5096, "lr": 3.763004608815443e-06, "epoch": 16.816037735849058, "percentage": 84.08, "elapsed_time": "1:36:47", "remaining_time": "0:18:19", "throughput": 3602.36, "total_tokens": 20920808}
6434
+ {"current_steps": 32090, "total_steps": 38160, "loss": 0.4685, "lr": 3.756973860109747e-06, "epoch": 16.81865828092243, "percentage": 84.09, "elapsed_time": "1:36:48", "remaining_time": "0:18:18", "throughput": 3602.33, "total_tokens": 20923528}
6435
+ {"current_steps": 32095, "total_steps": 38160, "loss": 0.453, "lr": 3.7509475552027663e-06, "epoch": 16.821278825995808, "percentage": 84.11, "elapsed_time": "1:36:49", "remaining_time": "0:18:17", "throughput": 3602.29, "total_tokens": 20926376}
6436
+ {"current_steps": 32100, "total_steps": 38160, "loss": 0.4201, "lr": 3.744925695355131e-06, "epoch": 16.82389937106918, "percentage": 84.12, "elapsed_time": "1:36:49", "remaining_time": "0:18:16", "throughput": 3602.35, "total_tokens": 20929672}
6437
+ {"current_steps": 32105, "total_steps": 38160, "loss": 0.5628, "lr": 3.7389082818265476e-06, "epoch": 16.82651991614256, "percentage": 84.13, "elapsed_time": "1:36:50", "remaining_time": "0:18:15", "throughput": 3602.34, "total_tokens": 20932616}
6438
+ {"current_steps": 32110, "total_steps": 38160, "loss": 0.4132, "lr": 3.732895315875792e-06, "epoch": 16.829140461215932, "percentage": 84.15, "elapsed_time": "1:36:51", "remaining_time": "0:18:14", "throughput": 3602.33, "total_tokens": 20935400}
6439
+ {"current_steps": 32115, "total_steps": 38160, "loss": 0.3976, "lr": 3.726886798760715e-06, "epoch": 16.83176100628931, "percentage": 84.16, "elapsed_time": "1:36:52", "remaining_time": "0:18:14", "throughput": 3602.4, "total_tokens": 20938824}
6440
+ {"current_steps": 32120, "total_steps": 38160, "loss": 0.614, "lr": 3.720882731738215e-06, "epoch": 16.834381551362682, "percentage": 84.17, "elapsed_time": "1:36:53", "remaining_time": "0:18:13", "throughput": 3602.42, "total_tokens": 20941960}
6441
+ {"current_steps": 32125, "total_steps": 38160, "loss": 0.4092, "lr": 3.714883116064291e-06, "epoch": 16.83700209643606, "percentage": 84.19, "elapsed_time": "1:36:54", "remaining_time": "0:18:12", "throughput": 3602.66, "total_tokens": 20948712}
6442
+ {"current_steps": 32130, "total_steps": 38160, "loss": 0.5122, "lr": 3.708887952993978e-06, "epoch": 16.839622641509433, "percentage": 84.2, "elapsed_time": "1:36:55", "remaining_time": "0:18:11", "throughput": 3602.6, "total_tokens": 20951592}
6443
+ {"current_steps": 32135, "total_steps": 38160, "loss": 0.5574, "lr": 3.7028972437814057e-06, "epoch": 16.84224318658281, "percentage": 84.21, "elapsed_time": "1:36:56", "remaining_time": "0:18:10", "throughput": 3602.73, "total_tokens": 20955880}
6444
+ {"current_steps": 32140, "total_steps": 38160, "loss": 0.4521, "lr": 3.696910989679764e-06, "epoch": 16.844863731656183, "percentage": 84.22, "elapsed_time": "1:36:57", "remaining_time": "0:18:09", "throughput": 3602.8, "total_tokens": 20959112}
6445
+ {"current_steps": 32145, "total_steps": 38160, "loss": 0.3662, "lr": 3.6909291919413003e-06, "epoch": 16.84748427672956, "percentage": 84.24, "elapsed_time": "1:36:58", "remaining_time": "0:18:08", "throughput": 3602.79, "total_tokens": 20961992}
6446
+ {"current_steps": 32150, "total_steps": 38160, "loss": 0.3773, "lr": 3.684951851817342e-06, "epoch": 16.850104821802937, "percentage": 84.25, "elapsed_time": "1:36:59", "remaining_time": "0:18:07", "throughput": 3602.76, "total_tokens": 20964776}
6447
+ {"current_steps": 32155, "total_steps": 38160, "loss": 0.4922, "lr": 3.678978970558286e-06, "epoch": 16.85272536687631, "percentage": 84.26, "elapsed_time": "1:36:59", "remaining_time": "0:18:06", "throughput": 3602.8, "total_tokens": 20967944}
6448
+ {"current_steps": 32160, "total_steps": 38160, "loss": 0.4834, "lr": 3.673010549413583e-06, "epoch": 16.855345911949687, "percentage": 84.28, "elapsed_time": "1:37:00", "remaining_time": "0:18:05", "throughput": 3602.9, "total_tokens": 20971976}
6449
+ {"current_steps": 32165, "total_steps": 38160, "loss": 0.4311, "lr": 3.6670465896317592e-06, "epoch": 16.85796645702306, "percentage": 84.29, "elapsed_time": "1:37:01", "remaining_time": "0:18:05", "throughput": 3602.91, "total_tokens": 20975208}
6450
+ {"current_steps": 32170, "total_steps": 38160, "loss": 0.5082, "lr": 3.661087092460419e-06, "epoch": 16.860587002096437, "percentage": 84.3, "elapsed_time": "1:37:02", "remaining_time": "0:18:04", "throughput": 3602.85, "total_tokens": 20977736}
6451
+ {"current_steps": 32175, "total_steps": 38160, "loss": 0.482, "lr": 3.655132059146202e-06, "epoch": 16.86320754716981, "percentage": 84.32, "elapsed_time": "1:37:03", "remaining_time": "0:18:03", "throughput": 3602.96, "total_tokens": 20981704}
6452
+ {"current_steps": 32180, "total_steps": 38160, "loss": 0.5378, "lr": 3.6491814909348526e-06, "epoch": 16.865828092243188, "percentage": 84.33, "elapsed_time": "1:37:04", "remaining_time": "0:18:02", "throughput": 3603.12, "total_tokens": 20985992}
6453
+ {"current_steps": 32185, "total_steps": 38160, "loss": 0.5568, "lr": 3.6432353890711424e-06, "epoch": 16.86844863731656, "percentage": 84.34, "elapsed_time": "1:37:05", "remaining_time": "0:18:01", "throughput": 3603.16, "total_tokens": 20989544}
6454
+ {"current_steps": 32190, "total_steps": 38160, "loss": 0.5317, "lr": 3.6372937547989425e-06, "epoch": 16.871069182389938, "percentage": 84.36, "elapsed_time": "1:37:06", "remaining_time": "0:18:00", "throughput": 3603.42, "total_tokens": 20996392}
6455
+ {"current_steps": 32195, "total_steps": 38160, "loss": 0.4904, "lr": 3.631356589361173e-06, "epoch": 16.87368972746331, "percentage": 84.37, "elapsed_time": "1:37:07", "remaining_time": "0:17:59", "throughput": 3603.42, "total_tokens": 20999400}
6456
+ {"current_steps": 32200, "total_steps": 38160, "loss": 0.4269, "lr": 3.625423893999813e-06, "epoch": 16.87631027253669, "percentage": 84.38, "elapsed_time": "1:37:08", "remaining_time": "0:17:58", "throughput": 3603.29, "total_tokens": 21001704}
6457
+ {"current_steps": 32205, "total_steps": 38160, "loss": 0.5215, "lr": 3.6194956699559217e-06, "epoch": 16.878930817610062, "percentage": 84.39, "elapsed_time": "1:37:09", "remaining_time": "0:17:57", "throughput": 3603.34, "total_tokens": 21004872}
6458
+ {"current_steps": 32210, "total_steps": 38160, "loss": 0.5331, "lr": 3.6135719184696188e-06, "epoch": 16.88155136268344, "percentage": 84.41, "elapsed_time": "1:37:10", "remaining_time": "0:17:57", "throughput": 3603.5, "total_tokens": 21011656}
6459
+ {"current_steps": 32215, "total_steps": 38160, "loss": 0.4755, "lr": 3.6076526407800766e-06, "epoch": 16.884171907756812, "percentage": 84.42, "elapsed_time": "1:37:11", "remaining_time": "0:17:56", "throughput": 3603.47, "total_tokens": 21014472}
6460
+ {"current_steps": 32220, "total_steps": 38160, "loss": 0.4782, "lr": 3.601737838125552e-06, "epoch": 16.88679245283019, "percentage": 84.43, "elapsed_time": "1:37:12", "remaining_time": "0:17:55", "throughput": 3603.47, "total_tokens": 21017960}
6461
+ {"current_steps": 32225, "total_steps": 38160, "loss": 0.5132, "lr": 3.595827511743341e-06, "epoch": 16.889412997903563, "percentage": 84.45, "elapsed_time": "1:37:13", "remaining_time": "0:17:54", "throughput": 3603.51, "total_tokens": 21021288}
6462
+ {"current_steps": 32230, "total_steps": 38160, "loss": 0.5063, "lr": 3.5899216628698217e-06, "epoch": 16.89203354297694, "percentage": 84.46, "elapsed_time": "1:37:14", "remaining_time": "0:17:53", "throughput": 3603.57, "total_tokens": 21024872}
6463
+ {"current_steps": 32235, "total_steps": 38160, "loss": 0.436, "lr": 3.584020292740431e-06, "epoch": 16.894654088050313, "percentage": 84.47, "elapsed_time": "1:37:15", "remaining_time": "0:17:52", "throughput": 3603.58, "total_tokens": 21028008}
6464
+ {"current_steps": 32240, "total_steps": 38160, "loss": 0.3655, "lr": 3.578123402589675e-06, "epoch": 16.89727463312369, "percentage": 84.49, "elapsed_time": "1:37:16", "remaining_time": "0:17:51", "throughput": 3603.57, "total_tokens": 21030920}
6465
+ {"current_steps": 32245, "total_steps": 38160, "loss": 0.5767, "lr": 3.5722309936511044e-06, "epoch": 16.899895178197063, "percentage": 84.5, "elapsed_time": "1:37:16", "remaining_time": "0:17:50", "throughput": 3603.61, "total_tokens": 21034248}
6466
+ {"current_steps": 32250, "total_steps": 38160, "loss": 0.5081, "lr": 3.5663430671573572e-06, "epoch": 16.90251572327044, "percentage": 84.51, "elapsed_time": "1:37:17", "remaining_time": "0:17:49", "throughput": 3603.66, "total_tokens": 21037672}
6467
+ {"current_steps": 32255, "total_steps": 38160, "loss": 0.5152, "lr": 3.5604596243401038e-06, "epoch": 16.905136268343817, "percentage": 84.53, "elapsed_time": "1:37:18", "remaining_time": "0:17:48", "throughput": 3603.73, "total_tokens": 21041192}
6468
+ {"current_steps": 32260, "total_steps": 38160, "loss": 0.449, "lr": 3.554580666430102e-06, "epoch": 16.90775681341719, "percentage": 84.54, "elapsed_time": "1:37:19", "remaining_time": "0:17:47", "throughput": 3603.81, "total_tokens": 21044488}
6469
+ {"current_steps": 32265, "total_steps": 38160, "loss": 0.5655, "lr": 3.5487061946571614e-06, "epoch": 16.910377358490567, "percentage": 84.55, "elapsed_time": "1:37:20", "remaining_time": "0:17:47", "throughput": 3603.87, "total_tokens": 21047752}
6470
+ {"current_steps": 32270, "total_steps": 38160, "loss": 0.4279, "lr": 3.542836210250161e-06, "epoch": 16.91299790356394, "percentage": 84.56, "elapsed_time": "1:37:21", "remaining_time": "0:17:46", "throughput": 3603.81, "total_tokens": 21050344}
6471
+ {"current_steps": 32275, "total_steps": 38160, "loss": 0.5236, "lr": 3.536970714437032e-06, "epoch": 16.915618448637318, "percentage": 84.58, "elapsed_time": "1:37:22", "remaining_time": "0:17:45", "throughput": 3603.77, "total_tokens": 21053288}
6472
+ {"current_steps": 32280, "total_steps": 38160, "loss": 0.4819, "lr": 3.5311097084447575e-06, "epoch": 16.91823899371069, "percentage": 84.59, "elapsed_time": "1:37:22", "remaining_time": "0:17:44", "throughput": 3603.89, "total_tokens": 21056904}
6473
+ {"current_steps": 32285, "total_steps": 38160, "loss": 0.4443, "lr": 3.5252531934994033e-06, "epoch": 16.920859538784068, "percentage": 84.6, "elapsed_time": "1:37:23", "remaining_time": "0:17:43", "throughput": 3603.88, "total_tokens": 21059848}
6474
+ {"current_steps": 32290, "total_steps": 38160, "loss": 0.4517, "lr": 3.5194011708260866e-06, "epoch": 16.92348008385744, "percentage": 84.62, "elapsed_time": "1:37:24", "remaining_time": "0:17:42", "throughput": 3603.79, "total_tokens": 21062472}
6475
+ {"current_steps": 32295, "total_steps": 38160, "loss": 0.4372, "lr": 3.513553641648984e-06, "epoch": 16.92610062893082, "percentage": 84.63, "elapsed_time": "1:37:25", "remaining_time": "0:17:41", "throughput": 3603.94, "total_tokens": 21067752}
6476
+ {"current_steps": 32300, "total_steps": 38160, "loss": 0.5646, "lr": 3.5077106071913307e-06, "epoch": 16.928721174004192, "percentage": 84.64, "elapsed_time": "1:37:26", "remaining_time": "0:17:40", "throughput": 3603.88, "total_tokens": 21070632}
6477
+ {"current_steps": 32305, "total_steps": 38160, "loss": 0.5367, "lr": 3.5018720686754133e-06, "epoch": 16.93134171907757, "percentage": 84.66, "elapsed_time": "1:37:27", "remaining_time": "0:17:39", "throughput": 3603.99, "total_tokens": 21074568}
6478
+ {"current_steps": 32310, "total_steps": 38160, "loss": 0.4196, "lr": 3.4960380273225985e-06, "epoch": 16.933962264150942, "percentage": 84.67, "elapsed_time": "1:37:28", "remaining_time": "0:17:38", "throughput": 3603.94, "total_tokens": 21077288}
6479
+ {"current_steps": 32315, "total_steps": 38160, "loss": 0.4183, "lr": 3.490208484353297e-06, "epoch": 16.93658280922432, "percentage": 84.68, "elapsed_time": "1:37:29", "remaining_time": "0:17:37", "throughput": 3603.84, "total_tokens": 21079912}
6480
+ {"current_steps": 32320, "total_steps": 38160, "loss": 0.5772, "lr": 3.4843834409869824e-06, "epoch": 16.939203354297693, "percentage": 84.7, "elapsed_time": "1:37:30", "remaining_time": "0:17:37", "throughput": 3603.9, "total_tokens": 21084040}
6481
+ {"current_steps": 32325, "total_steps": 38160, "loss": 0.4995, "lr": 3.4785628984421977e-06, "epoch": 16.94182389937107, "percentage": 84.71, "elapsed_time": "1:37:31", "remaining_time": "0:17:36", "throughput": 3603.97, "total_tokens": 21087528}
6482
+ {"current_steps": 32330, "total_steps": 38160, "loss": 0.4265, "lr": 3.4727468579365235e-06, "epoch": 16.944444444444443, "percentage": 84.72, "elapsed_time": "1:37:32", "remaining_time": "0:17:35", "throughput": 3604.01, "total_tokens": 21090888}
6483
+ {"current_steps": 32335, "total_steps": 38160, "loss": 0.5094, "lr": 3.466935320686604e-06, "epoch": 16.94706498951782, "percentage": 84.74, "elapsed_time": "1:37:32", "remaining_time": "0:17:34", "throughput": 3604.12, "total_tokens": 21094792}
6484
+ {"current_steps": 32340, "total_steps": 38160, "loss": 0.4398, "lr": 3.4611282879081553e-06, "epoch": 16.949685534591197, "percentage": 84.75, "elapsed_time": "1:37:33", "remaining_time": "0:17:33", "throughput": 3604.09, "total_tokens": 21097736}
6485
+ {"current_steps": 32345, "total_steps": 38160, "loss": 0.3297, "lr": 3.455325760815939e-06, "epoch": 16.95230607966457, "percentage": 84.76, "elapsed_time": "1:37:34", "remaining_time": "0:17:32", "throughput": 3604.13, "total_tokens": 21100776}
6486
+ {"current_steps": 32350, "total_steps": 38160, "loss": 0.4952, "lr": 3.4495277406237843e-06, "epoch": 16.954926624737947, "percentage": 84.77, "elapsed_time": "1:37:35", "remaining_time": "0:17:31", "throughput": 3604.18, "total_tokens": 21104040}
6487
+ {"current_steps": 32355, "total_steps": 38160, "loss": 0.4276, "lr": 3.443734228544565e-06, "epoch": 16.95754716981132, "percentage": 84.79, "elapsed_time": "1:37:36", "remaining_time": "0:17:30", "throughput": 3604.17, "total_tokens": 21106856}
6488
+ {"current_steps": 32360, "total_steps": 38160, "loss": 0.4658, "lr": 3.437945225790215e-06, "epoch": 16.960167714884697, "percentage": 84.8, "elapsed_time": "1:37:37", "remaining_time": "0:17:29", "throughput": 3604.16, "total_tokens": 21109768}
6489
+ {"current_steps": 32365, "total_steps": 38160, "loss": 0.4746, "lr": 3.432160733571729e-06, "epoch": 16.96278825995807, "percentage": 84.81, "elapsed_time": "1:37:37", "remaining_time": "0:17:28", "throughput": 3604.25, "total_tokens": 21113512}
6490
+ {"current_steps": 32370, "total_steps": 38160, "loss": 0.4598, "lr": 3.4263807530991595e-06, "epoch": 16.965408805031448, "percentage": 84.83, "elapsed_time": "1:37:38", "remaining_time": "0:17:27", "throughput": 3604.36, "total_tokens": 21117544}
6491
+ {"current_steps": 32375, "total_steps": 38160, "loss": 0.4421, "lr": 3.4206052855816155e-06, "epoch": 16.96802935010482, "percentage": 84.84, "elapsed_time": "1:37:39", "remaining_time": "0:17:27", "throughput": 3604.55, "total_tokens": 21121864}
6492
+ {"current_steps": 32380, "total_steps": 38160, "loss": 0.5123, "lr": 3.4148343322272567e-06, "epoch": 16.970649895178198, "percentage": 84.85, "elapsed_time": "1:37:40", "remaining_time": "0:17:26", "throughput": 3604.48, "total_tokens": 21124328}
6493
+ {"current_steps": 32385, "total_steps": 38160, "loss": 0.3901, "lr": 3.4090678942432923e-06, "epoch": 16.97327044025157, "percentage": 84.87, "elapsed_time": "1:37:41", "remaining_time": "0:17:25", "throughput": 3604.45, "total_tokens": 21127336}
6494
+ {"current_steps": 32390, "total_steps": 38160, "loss": 0.3881, "lr": 3.4033059728360018e-06, "epoch": 16.97589098532495, "percentage": 84.88, "elapsed_time": "1:37:42", "remaining_time": "0:17:24", "throughput": 3604.37, "total_tokens": 21129896}
6495
+ {"current_steps": 32395, "total_steps": 38160, "loss": 0.5074, "lr": 3.397548569210715e-06, "epoch": 16.978511530398322, "percentage": 84.89, "elapsed_time": "1:37:43", "remaining_time": "0:17:23", "throughput": 3604.41, "total_tokens": 21133512}
6496
+ {"current_steps": 32400, "total_steps": 38160, "loss": 0.4813, "lr": 3.3917956845718136e-06, "epoch": 16.9811320754717, "percentage": 84.91, "elapsed_time": "1:37:44", "remaining_time": "0:17:22", "throughput": 3604.28, "total_tokens": 21135816}
6497
+ {"current_steps": 32405, "total_steps": 38160, "loss": 0.3514, "lr": 3.38604732012274e-06, "epoch": 16.983752620545072, "percentage": 84.92, "elapsed_time": "1:37:44", "remaining_time": "0:17:21", "throughput": 3604.2, "total_tokens": 21138536}
6498
+ {"current_steps": 32410, "total_steps": 38160, "loss": 0.5148, "lr": 3.3803034770659825e-06, "epoch": 16.98637316561845, "percentage": 84.93, "elapsed_time": "1:37:45", "remaining_time": "0:17:20", "throughput": 3604.13, "total_tokens": 21141160}
6499
+ {"current_steps": 32415, "total_steps": 38160, "loss": 0.4343, "lr": 3.3745641566030826e-06, "epoch": 16.988993710691823, "percentage": 84.94, "elapsed_time": "1:37:46", "remaining_time": "0:17:19", "throughput": 3604.05, "total_tokens": 21143624}
6500
+ {"current_steps": 32420, "total_steps": 38160, "loss": 0.526, "lr": 3.3688293599346442e-06, "epoch": 16.9916142557652, "percentage": 84.96, "elapsed_time": "1:37:47", "remaining_time": "0:17:18", "throughput": 3604.15, "total_tokens": 21147528}
6501
+ {"current_steps": 32425, "total_steps": 38160, "loss": 0.4656, "lr": 3.3630990882603213e-06, "epoch": 16.994234800838573, "percentage": 84.97, "elapsed_time": "1:37:48", "remaining_time": "0:17:17", "throughput": 3604.15, "total_tokens": 21150376}
6502
+ {"current_steps": 32430, "total_steps": 38160, "loss": 0.5109, "lr": 3.35737334277883e-06, "epoch": 16.99685534591195, "percentage": 84.98, "elapsed_time": "1:37:49", "remaining_time": "0:17:17", "throughput": 3604.29, "total_tokens": 21155016}
6503
+ {"current_steps": 32435, "total_steps": 38160, "loss": 0.3044, "lr": 3.351652124687918e-06, "epoch": 16.999475890985323, "percentage": 85.0, "elapsed_time": "1:37:50", "remaining_time": "0:17:16", "throughput": 3604.29, "total_tokens": 21157992}
6504
+ {"current_steps": 32436, "total_steps": 38160, "eval_loss": 0.46868664026260376, "epoch": 17.0, "percentage": 85.0, "elapsed_time": "1:38:00", "remaining_time": "0:17:17", "throughput": 3598.07, "total_tokens": 21158120}
6505
+ {"current_steps": 32440, "total_steps": 38160, "loss": 0.3522, "lr": 3.345935435184408e-06, "epoch": 17.0020964360587, "percentage": 85.01, "elapsed_time": "1:38:02", "remaining_time": "0:17:17", "throughput": 3597.15, "total_tokens": 21160712}
6506
+ {"current_steps": 32445, "total_steps": 38160, "loss": 0.4398, "lr": 3.3402232754641576e-06, "epoch": 17.004716981132077, "percentage": 85.02, "elapsed_time": "1:38:03", "remaining_time": "0:17:16", "throughput": 3597.06, "total_tokens": 21163528}
6507
+ {"current_steps": 32450, "total_steps": 38160, "loss": 0.573, "lr": 3.334515646722089e-06, "epoch": 17.00733752620545, "percentage": 85.04, "elapsed_time": "1:38:04", "remaining_time": "0:17:15", "throughput": 3597.18, "total_tokens": 21167304}