rbelanec commited on
Commit
c9e3452
·
verified ·
1 Parent(s): 3df0d7c

Training in progress, step 38160

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +380 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fba4e4e60cffdac0ba29fd1f8b2988e027328643d0b7ce715498d3b9aa8d6e52
3
  size 819328
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:966de7cbaa31654cc8218deb8a421711fcfda1e0d6d76d3d34a29f697f370e06
3
  size 819328
trainer_log.jsonl CHANGED
@@ -7271,3 +7271,383 @@
7271
  {"current_steps": 36260, "total_steps": 38160, "loss": 0.3818, "lr": 0.00022621887738311474, "epoch": 19.0041928721174, "percentage": 95.02, "elapsed_time": "1:32:29", "remaining_time": "0:04:50", "throughput": 4274.32, "total_tokens": 23720384}
7272
  {"current_steps": 36265, "total_steps": 38160, "loss": 0.4326, "lr": 0.0002250334234984158, "epoch": 19.006813417190777, "percentage": 95.03, "elapsed_time": "1:32:30", "remaining_time": "0:04:50", "throughput": 4274.46, "total_tokens": 23725024}
7273
  {"current_steps": 36270, "total_steps": 38160, "loss": 0.2384, "lr": 0.0002238510603681626, "epoch": 19.00943396226415, "percentage": 95.05, "elapsed_time": "1:32:31", "remaining_time": "0:04:49", "throughput": 4274.51, "total_tokens": 23728640}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7271
  {"current_steps": 36260, "total_steps": 38160, "loss": 0.3818, "lr": 0.00022621887738311474, "epoch": 19.0041928721174, "percentage": 95.02, "elapsed_time": "1:32:29", "remaining_time": "0:04:50", "throughput": 4274.32, "total_tokens": 23720384}
7272
  {"current_steps": 36265, "total_steps": 38160, "loss": 0.4326, "lr": 0.0002250334234984158, "epoch": 19.006813417190777, "percentage": 95.03, "elapsed_time": "1:32:30", "remaining_time": "0:04:50", "throughput": 4274.46, "total_tokens": 23725024}
7273
  {"current_steps": 36270, "total_steps": 38160, "loss": 0.2384, "lr": 0.0002238510603681626, "epoch": 19.00943396226415, "percentage": 95.05, "elapsed_time": "1:32:31", "remaining_time": "0:04:49", "throughput": 4274.51, "total_tokens": 23728640}
7274
+ {"current_steps": 36275, "total_steps": 38160, "loss": 0.5343, "lr": 0.00022267178823969224, "epoch": 19.012054507337528, "percentage": 95.06, "elapsed_time": "1:32:31", "remaining_time": "0:04:48", "throughput": 4274.45, "total_tokens": 23731392}
7275
+ {"current_steps": 36280, "total_steps": 38160, "loss": 0.3901, "lr": 0.00022149560735969576, "epoch": 19.0146750524109, "percentage": 95.07, "elapsed_time": "1:32:32", "remaining_time": "0:04:47", "throughput": 4274.5, "total_tokens": 23734912}
7276
+ {"current_steps": 36285, "total_steps": 38160, "loss": 0.3645, "lr": 0.00022032251797421464, "epoch": 19.017295597484278, "percentage": 95.09, "elapsed_time": "1:32:33", "remaining_time": "0:04:46", "throughput": 4274.5, "total_tokens": 23737856}
7277
+ {"current_steps": 36290, "total_steps": 38160, "loss": 0.3882, "lr": 0.00021915252032864927, "epoch": 19.01991614255765, "percentage": 95.1, "elapsed_time": "1:32:34", "remaining_time": "0:04:46", "throughput": 4274.55, "total_tokens": 23741408}
7278
+ {"current_steps": 36295, "total_steps": 38160, "loss": 0.3975, "lr": 0.00021798561466774557, "epoch": 19.02253668763103, "percentage": 95.11, "elapsed_time": "1:32:34", "remaining_time": "0:04:45", "throughput": 4274.57, "total_tokens": 23744448}
7279
+ {"current_steps": 36300, "total_steps": 38160, "loss": 0.1964, "lr": 0.00021682180123561157, "epoch": 19.0251572327044, "percentage": 95.13, "elapsed_time": "1:32:35", "remaining_time": "0:04:44", "throughput": 4274.62, "total_tokens": 23748128}
7280
+ {"current_steps": 36305, "total_steps": 38160, "loss": 0.3546, "lr": 0.0002156610802756992, "epoch": 19.02777777777778, "percentage": 95.14, "elapsed_time": "1:32:36", "remaining_time": "0:04:43", "throughput": 4274.61, "total_tokens": 23750880}
7281
+ {"current_steps": 36310, "total_steps": 38160, "loss": 0.3065, "lr": 0.00021450345203082255, "epoch": 19.030398322851152, "percentage": 95.15, "elapsed_time": "1:32:36", "remaining_time": "0:04:43", "throughput": 4274.63, "total_tokens": 23754112}
7282
+ {"current_steps": 36315, "total_steps": 38160, "loss": 0.3613, "lr": 0.0002133489167431396, "epoch": 19.03301886792453, "percentage": 95.17, "elapsed_time": "1:32:37", "remaining_time": "0:04:42", "throughput": 4274.69, "total_tokens": 23757792}
7283
+ {"current_steps": 36320, "total_steps": 38160, "loss": 0.4247, "lr": 0.00021219747465416882, "epoch": 19.035639412997902, "percentage": 95.18, "elapsed_time": "1:32:38", "remaining_time": "0:04:41", "throughput": 4274.82, "total_tokens": 23762336}
7284
+ {"current_steps": 36325, "total_steps": 38160, "loss": 0.2423, "lr": 0.0002110491260047792, "epoch": 19.03825995807128, "percentage": 95.19, "elapsed_time": "1:32:39", "remaining_time": "0:04:40", "throughput": 4274.78, "total_tokens": 23764800}
7285
+ {"current_steps": 36330, "total_steps": 38160, "loss": 0.3819, "lr": 0.00020990387103519026, "epoch": 19.040880503144653, "percentage": 95.2, "elapsed_time": "1:32:40", "remaining_time": "0:04:40", "throughput": 4274.83, "total_tokens": 23768288}
7286
+ {"current_steps": 36335, "total_steps": 38160, "loss": 0.2921, "lr": 0.000208761709984977, "epoch": 19.04350104821803, "percentage": 95.22, "elapsed_time": "1:32:40", "remaining_time": "0:04:39", "throughput": 4274.84, "total_tokens": 23771456}
7287
+ {"current_steps": 36340, "total_steps": 38160, "loss": 0.3314, "lr": 0.00020762264309306833, "epoch": 19.046121593291403, "percentage": 95.23, "elapsed_time": "1:32:41", "remaining_time": "0:04:38", "throughput": 4274.82, "total_tokens": 23774144}
7288
+ {"current_steps": 36345, "total_steps": 38160, "loss": 0.3144, "lr": 0.0002064866705977403, "epoch": 19.04874213836478, "percentage": 95.24, "elapsed_time": "1:32:42", "remaining_time": "0:04:37", "throughput": 4274.84, "total_tokens": 23777280}
7289
+ {"current_steps": 36350, "total_steps": 38160, "loss": 0.3155, "lr": 0.00020535379273662778, "epoch": 19.051362683438157, "percentage": 95.26, "elapsed_time": "1:32:42", "remaining_time": "0:04:36", "throughput": 4274.88, "total_tokens": 23780672}
7290
+ {"current_steps": 36355, "total_steps": 38160, "loss": 0.4396, "lr": 0.00020422400974671627, "epoch": 19.05398322851153, "percentage": 95.27, "elapsed_time": "1:32:43", "remaining_time": "0:04:36", "throughput": 4274.94, "total_tokens": 23784224}
7291
+ {"current_steps": 36360, "total_steps": 38160, "loss": 0.2816, "lr": 0.0002030973218643417, "epoch": 19.056603773584907, "percentage": 95.28, "elapsed_time": "1:32:44", "remaining_time": "0:04:35", "throughput": 4275.01, "total_tokens": 23787904}
7292
+ {"current_steps": 36365, "total_steps": 38160, "loss": 0.3904, "lr": 0.0002019737293251972, "epoch": 19.05922431865828, "percentage": 95.3, "elapsed_time": "1:32:45", "remaining_time": "0:04:34", "throughput": 4275.01, "total_tokens": 23790784}
7293
+ {"current_steps": 36370, "total_steps": 38160, "loss": 0.6173, "lr": 0.00020085323236432306, "epoch": 19.061844863731658, "percentage": 95.31, "elapsed_time": "1:32:45", "remaining_time": "0:04:33", "throughput": 4275.04, "total_tokens": 23794048}
7294
+ {"current_steps": 36375, "total_steps": 38160, "loss": 0.358, "lr": 0.00019973583121611347, "epoch": 19.06446540880503, "percentage": 95.32, "elapsed_time": "1:32:46", "remaining_time": "0:04:33", "throughput": 4275.11, "total_tokens": 23797792}
7295
+ {"current_steps": 36380, "total_steps": 38160, "loss": 0.3103, "lr": 0.00019862152611431982, "epoch": 19.067085953878408, "percentage": 95.34, "elapsed_time": "1:32:47", "remaining_time": "0:04:32", "throughput": 4275.11, "total_tokens": 23800768}
7296
+ {"current_steps": 36385, "total_steps": 38160, "loss": 0.4729, "lr": 0.00019751031729203894, "epoch": 19.06970649895178, "percentage": 95.35, "elapsed_time": "1:32:47", "remaining_time": "0:04:31", "throughput": 4275.14, "total_tokens": 23803968}
7297
+ {"current_steps": 36390, "total_steps": 38160, "loss": 0.3757, "lr": 0.00019640220498172323, "epoch": 19.072327044025158, "percentage": 95.36, "elapsed_time": "1:32:48", "remaining_time": "0:04:30", "throughput": 4275.16, "total_tokens": 23807136}
7298
+ {"current_steps": 36395, "total_steps": 38160, "loss": 0.3923, "lr": 0.00019529718941517892, "epoch": 19.07494758909853, "percentage": 95.37, "elapsed_time": "1:32:49", "remaining_time": "0:04:30", "throughput": 4275.28, "total_tokens": 23811520}
7299
+ {"current_steps": 36400, "total_steps": 38160, "loss": 0.3174, "lr": 0.00019419527082356112, "epoch": 19.07756813417191, "percentage": 95.39, "elapsed_time": "1:32:50", "remaining_time": "0:04:29", "throughput": 4275.28, "total_tokens": 23814624}
7300
+ {"current_steps": 36405, "total_steps": 38160, "loss": 0.3698, "lr": 0.00019309644943738046, "epoch": 19.080188679245282, "percentage": 95.4, "elapsed_time": "1:32:51", "remaining_time": "0:04:28", "throughput": 4275.3, "total_tokens": 23817856}
7301
+ {"current_steps": 36410, "total_steps": 38160, "loss": 0.3915, "lr": 0.0001920007254864947, "epoch": 19.08280922431866, "percentage": 95.41, "elapsed_time": "1:32:51", "remaining_time": "0:04:27", "throughput": 4275.35, "total_tokens": 23821472}
7302
+ {"current_steps": 36415, "total_steps": 38160, "loss": 0.369, "lr": 0.00019090809920011885, "epoch": 19.085429769392032, "percentage": 95.43, "elapsed_time": "1:32:52", "remaining_time": "0:04:27", "throughput": 4275.36, "total_tokens": 23824512}
7303
+ {"current_steps": 36420, "total_steps": 38160, "loss": 0.2933, "lr": 0.00018981857080681842, "epoch": 19.08805031446541, "percentage": 95.44, "elapsed_time": "1:32:53", "remaining_time": "0:04:26", "throughput": 4275.38, "total_tokens": 23827776}
7304
+ {"current_steps": 36425, "total_steps": 38160, "loss": 0.342, "lr": 0.00018873214053450937, "epoch": 19.090670859538783, "percentage": 95.45, "elapsed_time": "1:32:53", "remaining_time": "0:04:25", "throughput": 4275.38, "total_tokens": 23830688}
7305
+ {"current_steps": 36430, "total_steps": 38160, "loss": 0.2831, "lr": 0.00018764880861045995, "epoch": 19.09329140461216, "percentage": 95.47, "elapsed_time": "1:32:54", "remaining_time": "0:04:24", "throughput": 4275.37, "total_tokens": 23833536}
7306
+ {"current_steps": 36435, "total_steps": 38160, "loss": 0.3054, "lr": 0.0001865685752612922, "epoch": 19.095911949685533, "percentage": 95.48, "elapsed_time": "1:32:55", "remaining_time": "0:04:23", "throughput": 4275.52, "total_tokens": 23838272}
7307
+ {"current_steps": 36440, "total_steps": 38160, "loss": 0.4517, "lr": 0.00018549144071297707, "epoch": 19.09853249475891, "percentage": 95.49, "elapsed_time": "1:32:56", "remaining_time": "0:04:23", "throughput": 4275.55, "total_tokens": 23841600}
7308
+ {"current_steps": 36445, "total_steps": 38160, "loss": 0.3623, "lr": 0.00018441740519084093, "epoch": 19.101153039832287, "percentage": 95.51, "elapsed_time": "1:32:57", "remaining_time": "0:04:22", "throughput": 4275.6, "total_tokens": 23845120}
7309
+ {"current_steps": 36450, "total_steps": 38160, "loss": 0.3775, "lr": 0.0001833464689195574, "epoch": 19.10377358490566, "percentage": 95.52, "elapsed_time": "1:32:57", "remaining_time": "0:04:21", "throughput": 4275.63, "total_tokens": 23848480}
7310
+ {"current_steps": 36455, "total_steps": 38160, "loss": 0.3156, "lr": 0.00018227863212315565, "epoch": 19.106394129979037, "percentage": 95.53, "elapsed_time": "1:32:58", "remaining_time": "0:04:20", "throughput": 4275.66, "total_tokens": 23852000}
7311
+ {"current_steps": 36460, "total_steps": 38160, "loss": 0.314, "lr": 0.00018121389502501528, "epoch": 19.10901467505241, "percentage": 95.55, "elapsed_time": "1:32:59", "remaining_time": "0:04:20", "throughput": 4275.77, "total_tokens": 23856224}
7312
+ {"current_steps": 36465, "total_steps": 38160, "loss": 0.3057, "lr": 0.0001801522578478648, "epoch": 19.111635220125788, "percentage": 95.56, "elapsed_time": "1:33:00", "remaining_time": "0:04:19", "throughput": 4275.82, "total_tokens": 23859680}
7313
+ {"current_steps": 36470, "total_steps": 38160, "loss": 0.3237, "lr": 0.00017909372081378994, "epoch": 19.11425576519916, "percentage": 95.57, "elapsed_time": "1:33:00", "remaining_time": "0:04:18", "throughput": 4275.87, "total_tokens": 23863168}
7314
+ {"current_steps": 36475, "total_steps": 38160, "loss": 0.4084, "lr": 0.00017803828414422184, "epoch": 19.116876310272538, "percentage": 95.58, "elapsed_time": "1:33:01", "remaining_time": "0:04:17", "throughput": 4275.88, "total_tokens": 23866208}
7315
+ {"current_steps": 36480, "total_steps": 38160, "loss": 0.2933, "lr": 0.00017698594805994728, "epoch": 19.11949685534591, "percentage": 95.6, "elapsed_time": "1:33:02", "remaining_time": "0:04:17", "throughput": 4275.88, "total_tokens": 23869152}
7316
+ {"current_steps": 36485, "total_steps": 38160, "loss": 0.2484, "lr": 0.00017593671278110345, "epoch": 19.122117400419288, "percentage": 95.61, "elapsed_time": "1:33:03", "remaining_time": "0:04:16", "throughput": 4275.93, "total_tokens": 23872768}
7317
+ {"current_steps": 36490, "total_steps": 38160, "loss": 0.2941, "lr": 0.00017489057852717482, "epoch": 19.12473794549266, "percentage": 95.62, "elapsed_time": "1:33:03", "remaining_time": "0:04:15", "throughput": 4275.94, "total_tokens": 23875744}
7318
+ {"current_steps": 36495, "total_steps": 38160, "loss": 0.227, "lr": 0.00017384754551700465, "epoch": 19.12735849056604, "percentage": 95.64, "elapsed_time": "1:33:04", "remaining_time": "0:04:14", "throughput": 4275.88, "total_tokens": 23877984}
7319
+ {"current_steps": 36500, "total_steps": 38160, "loss": 0.4987, "lr": 0.0001728076139687834, "epoch": 19.129979035639412, "percentage": 95.65, "elapsed_time": "1:33:05", "remaining_time": "0:04:14", "throughput": 4275.94, "total_tokens": 23881504}
7320
+ {"current_steps": 36505, "total_steps": 38160, "loss": 0.2796, "lr": 0.00017177078410005042, "epoch": 19.13259958071279, "percentage": 95.66, "elapsed_time": "1:33:05", "remaining_time": "0:04:13", "throughput": 4275.97, "total_tokens": 23884864}
7321
+ {"current_steps": 36510, "total_steps": 38160, "loss": 0.4054, "lr": 0.00017073705612770217, "epoch": 19.135220125786162, "percentage": 95.68, "elapsed_time": "1:33:06", "remaining_time": "0:04:12", "throughput": 4275.99, "total_tokens": 23888000}
7322
+ {"current_steps": 36515, "total_steps": 38160, "loss": 0.4292, "lr": 0.00016970643026797738, "epoch": 19.13784067085954, "percentage": 95.69, "elapsed_time": "1:33:07", "remaining_time": "0:04:11", "throughput": 4275.95, "total_tokens": 23890528}
7323
+ {"current_steps": 36520, "total_steps": 38160, "loss": 0.2784, "lr": 0.00016867890673647522, "epoch": 19.140461215932913, "percentage": 95.7, "elapsed_time": "1:33:07", "remaining_time": "0:04:10", "throughput": 4275.93, "total_tokens": 23893184}
7324
+ {"current_steps": 36525, "total_steps": 38160, "loss": 0.3679, "lr": 0.0001676544857481421, "epoch": 19.14308176100629, "percentage": 95.72, "elapsed_time": "1:33:08", "remaining_time": "0:04:10", "throughput": 4275.93, "total_tokens": 23895968}
7325
+ {"current_steps": 36530, "total_steps": 38160, "loss": 0.349, "lr": 0.0001666331675172733, "epoch": 19.145702306079663, "percentage": 95.73, "elapsed_time": "1:33:09", "remaining_time": "0:04:09", "throughput": 4275.92, "total_tokens": 23898880}
7326
+ {"current_steps": 36535, "total_steps": 38160, "loss": 0.3741, "lr": 0.00016561495225751955, "epoch": 19.14832285115304, "percentage": 95.74, "elapsed_time": "1:33:09", "remaining_time": "0:04:08", "throughput": 4275.89, "total_tokens": 23901504}
7327
+ {"current_steps": 36540, "total_steps": 38160, "loss": 0.3812, "lr": 0.00016459984018187712, "epoch": 19.150943396226417, "percentage": 95.75, "elapsed_time": "1:33:10", "remaining_time": "0:04:07", "throughput": 4275.91, "total_tokens": 23904768}
7328
+ {"current_steps": 36545, "total_steps": 38160, "loss": 0.3968, "lr": 0.00016358783150269784, "epoch": 19.15356394129979, "percentage": 95.77, "elapsed_time": "1:33:11", "remaining_time": "0:04:07", "throughput": 4275.93, "total_tokens": 23907872}
7329
+ {"current_steps": 36550, "total_steps": 38160, "loss": 0.2943, "lr": 0.00016257892643167904, "epoch": 19.156184486373167, "percentage": 95.78, "elapsed_time": "1:33:11", "remaining_time": "0:04:06", "throughput": 4275.93, "total_tokens": 23910816}
7330
+ {"current_steps": 36555, "total_steps": 38160, "loss": 0.39, "lr": 0.00016157312517987686, "epoch": 19.15880503144654, "percentage": 95.79, "elapsed_time": "1:33:12", "remaining_time": "0:04:05", "throughput": 4275.95, "total_tokens": 23914048}
7331
+ {"current_steps": 36560, "total_steps": 38160, "loss": 0.2836, "lr": 0.00016057042795769138, "epoch": 19.161425576519918, "percentage": 95.81, "elapsed_time": "1:33:13", "remaining_time": "0:04:04", "throughput": 4276.04, "total_tokens": 23918016}
7332
+ {"current_steps": 36565, "total_steps": 38160, "loss": 0.3, "lr": 0.00015957083497487477, "epoch": 19.16404612159329, "percentage": 95.82, "elapsed_time": "1:33:14", "remaining_time": "0:04:04", "throughput": 4276.04, "total_tokens": 23921056}
7333
+ {"current_steps": 36570, "total_steps": 38160, "loss": 0.3982, "lr": 0.0001585743464405298, "epoch": 19.166666666666668, "percentage": 95.83, "elapsed_time": "1:33:14", "remaining_time": "0:04:03", "throughput": 4276.01, "total_tokens": 23923648}
7334
+ {"current_steps": 36575, "total_steps": 38160, "loss": 0.2714, "lr": 0.00015758096256311138, "epoch": 19.16928721174004, "percentage": 95.85, "elapsed_time": "1:33:15", "remaining_time": "0:04:02", "throughput": 4276.01, "total_tokens": 23926592}
7335
+ {"current_steps": 36580, "total_steps": 38160, "loss": 0.3439, "lr": 0.00015659068355042494, "epoch": 19.171907756813418, "percentage": 95.86, "elapsed_time": "1:33:16", "remaining_time": "0:04:01", "throughput": 4276.03, "total_tokens": 23929856}
7336
+ {"current_steps": 36585, "total_steps": 38160, "loss": 0.2868, "lr": 0.00015560350960962642, "epoch": 19.17452830188679, "percentage": 95.87, "elapsed_time": "1:33:16", "remaining_time": "0:04:00", "throughput": 4276.01, "total_tokens": 23932608}
7337
+ {"current_steps": 36590, "total_steps": 38160, "loss": 0.404, "lr": 0.00015461944094721734, "epoch": 19.17714884696017, "percentage": 95.89, "elapsed_time": "1:33:17", "remaining_time": "0:04:00", "throughput": 4276.01, "total_tokens": 23935488}
7338
+ {"current_steps": 36595, "total_steps": 38160, "loss": 0.3735, "lr": 0.00015363847776905803, "epoch": 19.179769392033542, "percentage": 95.9, "elapsed_time": "1:33:18", "remaining_time": "0:03:59", "throughput": 4276.01, "total_tokens": 23938432}
7339
+ {"current_steps": 36600, "total_steps": 38160, "loss": 0.3256, "lr": 0.00015266062028035265, "epoch": 19.18238993710692, "percentage": 95.91, "elapsed_time": "1:33:18", "remaining_time": "0:03:58", "throughput": 4275.99, "total_tokens": 23941120}
7340
+ {"current_steps": 36605, "total_steps": 38160, "loss": 0.3497, "lr": 0.0001516858686856576, "epoch": 19.185010482180292, "percentage": 95.93, "elapsed_time": "1:33:19", "remaining_time": "0:03:57", "throughput": 4276.01, "total_tokens": 23944320}
7341
+ {"current_steps": 36610, "total_steps": 38160, "loss": 0.3459, "lr": 0.0001507142231888797, "epoch": 19.18763102725367, "percentage": 95.94, "elapsed_time": "1:33:20", "remaining_time": "0:03:57", "throughput": 4275.95, "total_tokens": 23946624}
7342
+ {"current_steps": 36615, "total_steps": 38160, "loss": 0.2888, "lr": 0.00014974568399327815, "epoch": 19.190251572327043, "percentage": 95.95, "elapsed_time": "1:33:20", "remaining_time": "0:03:56", "throughput": 4275.94, "total_tokens": 23949440}
7343
+ {"current_steps": 36620, "total_steps": 38160, "loss": 0.2548, "lr": 0.00014878025130145745, "epoch": 19.19287211740042, "percentage": 95.96, "elapsed_time": "1:33:21", "remaining_time": "0:03:55", "throughput": 4275.92, "total_tokens": 23952224}
7344
+ {"current_steps": 36625, "total_steps": 38160, "loss": 0.3127, "lr": 0.00014781792531537774, "epoch": 19.195492662473793, "percentage": 95.98, "elapsed_time": "1:33:22", "remaining_time": "0:03:54", "throughput": 4275.91, "total_tokens": 23955072}
7345
+ {"current_steps": 36630, "total_steps": 38160, "loss": 0.302, "lr": 0.00014685870623634467, "epoch": 19.19811320754717, "percentage": 95.99, "elapsed_time": "1:33:23", "remaining_time": "0:03:54", "throughput": 4275.94, "total_tokens": 23958336}
7346
+ {"current_steps": 36635, "total_steps": 38160, "loss": 0.2247, "lr": 0.00014590259426501773, "epoch": 19.200733752620547, "percentage": 96.0, "elapsed_time": "1:33:23", "remaining_time": "0:03:53", "throughput": 4275.95, "total_tokens": 23961504}
7347
+ {"current_steps": 36640, "total_steps": 38160, "loss": 0.5122, "lr": 0.00014494958960140357, "epoch": 19.20335429769392, "percentage": 96.02, "elapsed_time": "1:33:24", "remaining_time": "0:03:52", "throughput": 4275.94, "total_tokens": 23964288}
7348
+ {"current_steps": 36645, "total_steps": 38160, "loss": 0.3896, "lr": 0.00014399969244485776, "epoch": 19.205974842767297, "percentage": 96.03, "elapsed_time": "1:33:25", "remaining_time": "0:03:51", "throughput": 4275.99, "total_tokens": 23967776}
7349
+ {"current_steps": 36650, "total_steps": 38160, "loss": 0.2385, "lr": 0.000143052902994093, "epoch": 19.20859538784067, "percentage": 96.04, "elapsed_time": "1:33:25", "remaining_time": "0:03:50", "throughput": 4275.99, "total_tokens": 23970688}
7350
+ {"current_steps": 36655, "total_steps": 38160, "loss": 0.3352, "lr": 0.00014210922144716087, "epoch": 19.211215932914047, "percentage": 96.06, "elapsed_time": "1:33:26", "remaining_time": "0:03:50", "throughput": 4276.03, "total_tokens": 23974208}
7351
+ {"current_steps": 36660, "total_steps": 38160, "loss": 0.3876, "lr": 0.00014116864800147344, "epoch": 19.21383647798742, "percentage": 96.07, "elapsed_time": "1:33:27", "remaining_time": "0:03:49", "throughput": 4276.11, "total_tokens": 23978080}
7352
+ {"current_steps": 36665, "total_steps": 38160, "loss": 0.4301, "lr": 0.00014023118285378666, "epoch": 19.216457023060798, "percentage": 96.08, "elapsed_time": "1:33:28", "remaining_time": "0:03:48", "throughput": 4276.26, "total_tokens": 23982976}
7353
+ {"current_steps": 36670, "total_steps": 38160, "loss": 0.3656, "lr": 0.000139296826200207, "epoch": 19.21907756813417, "percentage": 96.1, "elapsed_time": "1:33:29", "remaining_time": "0:03:47", "throughput": 4276.31, "total_tokens": 23986528}
7354
+ {"current_steps": 36675, "total_steps": 38160, "loss": 0.3083, "lr": 0.00013836557823618977, "epoch": 19.221698113207548, "percentage": 96.11, "elapsed_time": "1:33:29", "remaining_time": "0:03:47", "throughput": 4276.3, "total_tokens": 23989376}
7355
+ {"current_steps": 36680, "total_steps": 38160, "loss": 0.3078, "lr": 0.0001374374391565458, "epoch": 19.22431865828092, "percentage": 96.12, "elapsed_time": "1:33:30", "remaining_time": "0:03:46", "throughput": 4276.24, "total_tokens": 23991648}
7356
+ {"current_steps": 36685, "total_steps": 38160, "loss": 0.3557, "lr": 0.0001365124091554265, "epoch": 19.2269392033543, "percentage": 96.13, "elapsed_time": "1:33:31", "remaining_time": "0:03:45", "throughput": 4276.22, "total_tokens": 23994336}
7357
+ {"current_steps": 36690, "total_steps": 38160, "loss": 0.3778, "lr": 0.00013559048842634036, "epoch": 19.229559748427672, "percentage": 96.15, "elapsed_time": "1:33:31", "remaining_time": "0:03:44", "throughput": 4276.25, "total_tokens": 23997632}
7358
+ {"current_steps": 36695, "total_steps": 38160, "loss": 0.2854, "lr": 0.00013467167716214146, "epoch": 19.23218029350105, "percentage": 96.16, "elapsed_time": "1:33:32", "remaining_time": "0:03:44", "throughput": 4276.23, "total_tokens": 24000384}
7359
+ {"current_steps": 36700, "total_steps": 38160, "loss": 0.3773, "lr": 0.00013375597555503603, "epoch": 19.234800838574422, "percentage": 96.17, "elapsed_time": "1:33:33", "remaining_time": "0:03:43", "throughput": 4276.26, "total_tokens": 24003616}
7360
+ {"current_steps": 36705, "total_steps": 38160, "loss": 0.3565, "lr": 0.0001328433837965759, "epoch": 19.2374213836478, "percentage": 96.19, "elapsed_time": "1:33:33", "remaining_time": "0:03:42", "throughput": 4276.23, "total_tokens": 24006336}
7361
+ {"current_steps": 36710, "total_steps": 38160, "loss": 0.2916, "lr": 0.0001319339020776683, "epoch": 19.240041928721173, "percentage": 96.2, "elapsed_time": "1:33:34", "remaining_time": "0:03:41", "throughput": 4276.38, "total_tokens": 24011168}
7362
+ {"current_steps": 36715, "total_steps": 38160, "loss": 0.2623, "lr": 0.00013102753058856276, "epoch": 19.24266247379455, "percentage": 96.21, "elapsed_time": "1:33:35", "remaining_time": "0:03:41", "throughput": 4276.4, "total_tokens": 24014240}
7363
+ {"current_steps": 36720, "total_steps": 38160, "loss": 0.3854, "lr": 0.00013012426951886425, "epoch": 19.245283018867923, "percentage": 96.23, "elapsed_time": "1:33:36", "remaining_time": "0:03:40", "throughput": 4276.42, "total_tokens": 24017568}
7364
+ {"current_steps": 36725, "total_steps": 38160, "loss": 0.4246, "lr": 0.00012922411905752496, "epoch": 19.2479035639413, "percentage": 96.24, "elapsed_time": "1:33:37", "remaining_time": "0:03:39", "throughput": 4276.49, "total_tokens": 24021376}
7365
+ {"current_steps": 36730, "total_steps": 38160, "loss": 0.3285, "lr": 0.00012832707939284426, "epoch": 19.250524109014677, "percentage": 96.25, "elapsed_time": "1:33:37", "remaining_time": "0:03:38", "throughput": 4276.54, "total_tokens": 24024928}
7366
+ {"current_steps": 36735, "total_steps": 38160, "loss": 0.3254, "lr": 0.00012743315071247374, "epoch": 19.25314465408805, "percentage": 96.27, "elapsed_time": "1:33:38", "remaining_time": "0:03:37", "throughput": 4276.57, "total_tokens": 24028128}
7367
+ {"current_steps": 36740, "total_steps": 38160, "loss": 0.3136, "lr": 0.00012654233320341212, "epoch": 19.255765199161427, "percentage": 96.28, "elapsed_time": "1:33:39", "remaining_time": "0:03:37", "throughput": 4276.6, "total_tokens": 24031584}
7368
+ {"current_steps": 36745, "total_steps": 38160, "loss": 0.367, "lr": 0.00012565462705201036, "epoch": 19.2583857442348, "percentage": 96.29, "elapsed_time": "1:33:39", "remaining_time": "0:03:36", "throughput": 4276.58, "total_tokens": 24034336}
7369
+ {"current_steps": 36750, "total_steps": 38160, "loss": 0.3999, "lr": 0.0001247700324439649, "epoch": 19.261006289308177, "percentage": 96.31, "elapsed_time": "1:33:40", "remaining_time": "0:03:35", "throughput": 4276.59, "total_tokens": 24037408}
7370
+ {"current_steps": 36755, "total_steps": 38160, "loss": 0.3707, "lr": 0.00012388854956432106, "epoch": 19.26362683438155, "percentage": 96.32, "elapsed_time": "1:33:41", "remaining_time": "0:03:34", "throughput": 4276.56, "total_tokens": 24040128}
7371
+ {"current_steps": 36760, "total_steps": 38160, "loss": 0.3164, "lr": 0.00012301017859747964, "epoch": 19.266247379454928, "percentage": 96.33, "elapsed_time": "1:33:42", "remaining_time": "0:03:34", "throughput": 4276.56, "total_tokens": 24042944}
7372
+ {"current_steps": 36765, "total_steps": 38160, "loss": 0.353, "lr": 0.00012213491972718203, "epoch": 19.2688679245283, "percentage": 96.34, "elapsed_time": "1:33:42", "remaining_time": "0:03:33", "throughput": 4276.56, "total_tokens": 24045952}
7373
+ {"current_steps": 36770, "total_steps": 38160, "loss": 0.4193, "lr": 0.00012126277313652345, "epoch": 19.271488469601678, "percentage": 96.36, "elapsed_time": "1:33:43", "remaining_time": "0:03:32", "throughput": 4276.56, "total_tokens": 24048768}
7374
+ {"current_steps": 36775, "total_steps": 38160, "loss": 0.3746, "lr": 0.00012039373900794792, "epoch": 19.27410901467505, "percentage": 96.37, "elapsed_time": "1:33:44", "remaining_time": "0:03:31", "throughput": 4276.64, "total_tokens": 24052704}
7375
+ {"current_steps": 36780, "total_steps": 38160, "loss": 0.4728, "lr": 0.00011952781752324503, "epoch": 19.27672955974843, "percentage": 96.38, "elapsed_time": "1:33:45", "remaining_time": "0:03:31", "throughput": 4276.73, "total_tokens": 24056896}
7376
+ {"current_steps": 36785, "total_steps": 38160, "loss": 0.3576, "lr": 0.00011866500886355823, "epoch": 19.279350104821802, "percentage": 96.4, "elapsed_time": "1:33:45", "remaining_time": "0:03:30", "throughput": 4276.67, "total_tokens": 24059168}
7377
+ {"current_steps": 36790, "total_steps": 38160, "loss": 0.294, "lr": 0.00011780531320937647, "epoch": 19.28197064989518, "percentage": 96.41, "elapsed_time": "1:33:46", "remaining_time": "0:03:29", "throughput": 4276.67, "total_tokens": 24062176}
7378
+ {"current_steps": 36795, "total_steps": 38160, "loss": 0.3226, "lr": 0.00011694873074053924, "epoch": 19.284591194968552, "percentage": 96.42, "elapsed_time": "1:33:47", "remaining_time": "0:03:28", "throughput": 4276.71, "total_tokens": 24065536}
7379
+ {"current_steps": 36800, "total_steps": 38160, "loss": 0.37, "lr": 0.00011609526163623151, "epoch": 19.28721174004193, "percentage": 96.44, "elapsed_time": "1:33:47", "remaining_time": "0:03:27", "throughput": 4276.72, "total_tokens": 24068608}
7380
+ {"current_steps": 36805, "total_steps": 38160, "loss": 0.3436, "lr": 0.00011524490607499049, "epoch": 19.289832285115303, "percentage": 96.45, "elapsed_time": "1:33:48", "remaining_time": "0:03:27", "throughput": 4276.83, "total_tokens": 24072928}
7381
+ {"current_steps": 36810, "total_steps": 38160, "loss": 0.3078, "lr": 0.00011439766423470054, "epoch": 19.29245283018868, "percentage": 96.46, "elapsed_time": "1:33:49", "remaining_time": "0:03:26", "throughput": 4277.01, "total_tokens": 24078176}
7382
+ {"current_steps": 36815, "total_steps": 38160, "loss": 0.3098, "lr": 0.0001135535362925949, "epoch": 19.295073375262053, "percentage": 96.48, "elapsed_time": "1:33:50", "remaining_time": "0:03:25", "throughput": 4276.98, "total_tokens": 24080768}
7383
+ {"current_steps": 36820, "total_steps": 38160, "loss": 0.367, "lr": 0.00011271252242525731, "epoch": 19.29769392033543, "percentage": 96.49, "elapsed_time": "1:33:51", "remaining_time": "0:03:24", "throughput": 4276.99, "total_tokens": 24083936}
7384
+ {"current_steps": 36825, "total_steps": 38160, "loss": 0.2917, "lr": 0.00011187462280861704, "epoch": 19.300314465408803, "percentage": 96.5, "elapsed_time": "1:33:51", "remaining_time": "0:03:24", "throughput": 4277.02, "total_tokens": 24087200}
7385
+ {"current_steps": 36830, "total_steps": 38160, "loss": 0.3546, "lr": 0.00011103983761795222, "epoch": 19.30293501048218, "percentage": 96.51, "elapsed_time": "1:33:52", "remaining_time": "0:03:23", "throughput": 4277.04, "total_tokens": 24090432}
7386
+ {"current_steps": 36835, "total_steps": 38160, "loss": 0.3529, "lr": 0.00011020816702788982, "epoch": 19.305555555555557, "percentage": 96.53, "elapsed_time": "1:33:53", "remaining_time": "0:03:22", "throughput": 4277.05, "total_tokens": 24093504}
7387
+ {"current_steps": 36840, "total_steps": 38160, "loss": 0.3966, "lr": 0.00010937961121240902, "epoch": 19.30817610062893, "percentage": 96.54, "elapsed_time": "1:33:53", "remaining_time": "0:03:21", "throughput": 4277.1, "total_tokens": 24097088}
7388
+ {"current_steps": 36845, "total_steps": 38160, "loss": 0.2529, "lr": 0.00010855417034483117, "epoch": 19.310796645702307, "percentage": 96.55, "elapsed_time": "1:33:54", "remaining_time": "0:03:21", "throughput": 4277.06, "total_tokens": 24099552}
7389
+ {"current_steps": 36850, "total_steps": 38160, "loss": 0.3818, "lr": 0.00010773184459783147, "epoch": 19.31341719077568, "percentage": 96.57, "elapsed_time": "1:33:55", "remaining_time": "0:03:20", "throughput": 4277.08, "total_tokens": 24102816}
7390
+ {"current_steps": 36855, "total_steps": 38160, "loss": 0.33, "lr": 0.00010691263414343066, "epoch": 19.316037735849058, "percentage": 96.58, "elapsed_time": "1:33:56", "remaining_time": "0:03:19", "throughput": 4277.08, "total_tokens": 24105856}
7391
+ {"current_steps": 36860, "total_steps": 38160, "loss": 0.3258, "lr": 0.00010609653915299499, "epoch": 19.31865828092243, "percentage": 96.59, "elapsed_time": "1:33:57", "remaining_time": "0:03:18", "throughput": 4277.25, "total_tokens": 24110848}
7392
+ {"current_steps": 36865, "total_steps": 38160, "loss": 0.2841, "lr": 0.00010528355979724624, "epoch": 19.321278825995808, "percentage": 96.61, "elapsed_time": "1:33:57", "remaining_time": "0:03:18", "throughput": 4277.28, "total_tokens": 24114144}
7393
+ {"current_steps": 36870, "total_steps": 38160, "loss": 0.252, "lr": 0.00010447369624624836, "epoch": 19.32389937106918, "percentage": 96.62, "elapsed_time": "1:33:58", "remaining_time": "0:03:17", "throughput": 4277.3, "total_tokens": 24117472}
7394
+ {"current_steps": 36875, "total_steps": 38160, "loss": 0.279, "lr": 0.00010366694866941583, "epoch": 19.32651991614256, "percentage": 96.63, "elapsed_time": "1:33:59", "remaining_time": "0:03:16", "throughput": 4277.31, "total_tokens": 24120576}
7395
+ {"current_steps": 36880, "total_steps": 38160, "loss": 0.3081, "lr": 0.00010286331723551201, "epoch": 19.329140461215932, "percentage": 96.65, "elapsed_time": "1:33:59", "remaining_time": "0:03:15", "throughput": 4277.27, "total_tokens": 24123072}
7396
+ {"current_steps": 36885, "total_steps": 38160, "loss": 0.2929, "lr": 0.00010206280211264573, "epoch": 19.33176100628931, "percentage": 96.66, "elapsed_time": "1:34:00", "remaining_time": "0:03:14", "throughput": 4277.26, "total_tokens": 24125952}
7397
+ {"current_steps": 36890, "total_steps": 38160, "loss": 0.3837, "lr": 0.00010126540346827806, "epoch": 19.334381551362682, "percentage": 96.67, "elapsed_time": "1:34:01", "remaining_time": "0:03:14", "throughput": 4277.26, "total_tokens": 24128896}
7398
+ {"current_steps": 36895, "total_steps": 38160, "loss": 0.2988, "lr": 0.00010047112146921222, "epoch": 19.33700209643606, "percentage": 96.69, "elapsed_time": "1:34:01", "remaining_time": "0:03:13", "throughput": 4277.27, "total_tokens": 24131936}
7399
+ {"current_steps": 36900, "total_steps": 38160, "loss": 0.3667, "lr": 9.967995628160697e-05, "epoch": 19.339622641509433, "percentage": 96.7, "elapsed_time": "1:34:02", "remaining_time": "0:03:12", "throughput": 4277.26, "total_tokens": 24134912}
7400
+ {"current_steps": 36905, "total_steps": 38160, "loss": 0.3149, "lr": 9.889190807096159e-05, "epoch": 19.34224318658281, "percentage": 96.71, "elapsed_time": "1:34:03", "remaining_time": "0:03:11", "throughput": 4277.21, "total_tokens": 24137312}
7401
+ {"current_steps": 36910, "total_steps": 38160, "loss": 0.3062, "lr": 9.810697700212922e-05, "epoch": 19.344863731656183, "percentage": 96.72, "elapsed_time": "1:34:04", "remaining_time": "0:03:11", "throughput": 4277.28, "total_tokens": 24141024}
7402
+ {"current_steps": 36915, "total_steps": 38160, "loss": 0.2682, "lr": 9.732516323930684e-05, "epoch": 19.34748427672956, "percentage": 96.74, "elapsed_time": "1:34:04", "remaining_time": "0:03:10", "throughput": 4277.32, "total_tokens": 24144512}
7403
+ {"current_steps": 36920, "total_steps": 38160, "loss": 0.2785, "lr": 9.654646694604197e-05, "epoch": 19.350104821802937, "percentage": 96.75, "elapsed_time": "1:34:05", "remaining_time": "0:03:09", "throughput": 4277.32, "total_tokens": 24147392}
7404
+ {"current_steps": 36925, "total_steps": 38160, "loss": 0.3393, "lr": 9.577088828522761e-05, "epoch": 19.35272536687631, "percentage": 96.76, "elapsed_time": "1:34:06", "remaining_time": "0:03:08", "throughput": 4277.28, "total_tokens": 24149824}
7405
+ {"current_steps": 36930, "total_steps": 38160, "loss": 0.3966, "lr": 9.499842741910902e-05, "epoch": 19.355345911949687, "percentage": 96.78, "elapsed_time": "1:34:06", "remaining_time": "0:03:08", "throughput": 4277.28, "total_tokens": 24152768}
7406
+ {"current_steps": 36935, "total_steps": 38160, "loss": 0.3542, "lr": 9.422908450927358e-05, "epoch": 19.35796645702306, "percentage": 96.79, "elapsed_time": "1:34:07", "remaining_time": "0:03:07", "throughput": 4277.35, "total_tokens": 24156480}
7407
+ {"current_steps": 36940, "total_steps": 38160, "loss": 0.3522, "lr": 9.346285971665924e-05, "epoch": 19.360587002096437, "percentage": 96.8, "elapsed_time": "1:34:08", "remaining_time": "0:03:06", "throughput": 4277.33, "total_tokens": 24159136}
7408
+ {"current_steps": 36945, "total_steps": 38160, "loss": 0.2989, "lr": 9.26997532015511e-05, "epoch": 19.36320754716981, "percentage": 96.82, "elapsed_time": "1:34:09", "remaining_time": "0:03:05", "throughput": 4277.42, "total_tokens": 24163232}
7409
+ {"current_steps": 36950, "total_steps": 38160, "loss": 0.3399, "lr": 9.193976512358314e-05, "epoch": 19.365828092243188, "percentage": 96.83, "elapsed_time": "1:34:09", "remaining_time": "0:03:05", "throughput": 4277.45, "total_tokens": 24166624}
7410
+ {"current_steps": 36955, "total_steps": 38160, "loss": 0.286, "lr": 9.11828956417382e-05, "epoch": 19.36844863731656, "percentage": 96.84, "elapsed_time": "1:34:10", "remaining_time": "0:03:04", "throughput": 4277.44, "total_tokens": 24169440}
7411
+ {"current_steps": 36960, "total_steps": 38160, "loss": 0.247, "lr": 9.042914491433961e-05, "epoch": 19.371069182389938, "percentage": 96.86, "elapsed_time": "1:34:11", "remaining_time": "0:03:03", "throughput": 4277.49, "total_tokens": 24172960}
7412
+ {"current_steps": 36965, "total_steps": 38160, "loss": 0.2674, "lr": 8.96785130990696e-05, "epoch": 19.37368972746331, "percentage": 96.87, "elapsed_time": "1:34:12", "remaining_time": "0:03:02", "throughput": 4277.59, "total_tokens": 24177152}
7413
+ {"current_steps": 36970, "total_steps": 38160, "loss": 0.3887, "lr": 8.893100035294754e-05, "epoch": 19.37631027253669, "percentage": 96.88, "elapsed_time": "1:34:12", "remaining_time": "0:03:01", "throughput": 4277.64, "total_tokens": 24180672}
7414
+ {"current_steps": 36975, "total_steps": 38160, "loss": 0.3403, "lr": 8.818660683234502e-05, "epoch": 19.378930817610062, "percentage": 96.89, "elapsed_time": "1:34:13", "remaining_time": "0:03:01", "throughput": 4277.69, "total_tokens": 24184256}
7415
+ {"current_steps": 36980, "total_steps": 38160, "loss": 0.2492, "lr": 8.744533269298248e-05, "epoch": 19.38155136268344, "percentage": 96.91, "elapsed_time": "1:34:14", "remaining_time": "0:03:00", "throughput": 4277.87, "total_tokens": 24189568}
7416
+ {"current_steps": 36985, "total_steps": 38160, "loss": 0.3692, "lr": 8.670717808992423e-05, "epoch": 19.384171907756812, "percentage": 96.92, "elapsed_time": "1:34:15", "remaining_time": "0:02:59", "throughput": 4277.85, "total_tokens": 24192288}
7417
+ {"current_steps": 36990, "total_steps": 38160, "loss": 0.3417, "lr": 8.597214317758339e-05, "epoch": 19.38679245283019, "percentage": 96.93, "elapsed_time": "1:34:15", "remaining_time": "0:02:58", "throughput": 4277.87, "total_tokens": 24195584}
7418
+ {"current_steps": 36995, "total_steps": 38160, "loss": 0.3482, "lr": 8.524022810972365e-05, "epoch": 19.389412997903563, "percentage": 96.95, "elapsed_time": "1:34:16", "remaining_time": "0:02:58", "throughput": 4277.83, "total_tokens": 24198048}
7419
+ {"current_steps": 37000, "total_steps": 38160, "loss": 0.3043, "lr": 8.451143303945085e-05, "epoch": 19.39203354297694, "percentage": 96.96, "elapsed_time": "1:34:17", "remaining_time": "0:02:57", "throughput": 4277.83, "total_tokens": 24200896}
7420
+ {"current_steps": 37005, "total_steps": 38160, "loss": 0.2972, "lr": 8.37857581192214e-05, "epoch": 19.394654088050313, "percentage": 96.97, "elapsed_time": "1:34:17", "remaining_time": "0:02:56", "throughput": 4277.84, "total_tokens": 24203968}
7421
+ {"current_steps": 37010, "total_steps": 38160, "loss": 0.3042, "lr": 8.306320350083885e-05, "epoch": 19.39727463312369, "percentage": 96.99, "elapsed_time": "1:34:18", "remaining_time": "0:02:55", "throughput": 4277.9, "total_tokens": 24207712}
7422
+ {"current_steps": 37015, "total_steps": 38160, "loss": 0.3242, "lr": 8.234376933545229e-05, "epoch": 19.399895178197063, "percentage": 97.0, "elapsed_time": "1:34:19", "remaining_time": "0:02:55", "throughput": 4277.94, "total_tokens": 24211040}
7423
+ {"current_steps": 37020, "total_steps": 38160, "loss": 0.3706, "lr": 8.162745577355968e-05, "epoch": 19.40251572327044, "percentage": 97.01, "elapsed_time": "1:34:20", "remaining_time": "0:02:54", "throughput": 4277.98, "total_tokens": 24214592}
7424
+ {"current_steps": 37025, "total_steps": 38160, "loss": 0.3687, "lr": 8.091426296500614e-05, "epoch": 19.405136268343817, "percentage": 97.03, "elapsed_time": "1:34:21", "remaining_time": "0:02:53", "throughput": 4278.01, "total_tokens": 24217952}
7425
+ {"current_steps": 37030, "total_steps": 38160, "loss": 0.3735, "lr": 8.020419105898068e-05, "epoch": 19.40775681341719, "percentage": 97.04, "elapsed_time": "1:34:21", "remaining_time": "0:02:52", "throughput": 4278.05, "total_tokens": 24221344}
7426
+ {"current_steps": 37035, "total_steps": 38160, "loss": 0.5479, "lr": 7.949724020402615e-05, "epoch": 19.410377358490567, "percentage": 97.05, "elapsed_time": "1:34:22", "remaining_time": "0:02:52", "throughput": 4278.08, "total_tokens": 24224640}
7427
+ {"current_steps": 37040, "total_steps": 38160, "loss": 0.3504, "lr": 7.879341054802757e-05, "epoch": 19.41299790356394, "percentage": 97.06, "elapsed_time": "1:34:23", "remaining_time": "0:02:51", "throughput": 4278.07, "total_tokens": 24227424}
7428
+ {"current_steps": 37045, "total_steps": 38160, "loss": 0.3472, "lr": 7.809270223821552e-05, "epoch": 19.415618448637318, "percentage": 97.08, "elapsed_time": "1:34:23", "remaining_time": "0:02:50", "throughput": 4278.15, "total_tokens": 24231424}
7429
+ {"current_steps": 37050, "total_steps": 38160, "loss": 0.4187, "lr": 7.739511542117438e-05, "epoch": 19.41823899371069, "percentage": 97.09, "elapsed_time": "1:34:24", "remaining_time": "0:02:49", "throughput": 4278.13, "total_tokens": 24234176}
7430
+ {"current_steps": 37055, "total_steps": 38160, "loss": 0.3259, "lr": 7.670065024282746e-05, "epoch": 19.420859538784068, "percentage": 97.1, "elapsed_time": "1:34:25", "remaining_time": "0:02:48", "throughput": 4278.13, "total_tokens": 24236992}
7431
+ {"current_steps": 37060, "total_steps": 38160, "loss": 0.2333, "lr": 7.60093068484502e-05, "epoch": 19.42348008385744, "percentage": 97.12, "elapsed_time": "1:34:26", "remaining_time": "0:02:48", "throughput": 4278.12, "total_tokens": 24239936}
7432
+ {"current_steps": 37065, "total_steps": 38160, "loss": 0.3174, "lr": 7.532108538266358e-05, "epoch": 19.42610062893082, "percentage": 97.13, "elapsed_time": "1:34:26", "remaining_time": "0:02:47", "throughput": 4278.13, "total_tokens": 24242976}
7433
+ {"current_steps": 37070, "total_steps": 38160, "loss": 0.2737, "lr": 7.463598598943743e-05, "epoch": 19.428721174004192, "percentage": 97.14, "elapsed_time": "1:34:27", "remaining_time": "0:02:46", "throughput": 4278.1, "total_tokens": 24245504}
7434
+ {"current_steps": 37075, "total_steps": 38160, "loss": 0.4112, "lr": 7.395400881208546e-05, "epoch": 19.43134171907757, "percentage": 97.16, "elapsed_time": "1:34:28", "remaining_time": "0:02:45", "throughput": 4278.12, "total_tokens": 24248704}
7435
+ {"current_steps": 37080, "total_steps": 38160, "loss": 0.3397, "lr": 7.327515399326855e-05, "epoch": 19.433962264150942, "percentage": 97.17, "elapsed_time": "1:34:28", "remaining_time": "0:02:45", "throughput": 4278.17, "total_tokens": 24252288}
7436
+ {"current_steps": 37085, "total_steps": 38160, "loss": 0.3341, "lr": 7.25994216749981e-05, "epoch": 19.43658280922432, "percentage": 97.18, "elapsed_time": "1:34:29", "remaining_time": "0:02:44", "throughput": 4278.22, "total_tokens": 24255776}
7437
+ {"current_steps": 37090, "total_steps": 38160, "loss": 0.3258, "lr": 7.192681199862604e-05, "epoch": 19.439203354297693, "percentage": 97.2, "elapsed_time": "1:34:30", "remaining_time": "0:02:43", "throughput": 4278.21, "total_tokens": 24258624}
7438
+ {"current_steps": 37095, "total_steps": 38160, "loss": 0.2951, "lr": 7.125732510485649e-05, "epoch": 19.44182389937107, "percentage": 97.21, "elapsed_time": "1:34:30", "remaining_time": "0:02:42", "throughput": 4278.18, "total_tokens": 24261120}
7439
+ {"current_steps": 37100, "total_steps": 38160, "loss": 0.309, "lr": 7.059096113373908e-05, "epoch": 19.444444444444443, "percentage": 97.22, "elapsed_time": "1:34:31", "remaining_time": "0:02:42", "throughput": 4278.17, "total_tokens": 24264032}
7440
+ {"current_steps": 37105, "total_steps": 38160, "loss": 0.2314, "lr": 6.992772022467064e-05, "epoch": 19.44706498951782, "percentage": 97.24, "elapsed_time": "1:34:32", "remaining_time": "0:02:41", "throughput": 4278.21, "total_tokens": 24267424}
7441
+ {"current_steps": 37110, "total_steps": 38160, "loss": 0.4141, "lr": 6.926760251638852e-05, "epoch": 19.449685534591197, "percentage": 97.25, "elapsed_time": "1:34:33", "remaining_time": "0:02:40", "throughput": 4278.26, "total_tokens": 24270976}
7442
+ {"current_steps": 37115, "total_steps": 38160, "loss": 0.3663, "lr": 6.861060814698727e-05, "epoch": 19.45230607966457, "percentage": 97.26, "elapsed_time": "1:34:33", "remaining_time": "0:02:39", "throughput": 4278.26, "total_tokens": 24273856}
7443
+ {"current_steps": 37120, "total_steps": 38160, "loss": 0.3682, "lr": 6.795673725390027e-05, "epoch": 19.454926624737947, "percentage": 97.27, "elapsed_time": "1:34:34", "remaining_time": "0:02:38", "throughput": 4278.31, "total_tokens": 24277536}
7444
+ {"current_steps": 37125, "total_steps": 38160, "loss": 0.3741, "lr": 6.73059899739098e-05, "epoch": 19.45754716981132, "percentage": 97.29, "elapsed_time": "1:34:35", "remaining_time": "0:02:38", "throughput": 4278.52, "total_tokens": 24283520}
7445
+ {"current_steps": 37130, "total_steps": 38160, "loss": 0.3739, "lr": 6.66583664431436e-05, "epoch": 19.460167714884697, "percentage": 97.3, "elapsed_time": "1:34:36", "remaining_time": "0:02:37", "throughput": 4278.53, "total_tokens": 24286592}
7446
+ {"current_steps": 37135, "total_steps": 38160, "loss": 0.3586, "lr": 6.601386679708165e-05, "epoch": 19.46278825995807, "percentage": 97.31, "elapsed_time": "1:34:37", "remaining_time": "0:02:36", "throughput": 4278.62, "total_tokens": 24290624}
7447
+ {"current_steps": 37140, "total_steps": 38160, "loss": 0.3042, "lr": 6.537249117054111e-05, "epoch": 19.465408805031448, "percentage": 97.33, "elapsed_time": "1:34:37", "remaining_time": "0:02:35", "throughput": 4278.62, "total_tokens": 24293568}
7448
+ {"current_steps": 37145, "total_steps": 38160, "loss": 0.3791, "lr": 6.473423969769132e-05, "epoch": 19.46802935010482, "percentage": 97.34, "elapsed_time": "1:34:38", "remaining_time": "0:02:35", "throughput": 4278.6, "total_tokens": 24296288}
7449
+ {"current_steps": 37150, "total_steps": 38160, "loss": 0.2975, "lr": 6.40991125120488e-05, "epoch": 19.470649895178198, "percentage": 97.35, "elapsed_time": "1:34:39", "remaining_time": "0:02:34", "throughput": 4278.55, "total_tokens": 24298592}
7450
+ {"current_steps": 37155, "total_steps": 38160, "loss": 0.3348, "lr": 6.346710974647563e-05, "epoch": 19.47327044025157, "percentage": 97.37, "elapsed_time": "1:34:39", "remaining_time": "0:02:33", "throughput": 4278.58, "total_tokens": 24301856}
7451
+ {"current_steps": 37160, "total_steps": 38160, "loss": 0.2647, "lr": 6.283823153317602e-05, "epoch": 19.47589098532495, "percentage": 97.38, "elapsed_time": "1:34:40", "remaining_time": "0:02:32", "throughput": 4278.58, "total_tokens": 24304832}
7452
+ {"current_steps": 37165, "total_steps": 38160, "loss": 0.3354, "lr": 6.22124780037081e-05, "epoch": 19.478511530398322, "percentage": 97.39, "elapsed_time": "1:34:41", "remaining_time": "0:02:32", "throughput": 4278.64, "total_tokens": 24308480}
7453
+ {"current_steps": 37170, "total_steps": 38160, "loss": 0.3636, "lr": 6.158984928896882e-05, "epoch": 19.4811320754717, "percentage": 97.41, "elapsed_time": "1:34:42", "remaining_time": "0:02:31", "throughput": 4278.66, "total_tokens": 24311552}
7454
+ {"current_steps": 37175, "total_steps": 38160, "loss": 0.4682, "lr": 6.097034551920732e-05, "epoch": 19.483752620545072, "percentage": 97.42, "elapsed_time": "1:34:42", "remaining_time": "0:02:30", "throughput": 4278.64, "total_tokens": 24314336}
7455
+ {"current_steps": 37180, "total_steps": 38160, "loss": 0.396, "lr": 6.0353966824016615e-05, "epoch": 19.48637316561845, "percentage": 97.43, "elapsed_time": "1:34:43", "remaining_time": "0:02:29", "throughput": 4278.64, "total_tokens": 24317184}
7456
+ {"current_steps": 37185, "total_steps": 38160, "loss": 0.3641, "lr": 5.974071333233355e-05, "epoch": 19.488993710691823, "percentage": 97.44, "elapsed_time": "1:34:44", "remaining_time": "0:02:29", "throughput": 4278.68, "total_tokens": 24320544}
7457
+ {"current_steps": 37190, "total_steps": 38160, "loss": 0.383, "lr": 5.9130585172448845e-05, "epoch": 19.4916142557652, "percentage": 97.46, "elapsed_time": "1:34:44", "remaining_time": "0:02:28", "throughput": 4278.73, "total_tokens": 24324192}
7458
+ {"current_steps": 37195, "total_steps": 38160, "loss": 0.3597, "lr": 5.852358247199041e-05, "epoch": 19.494234800838573, "percentage": 97.47, "elapsed_time": "1:34:45", "remaining_time": "0:02:27", "throughput": 4278.8, "total_tokens": 24328000}
7459
+ {"current_steps": 37200, "total_steps": 38160, "loss": 0.3945, "lr": 5.7919705357935e-05, "epoch": 19.49685534591195, "percentage": 97.48, "elapsed_time": "1:34:46", "remaining_time": "0:02:26", "throughput": 4278.81, "total_tokens": 24331136}
7460
+ {"current_steps": 37205, "total_steps": 38160, "loss": 0.3352, "lr": 5.731895395660991e-05, "epoch": 19.499475890985323, "percentage": 97.5, "elapsed_time": "1:34:47", "remaining_time": "0:02:25", "throughput": 4278.84, "total_tokens": 24334368}
7461
+ {"current_steps": 37210, "total_steps": 38160, "loss": 0.3662, "lr": 5.672132839368626e-05, "epoch": 19.5020964360587, "percentage": 97.51, "elapsed_time": "1:34:47", "remaining_time": "0:02:25", "throughput": 4278.84, "total_tokens": 24337248}
7462
+ {"current_steps": 37215, "total_steps": 38160, "loss": 0.3692, "lr": 5.61268287941774e-05, "epoch": 19.504716981132077, "percentage": 97.52, "elapsed_time": "1:34:48", "remaining_time": "0:02:24", "throughput": 4278.85, "total_tokens": 24340352}
7463
+ {"current_steps": 37220, "total_steps": 38160, "loss": 0.5113, "lr": 5.553545528244719e-05, "epoch": 19.50733752620545, "percentage": 97.54, "elapsed_time": "1:34:49", "remaining_time": "0:02:23", "throughput": 4278.87, "total_tokens": 24343584}
7464
+ {"current_steps": 37225, "total_steps": 38160, "loss": 0.3896, "lr": 5.494720798220498e-05, "epoch": 19.509958071278827, "percentage": 97.55, "elapsed_time": "1:34:50", "remaining_time": "0:02:22", "throughput": 4278.89, "total_tokens": 24346880}
7465
+ {"current_steps": 37230, "total_steps": 38160, "loss": 0.2565, "lr": 5.436208701650402e-05, "epoch": 19.5125786163522, "percentage": 97.56, "elapsed_time": "1:34:50", "remaining_time": "0:02:22", "throughput": 4279.04, "total_tokens": 24351552}
7466
+ {"current_steps": 37235, "total_steps": 38160, "loss": 0.2964, "lr": 5.3780092507744734e-05, "epoch": 19.515199161425578, "percentage": 97.58, "elapsed_time": "1:34:51", "remaining_time": "0:02:21", "throughput": 4279.11, "total_tokens": 24355360}
7467
+ {"current_steps": 37240, "total_steps": 38160, "loss": 0.2703, "lr": 5.3201224577676396e-05, "epoch": 19.51781970649895, "percentage": 97.59, "elapsed_time": "1:34:52", "remaining_time": "0:02:20", "throughput": 4279.1, "total_tokens": 24358336}
7468
+ {"current_steps": 37245, "total_steps": 38160, "loss": 0.3414, "lr": 5.262548334738881e-05, "epoch": 19.520440251572328, "percentage": 97.6, "elapsed_time": "1:34:53", "remaining_time": "0:02:19", "throughput": 4279.08, "total_tokens": 24360992}
7469
+ {"current_steps": 37250, "total_steps": 38160, "loss": 0.3221, "lr": 5.2052868937322306e-05, "epoch": 19.5230607966457, "percentage": 97.62, "elapsed_time": "1:34:53", "remaining_time": "0:02:19", "throughput": 4279.02, "total_tokens": 24363296}
7470
+ {"current_steps": 37255, "total_steps": 38160, "loss": 0.2277, "lr": 5.148338146725939e-05, "epoch": 19.52568134171908, "percentage": 97.63, "elapsed_time": "1:34:54", "remaining_time": "0:02:18", "throughput": 4278.97, "total_tokens": 24365568}
7471
+ {"current_steps": 37260, "total_steps": 38160, "loss": 0.2886, "lr": 5.091702105633144e-05, "epoch": 19.528301886792452, "percentage": 97.64, "elapsed_time": "1:34:54", "remaining_time": "0:02:17", "throughput": 4278.98, "total_tokens": 24368672}
7472
+ {"current_steps": 37265, "total_steps": 38160, "loss": 0.2782, "lr": 5.035378782301369e-05, "epoch": 19.53092243186583, "percentage": 97.65, "elapsed_time": "1:34:55", "remaining_time": "0:02:16", "throughput": 4278.97, "total_tokens": 24371456}
7473
+ {"current_steps": 37270, "total_steps": 38160, "loss": 0.3426, "lr": 4.979368188513189e-05, "epoch": 19.533542976939202, "percentage": 97.67, "elapsed_time": "1:34:56", "remaining_time": "0:02:16", "throughput": 4279.03, "total_tokens": 24375200}
7474
+ {"current_steps": 37275, "total_steps": 38160, "loss": 0.3137, "lr": 4.9236703359848975e-05, "epoch": 19.53616352201258, "percentage": 97.68, "elapsed_time": "1:34:57", "remaining_time": "0:02:15", "throughput": 4279.0, "total_tokens": 24377824}
7475
+ {"current_steps": 37280, "total_steps": 38160, "loss": 0.309, "lr": 4.8682852363680084e-05, "epoch": 19.538784067085953, "percentage": 97.69, "elapsed_time": "1:34:57", "remaining_time": "0:02:14", "throughput": 4279.03, "total_tokens": 24381184}
7476
+ {"current_steps": 37285, "total_steps": 38160, "loss": 0.2841, "lr": 4.813212901248587e-05, "epoch": 19.54140461215933, "percentage": 97.71, "elapsed_time": "1:34:58", "remaining_time": "0:02:13", "throughput": 4279.05, "total_tokens": 24384448}
7477
+ {"current_steps": 37290, "total_steps": 38160, "loss": 0.2258, "lr": 4.7584533421469176e-05, "epoch": 19.544025157232703, "percentage": 97.72, "elapsed_time": "1:34:59", "remaining_time": "0:02:12", "throughput": 4279.04, "total_tokens": 24387104}
7478
+ {"current_steps": 37295, "total_steps": 38160, "loss": 0.3665, "lr": 4.704006570518171e-05, "epoch": 19.54664570230608, "percentage": 97.73, "elapsed_time": "1:34:59", "remaining_time": "0:02:12", "throughput": 4279.02, "total_tokens": 24389792}
7479
+ {"current_steps": 37300, "total_steps": 38160, "loss": 0.3726, "lr": 4.6498725977520695e-05, "epoch": 19.549266247379457, "percentage": 97.75, "elapsed_time": "1:35:00", "remaining_time": "0:02:11", "throughput": 4279.09, "total_tokens": 24393632}
7480
+ {"current_steps": 37305, "total_steps": 38160, "loss": 0.306, "lr": 4.596051435172887e-05, "epoch": 19.55188679245283, "percentage": 97.76, "elapsed_time": "1:35:01", "remaining_time": "0:02:10", "throughput": 4279.15, "total_tokens": 24397152}
7481
+ {"current_steps": 37310, "total_steps": 38160, "loss": 0.391, "lr": 4.542543094039119e-05, "epoch": 19.554507337526207, "percentage": 97.77, "elapsed_time": "1:35:02", "remaining_time": "0:02:09", "throughput": 4279.24, "total_tokens": 24401152}
7482
+ {"current_steps": 37315, "total_steps": 38160, "loss": 0.3873, "lr": 4.489347585544312e-05, "epoch": 19.55712788259958, "percentage": 97.79, "elapsed_time": "1:35:03", "remaining_time": "0:02:09", "throughput": 4279.29, "total_tokens": 24404896}
7483
+ {"current_steps": 37320, "total_steps": 38160, "loss": 0.3328, "lr": 4.43646492081623e-05, "epoch": 19.559748427672957, "percentage": 97.8, "elapsed_time": "1:35:03", "remaining_time": "0:02:08", "throughput": 4279.28, "total_tokens": 24407712}
7484
+ {"current_steps": 37325, "total_steps": 38160, "loss": 0.3438, "lr": 4.383895110917524e-05, "epoch": 19.56236897274633, "percentage": 97.81, "elapsed_time": "1:35:04", "remaining_time": "0:02:07", "throughput": 4279.33, "total_tokens": 24411232}
7485
+ {"current_steps": 37330, "total_steps": 38160, "loss": 0.286, "lr": 4.331638166845064e-05, "epoch": 19.564989517819708, "percentage": 97.82, "elapsed_time": "1:35:05", "remaining_time": "0:02:06", "throughput": 4279.35, "total_tokens": 24414432}
7486
+ {"current_steps": 37335, "total_steps": 38160, "loss": 0.3432, "lr": 4.279694099530273e-05, "epoch": 19.56761006289308, "percentage": 97.84, "elapsed_time": "1:35:05", "remaining_time": "0:02:06", "throughput": 4279.37, "total_tokens": 24417600}
7487
+ {"current_steps": 37340, "total_steps": 38160, "loss": 0.2876, "lr": 4.2280629198394594e-05, "epoch": 19.570230607966458, "percentage": 97.85, "elapsed_time": "1:35:06", "remaining_time": "0:02:05", "throughput": 4279.36, "total_tokens": 24420416}
7488
+ {"current_steps": 37345, "total_steps": 38160, "loss": 0.3661, "lr": 4.1767446385733155e-05, "epoch": 19.57285115303983, "percentage": 97.86, "elapsed_time": "1:35:07", "remaining_time": "0:02:04", "throughput": 4279.41, "total_tokens": 24424000}
7489
+ {"current_steps": 37350, "total_steps": 38160, "loss": 0.2928, "lr": 4.1257392664669214e-05, "epoch": 19.57547169811321, "percentage": 97.88, "elapsed_time": "1:35:08", "remaining_time": "0:02:03", "throughput": 4279.46, "total_tokens": 24427552}
7490
+ {"current_steps": 37355, "total_steps": 38160, "loss": 0.2775, "lr": 4.075046814189909e-05, "epoch": 19.578092243186582, "percentage": 97.89, "elapsed_time": "1:35:08", "remaining_time": "0:02:03", "throughput": 4279.44, "total_tokens": 24430176}
7491
+ {"current_steps": 37360, "total_steps": 38160, "loss": 0.353, "lr": 4.0246672923466285e-05, "epoch": 19.58071278825996, "percentage": 97.9, "elapsed_time": "1:35:09", "remaining_time": "0:02:02", "throughput": 4279.45, "total_tokens": 24433312}
7492
+ {"current_steps": 37365, "total_steps": 38160, "loss": 0.2252, "lr": 3.974600711476151e-05, "epoch": 19.583333333333332, "percentage": 97.92, "elapsed_time": "1:35:10", "remaining_time": "0:02:01", "throughput": 4279.48, "total_tokens": 24436608}
7493
+ {"current_steps": 37370, "total_steps": 38160, "loss": 0.3986, "lr": 3.9248470820515964e-05, "epoch": 19.58595387840671, "percentage": 97.93, "elapsed_time": "1:35:10", "remaining_time": "0:02:00", "throughput": 4279.45, "total_tokens": 24439136}
7494
+ {"current_steps": 37375, "total_steps": 38160, "loss": 0.2704, "lr": 3.875406414480975e-05, "epoch": 19.588574423480082, "percentage": 97.94, "elapsed_time": "1:35:11", "remaining_time": "0:01:59", "throughput": 4279.51, "total_tokens": 24442880}
7495
+ {"current_steps": 37380, "total_steps": 38160, "loss": 0.299, "lr": 3.826278719106513e-05, "epoch": 19.59119496855346, "percentage": 97.96, "elapsed_time": "1:35:12", "remaining_time": "0:01:59", "throughput": 4279.55, "total_tokens": 24446240}
7496
+ {"current_steps": 37385, "total_steps": 38160, "loss": 0.3909, "lr": 3.777464006205322e-05, "epoch": 19.593815513626833, "percentage": 97.97, "elapsed_time": "1:35:13", "remaining_time": "0:01:58", "throughput": 4279.57, "total_tokens": 24449376}
7497
+ {"current_steps": 37390, "total_steps": 38160, "loss": 0.4458, "lr": 3.728962285988901e-05, "epoch": 19.59643605870021, "percentage": 97.98, "elapsed_time": "1:35:13", "remaining_time": "0:01:57", "throughput": 4279.59, "total_tokens": 24452608}
7498
+ {"current_steps": 37395, "total_steps": 38160, "loss": 0.3736, "lr": 3.680773568603135e-05, "epoch": 19.599056603773583, "percentage": 98.0, "elapsed_time": "1:35:14", "remaining_time": "0:01:56", "throughput": 4279.54, "total_tokens": 24455008}
7499
+ {"current_steps": 37400, "total_steps": 38160, "loss": 0.4336, "lr": 3.6328978641286256e-05, "epoch": 19.60167714884696, "percentage": 98.01, "elapsed_time": "1:35:15", "remaining_time": "0:01:56", "throughput": 4279.66, "total_tokens": 24459488}
7500
+ {"current_steps": 37405, "total_steps": 38160, "loss": 0.4208, "lr": 3.585335182580529e-05, "epoch": 19.604297693920337, "percentage": 98.02, "elapsed_time": "1:35:16", "remaining_time": "0:01:55", "throughput": 4279.71, "total_tokens": 24463136}
7501
+ {"current_steps": 37410, "total_steps": 38160, "loss": 0.3578, "lr": 3.538085533908219e-05, "epoch": 19.60691823899371, "percentage": 98.03, "elapsed_time": "1:35:16", "remaining_time": "0:01:54", "throughput": 4279.72, "total_tokens": 24466240}
7502
+ {"current_steps": 37415, "total_steps": 38160, "loss": 0.3147, "lr": 3.4911489279957904e-05, "epoch": 19.609538784067087, "percentage": 98.05, "elapsed_time": "1:35:17", "remaining_time": "0:01:53", "throughput": 4279.73, "total_tokens": 24469344}
7503
+ {"current_steps": 37420, "total_steps": 38160, "loss": 0.3787, "lr": 3.444525374662222e-05, "epoch": 19.61215932914046, "percentage": 98.06, "elapsed_time": "1:35:18", "remaining_time": "0:01:53", "throughput": 4279.7, "total_tokens": 24471872}
7504
+ {"current_steps": 37425, "total_steps": 38160, "loss": 0.3344, "lr": 3.398214883660044e-05, "epoch": 19.614779874213838, "percentage": 98.07, "elapsed_time": "1:35:18", "remaining_time": "0:01:52", "throughput": 4279.7, "total_tokens": 24474752}
7505
+ {"current_steps": 37430, "total_steps": 38160, "loss": 0.4616, "lr": 3.3522174646773424e-05, "epoch": 19.61740041928721, "percentage": 98.09, "elapsed_time": "1:35:19", "remaining_time": "0:01:51", "throughput": 4279.89, "total_tokens": 24480000}
7506
+ {"current_steps": 37435, "total_steps": 38160, "loss": 0.3364, "lr": 3.30653312733592e-05, "epoch": 19.620020964360588, "percentage": 98.1, "elapsed_time": "1:35:20", "remaining_time": "0:01:50", "throughput": 4279.87, "total_tokens": 24482656}
7507
+ {"current_steps": 37440, "total_steps": 38160, "loss": 0.3485, "lr": 3.261161881192798e-05, "epoch": 19.62264150943396, "percentage": 98.11, "elapsed_time": "1:35:21", "remaining_time": "0:01:50", "throughput": 4279.89, "total_tokens": 24485856}
7508
+ {"current_steps": 37445, "total_steps": 38160, "loss": 0.4005, "lr": 3.2161037357387175e-05, "epoch": 19.62526205450734, "percentage": 98.13, "elapsed_time": "1:35:21", "remaining_time": "0:01:49", "throughput": 4279.96, "total_tokens": 24489600}
7509
+ {"current_steps": 37450, "total_steps": 38160, "loss": 0.2588, "lr": 3.17135870039964e-05, "epoch": 19.627882599580712, "percentage": 98.14, "elapsed_time": "1:35:22", "remaining_time": "0:01:48", "throughput": 4279.93, "total_tokens": 24492160}
7510
+ {"current_steps": 37455, "total_steps": 38160, "loss": 0.3359, "lr": 3.126926784535577e-05, "epoch": 19.63050314465409, "percentage": 98.15, "elapsed_time": "1:35:23", "remaining_time": "0:01:47", "throughput": 4279.94, "total_tokens": 24495200}
7511
+ {"current_steps": 37460, "total_steps": 38160, "loss": 0.321, "lr": 3.0828079974412614e-05, "epoch": 19.633123689727462, "percentage": 98.17, "elapsed_time": "1:35:23", "remaining_time": "0:01:46", "throughput": 4279.98, "total_tokens": 24498624}
7512
+ {"current_steps": 37465, "total_steps": 38160, "loss": 0.3569, "lr": 3.039002348345643e-05, "epoch": 19.63574423480084, "percentage": 98.18, "elapsed_time": "1:35:24", "remaining_time": "0:01:46", "throughput": 4280.0, "total_tokens": 24501760}
7513
+ {"current_steps": 37470, "total_steps": 38160, "loss": 0.3763, "lr": 2.9955098464125583e-05, "epoch": 19.638364779874212, "percentage": 98.19, "elapsed_time": "1:35:25", "remaining_time": "0:01:45", "throughput": 4280.14, "total_tokens": 24506560}
7514
+ {"current_steps": 37475, "total_steps": 38160, "loss": 0.3262, "lr": 2.9523305007402298e-05, "epoch": 19.64098532494759, "percentage": 98.2, "elapsed_time": "1:35:26", "remaining_time": "0:01:44", "throughput": 4280.15, "total_tokens": 24509472}
7515
+ {"current_steps": 37480, "total_steps": 38160, "loss": 0.3083, "lr": 2.9094643203609328e-05, "epoch": 19.643605870020963, "percentage": 98.22, "elapsed_time": "1:35:27", "remaining_time": "0:01:43", "throughput": 4280.16, "total_tokens": 24512608}
7516
+ {"current_steps": 37485, "total_steps": 38160, "loss": 0.3321, "lr": 2.86691131424216e-05, "epoch": 19.64622641509434, "percentage": 98.23, "elapsed_time": "1:35:27", "remaining_time": "0:01:43", "throughput": 4280.19, "total_tokens": 24516064}
7517
+ {"current_steps": 37490, "total_steps": 38160, "loss": 0.2689, "lr": 2.8246714912851243e-05, "epoch": 19.648846960167717, "percentage": 98.24, "elapsed_time": "1:35:28", "remaining_time": "0:01:42", "throughput": 4280.26, "total_tokens": 24519744}
7518
+ {"current_steps": 37495, "total_steps": 38160, "loss": 0.3438, "lr": 2.7827448603262583e-05, "epoch": 19.65146750524109, "percentage": 98.26, "elapsed_time": "1:35:29", "remaining_time": "0:01:41", "throughput": 4280.29, "total_tokens": 24523072}
7519
+ {"current_steps": 37500, "total_steps": 38160, "loss": 0.3078, "lr": 2.7411314301360456e-05, "epoch": 19.654088050314467, "percentage": 98.27, "elapsed_time": "1:35:30", "remaining_time": "0:01:40", "throughput": 4280.36, "total_tokens": 24526752}
7520
+ {"current_steps": 37505, "total_steps": 38160, "loss": 0.3712, "lr": 2.6998312094193565e-05, "epoch": 19.65670859538784, "percentage": 98.28, "elapsed_time": "1:35:30", "remaining_time": "0:01:40", "throughput": 4280.32, "total_tokens": 24529184}
7521
+ {"current_steps": 37510, "total_steps": 38160, "loss": 0.3889, "lr": 2.6588442068161134e-05, "epoch": 19.659329140461217, "percentage": 98.3, "elapsed_time": "1:35:31", "remaining_time": "0:01:39", "throughput": 4280.34, "total_tokens": 24532576}
7522
+ {"current_steps": 37515, "total_steps": 38160, "loss": 0.2887, "lr": 2.6181704308999575e-05, "epoch": 19.66194968553459, "percentage": 98.31, "elapsed_time": "1:35:32", "remaining_time": "0:01:38", "throughput": 4280.36, "total_tokens": 24535648}
7523
+ {"current_steps": 37520, "total_steps": 38160, "loss": 0.3222, "lr": 2.5778098901794167e-05, "epoch": 19.664570230607968, "percentage": 98.32, "elapsed_time": "1:35:32", "remaining_time": "0:01:37", "throughput": 4280.39, "total_tokens": 24539136}
7524
+ {"current_steps": 37525, "total_steps": 38160, "loss": 0.3779, "lr": 2.5377625930977366e-05, "epoch": 19.66719077568134, "percentage": 98.34, "elapsed_time": "1:35:33", "remaining_time": "0:01:37", "throughput": 4280.48, "total_tokens": 24543008}
7525
+ {"current_steps": 37530, "total_steps": 38160, "loss": 0.3604, "lr": 2.4980285480320496e-05, "epoch": 19.669811320754718, "percentage": 98.35, "elapsed_time": "1:35:34", "remaining_time": "0:01:36", "throughput": 4280.71, "total_tokens": 24549408}
7526
+ {"current_steps": 37535, "total_steps": 38160, "loss": 0.3494, "lr": 2.4586077632943735e-05, "epoch": 19.67243186582809, "percentage": 98.36, "elapsed_time": "1:35:35", "remaining_time": "0:01:35", "throughput": 4280.76, "total_tokens": 24552960}
7527
+ {"current_steps": 37540, "total_steps": 38160, "loss": 0.3286, "lr": 2.4195002471312788e-05, "epoch": 19.67505241090147, "percentage": 98.38, "elapsed_time": "1:35:36", "remaining_time": "0:01:34", "throughput": 4280.74, "total_tokens": 24555648}
7528
+ {"current_steps": 37545, "total_steps": 38160, "loss": 0.3119, "lr": 2.3807060077232212e-05, "epoch": 19.677672955974842, "percentage": 98.39, "elapsed_time": "1:35:37", "remaining_time": "0:01:33", "throughput": 4280.78, "total_tokens": 24558976}
7529
+ {"current_steps": 37550, "total_steps": 38160, "loss": 0.3482, "lr": 2.342225053185709e-05, "epoch": 19.68029350104822, "percentage": 98.4, "elapsed_time": "1:35:37", "remaining_time": "0:01:33", "throughput": 4280.84, "total_tokens": 24562560}
7530
+ {"current_steps": 37555, "total_steps": 38160, "loss": 0.4379, "lr": 2.3040573915686367e-05, "epoch": 19.682914046121592, "percentage": 98.41, "elapsed_time": "1:35:38", "remaining_time": "0:01:32", "throughput": 4280.85, "total_tokens": 24565664}
7531
+ {"current_steps": 37560, "total_steps": 38160, "loss": 0.291, "lr": 2.2662030308561177e-05, "epoch": 19.68553459119497, "percentage": 98.43, "elapsed_time": "1:35:39", "remaining_time": "0:01:31", "throughput": 4280.9, "total_tokens": 24569120}
7532
+ {"current_steps": 37565, "total_steps": 38160, "loss": 0.2926, "lr": 2.2286619789669836e-05, "epoch": 19.688155136268342, "percentage": 98.44, "elapsed_time": "1:35:39", "remaining_time": "0:01:30", "throughput": 4280.93, "total_tokens": 24572320}
7533
+ {"current_steps": 37570, "total_steps": 38160, "loss": 0.4242, "lr": 2.19143424375412e-05, "epoch": 19.69077568134172, "percentage": 98.45, "elapsed_time": "1:35:40", "remaining_time": "0:01:30", "throughput": 4280.97, "total_tokens": 24575808}
7534
+ {"current_steps": 37575, "total_steps": 38160, "loss": 0.3057, "lr": 2.154519833005297e-05, "epoch": 19.693396226415093, "percentage": 98.47, "elapsed_time": "1:35:41", "remaining_time": "0:01:29", "throughput": 4280.99, "total_tokens": 24579072}
7535
+ {"current_steps": 37580, "total_steps": 38160, "loss": 0.2736, "lr": 2.1179187544426713e-05, "epoch": 19.69601677148847, "percentage": 98.48, "elapsed_time": "1:35:42", "remaining_time": "0:01:28", "throughput": 4280.95, "total_tokens": 24581536}
7536
+ {"current_steps": 37585, "total_steps": 38160, "loss": 0.2803, "lr": 2.0816310157227845e-05, "epoch": 19.698637316561843, "percentage": 98.49, "elapsed_time": "1:35:42", "remaining_time": "0:01:27", "throughput": 4281.0, "total_tokens": 24585472}
7537
+ {"current_steps": 37590, "total_steps": 38160, "loss": 0.475, "lr": 2.0456566244365648e-05, "epoch": 19.70125786163522, "percentage": 98.51, "elapsed_time": "1:35:43", "remaining_time": "0:01:27", "throughput": 4281.0, "total_tokens": 24588320}
7538
+ {"current_steps": 37595, "total_steps": 38160, "loss": 0.2657, "lr": 2.009995588109159e-05, "epoch": 19.703878406708597, "percentage": 98.52, "elapsed_time": "1:35:44", "remaining_time": "0:01:26", "throughput": 4281.01, "total_tokens": 24591328}
7539
+ {"current_steps": 37600, "total_steps": 38160, "loss": 0.3899, "lr": 1.9746479142009333e-05, "epoch": 19.70649895178197, "percentage": 98.53, "elapsed_time": "1:35:45", "remaining_time": "0:01:25", "throughput": 4281.07, "total_tokens": 24594976}
7540
+ {"current_steps": 37605, "total_steps": 38160, "loss": 0.2718, "lr": 1.9396136101058058e-05, "epoch": 19.709119496855347, "percentage": 98.55, "elapsed_time": "1:35:45", "remaining_time": "0:01:24", "throughput": 4281.03, "total_tokens": 24597504}
7541
+ {"current_steps": 37610, "total_steps": 38160, "loss": 0.3718, "lr": 1.9048926831529145e-05, "epoch": 19.71174004192872, "percentage": 98.56, "elapsed_time": "1:35:46", "remaining_time": "0:01:24", "throughput": 4281.01, "total_tokens": 24600128}
7542
+ {"current_steps": 37615, "total_steps": 38160, "loss": 0.266, "lr": 1.870485140605116e-05, "epoch": 19.714360587002098, "percentage": 98.57, "elapsed_time": "1:35:47", "remaining_time": "0:01:23", "throughput": 4281.05, "total_tokens": 24603648}
7543
+ {"current_steps": 37620, "total_steps": 38160, "loss": 0.4018, "lr": 1.8363909896604856e-05, "epoch": 19.71698113207547, "percentage": 98.58, "elapsed_time": "1:35:47", "remaining_time": "0:01:22", "throughput": 4281.06, "total_tokens": 24606688}
7544
+ {"current_steps": 37625, "total_steps": 38160, "loss": 0.3587, "lr": 1.8026102374506526e-05, "epoch": 19.719601677148848, "percentage": 98.6, "elapsed_time": "1:35:48", "remaining_time": "0:01:21", "throughput": 4281.14, "total_tokens": 24610688}
7545
+ {"current_steps": 37630, "total_steps": 38160, "loss": 0.2835, "lr": 1.7691428910426297e-05, "epoch": 19.72222222222222, "percentage": 98.61, "elapsed_time": "1:35:49", "remaining_time": "0:01:20", "throughput": 4281.28, "total_tokens": 24615456}
7546
+ {"current_steps": 37635, "total_steps": 38160, "loss": 0.2955, "lr": 1.7359889574369846e-05, "epoch": 19.7248427672956, "percentage": 98.62, "elapsed_time": "1:35:50", "remaining_time": "0:01:20", "throughput": 4281.31, "total_tokens": 24618720}
7547
+ {"current_steps": 37640, "total_steps": 38160, "loss": 0.299, "lr": 1.703148443569502e-05, "epoch": 19.72746331236897, "percentage": 98.64, "elapsed_time": "1:35:50", "remaining_time": "0:01:19", "throughput": 4281.26, "total_tokens": 24621184}
7548
+ {"current_steps": 37645, "total_steps": 38160, "loss": 0.5217, "lr": 1.6706213563098537e-05, "epoch": 19.73008385744235, "percentage": 98.65, "elapsed_time": "1:35:51", "remaining_time": "0:01:18", "throughput": 4281.29, "total_tokens": 24624512}
7549
+ {"current_steps": 37650, "total_steps": 38160, "loss": 0.3575, "lr": 1.63840770246243e-05, "epoch": 19.732704402515722, "percentage": 98.66, "elapsed_time": "1:35:52", "remaining_time": "0:01:17", "throughput": 4281.25, "total_tokens": 24626880}
7550
+ {"current_steps": 37655, "total_steps": 38160, "loss": 0.4205, "lr": 1.6065074887658404e-05, "epoch": 19.7353249475891, "percentage": 98.68, "elapsed_time": "1:35:52", "remaining_time": "0:01:17", "throughput": 4281.26, "total_tokens": 24629888}
7551
+ {"current_steps": 37660, "total_steps": 38160, "loss": 0.2938, "lr": 1.574920721893247e-05, "epoch": 19.737945492662472, "percentage": 98.69, "elapsed_time": "1:35:53", "remaining_time": "0:01:16", "throughput": 4281.3, "total_tokens": 24633408}
7552
+ {"current_steps": 37665, "total_steps": 38160, "loss": 0.4177, "lr": 1.543647408452531e-05, "epoch": 19.74056603773585, "percentage": 98.7, "elapsed_time": "1:35:54", "remaining_time": "0:01:15", "throughput": 4281.27, "total_tokens": 24636096}
7553
+ {"current_steps": 37670, "total_steps": 38160, "loss": 0.3461, "lr": 1.5126875549852924e-05, "epoch": 19.743186582809223, "percentage": 98.72, "elapsed_time": "1:35:55", "remaining_time": "0:01:14", "throughput": 4281.34, "total_tokens": 24639872}
7554
+ {"current_steps": 37675, "total_steps": 38160, "loss": 0.3035, "lr": 1.4820411679681844e-05, "epoch": 19.7458071278826, "percentage": 98.73, "elapsed_time": "1:35:55", "remaining_time": "0:01:14", "throughput": 4281.35, "total_tokens": 24642816}
7555
+ {"current_steps": 37680, "total_steps": 38160, "loss": 0.2499, "lr": 1.4517082538120785e-05, "epoch": 19.748427672955973, "percentage": 98.74, "elapsed_time": "1:35:56", "remaining_time": "0:01:13", "throughput": 4281.31, "total_tokens": 24645312}
7556
+ {"current_steps": 37685, "total_steps": 38160, "loss": 0.4473, "lr": 1.4216888188622323e-05, "epoch": 19.75104821802935, "percentage": 98.76, "elapsed_time": "1:35:57", "remaining_time": "0:01:12", "throughput": 4281.33, "total_tokens": 24648416}
7557
+ {"current_steps": 37690, "total_steps": 38160, "loss": 0.3229, "lr": 1.391982869398456e-05, "epoch": 19.753668763102727, "percentage": 98.77, "elapsed_time": "1:35:57", "remaining_time": "0:01:11", "throughput": 4281.37, "total_tokens": 24651808}
7558
+ {"current_steps": 37695, "total_steps": 38160, "loss": 0.3247, "lr": 1.3625904116347787e-05, "epoch": 19.7562893081761, "percentage": 98.78, "elapsed_time": "1:35:58", "remaining_time": "0:01:11", "throughput": 4281.42, "total_tokens": 24655232}
7559
+ {"current_steps": 37700, "total_steps": 38160, "loss": 0.72, "lr": 1.3335114517199487e-05, "epoch": 19.758909853249477, "percentage": 98.79, "elapsed_time": "1:35:59", "remaining_time": "0:01:10", "throughput": 4281.42, "total_tokens": 24658208}
7560
+ {"current_steps": 37705, "total_steps": 38160, "loss": 0.3998, "lr": 1.3047459957367669e-05, "epoch": 19.76153039832285, "percentage": 98.81, "elapsed_time": "1:36:00", "remaining_time": "0:01:09", "throughput": 4281.45, "total_tokens": 24661536}
7561
+ {"current_steps": 37710, "total_steps": 38160, "loss": 0.3968, "lr": 1.276294049702753e-05, "epoch": 19.764150943396228, "percentage": 98.82, "elapsed_time": "1:36:00", "remaining_time": "0:01:08", "throughput": 4281.48, "total_tokens": 24664832}
7562
+ {"current_steps": 37715, "total_steps": 38160, "loss": 0.3754, "lr": 1.2481556195694798e-05, "epoch": 19.7667714884696, "percentage": 98.83, "elapsed_time": "1:36:01", "remaining_time": "0:01:07", "throughput": 4281.49, "total_tokens": 24667872}
7563
+ {"current_steps": 37720, "total_steps": 38160, "loss": 0.2831, "lr": 1.2203307112235717e-05, "epoch": 19.769392033542978, "percentage": 98.85, "elapsed_time": "1:36:02", "remaining_time": "0:01:07", "throughput": 4281.5, "total_tokens": 24670976}
7564
+ {"current_steps": 37725, "total_steps": 38160, "loss": 0.182, "lr": 1.1928193304855394e-05, "epoch": 19.77201257861635, "percentage": 98.86, "elapsed_time": "1:36:03", "remaining_time": "0:01:06", "throughput": 4281.76, "total_tokens": 24677568}
7565
+ {"current_steps": 37730, "total_steps": 38160, "loss": 0.2954, "lr": 1.1656214831102795e-05, "epoch": 19.77463312368973, "percentage": 98.87, "elapsed_time": "1:36:04", "remaining_time": "0:01:05", "throughput": 4281.79, "total_tokens": 24680768}
7566
+ {"current_steps": 37735, "total_steps": 38160, "loss": 0.3731, "lr": 1.1387371747874076e-05, "epoch": 19.7772536687631, "percentage": 98.89, "elapsed_time": "1:36:04", "remaining_time": "0:01:04", "throughput": 4281.81, "total_tokens": 24684032}
7567
+ {"current_steps": 37740, "total_steps": 38160, "loss": 0.4502, "lr": 1.1121664111409246e-05, "epoch": 19.77987421383648, "percentage": 98.9, "elapsed_time": "1:36:05", "remaining_time": "0:01:04", "throughput": 4281.81, "total_tokens": 24686976}
7568
+ {"current_steps": 37745, "total_steps": 38160, "loss": 0.3066, "lr": 1.0859091977288848e-05, "epoch": 19.782494758909852, "percentage": 98.91, "elapsed_time": "1:36:06", "remaining_time": "0:01:03", "throughput": 4281.86, "total_tokens": 24690560}
7569
+ {"current_steps": 37750, "total_steps": 38160, "loss": 0.2843, "lr": 1.059965540044061e-05, "epoch": 19.78511530398323, "percentage": 98.93, "elapsed_time": "1:36:06", "remaining_time": "0:01:02", "throughput": 4281.84, "total_tokens": 24693344}
7570
+ {"current_steps": 37755, "total_steps": 38160, "loss": 0.3469, "lr": 1.0343354435137785e-05, "epoch": 19.787735849056602, "percentage": 98.94, "elapsed_time": "1:36:07", "remaining_time": "0:01:01", "throughput": 4281.84, "total_tokens": 24696160}
7571
+ {"current_steps": 37760, "total_steps": 38160, "loss": 0.3432, "lr": 1.0090189134994153e-05, "epoch": 19.79035639412998, "percentage": 98.95, "elapsed_time": "1:36:08", "remaining_time": "0:01:01", "throughput": 4281.84, "total_tokens": 24699232}
7572
+ {"current_steps": 37765, "total_steps": 38160, "loss": 0.3263, "lr": 9.84015955296902e-06, "epoch": 19.792976939203353, "percentage": 98.96, "elapsed_time": "1:36:09", "remaining_time": "0:01:00", "throughput": 4281.92, "total_tokens": 24703104}
7573
+ {"current_steps": 37770, "total_steps": 38160, "loss": 0.3064, "lr": 9.593265741365542e-06, "epoch": 19.79559748427673, "percentage": 98.98, "elapsed_time": "1:36:09", "remaining_time": "0:00:59", "throughput": 4281.98, "total_tokens": 24706816}
7574
+ {"current_steps": 37775, "total_steps": 38160, "loss": 0.3831, "lr": 9.349507751830744e-06, "epoch": 19.798218029350103, "percentage": 98.99, "elapsed_time": "1:36:10", "remaining_time": "0:00:58", "throughput": 4281.98, "total_tokens": 24709696}
7575
+ {"current_steps": 37780, "total_steps": 38160, "loss": 0.4208, "lr": 9.108885635357167e-06, "epoch": 19.80083857442348, "percentage": 99.0, "elapsed_time": "1:36:11", "remaining_time": "0:00:58", "throughput": 4282.06, "total_tokens": 24713696}
7576
+ {"current_steps": 37785, "total_steps": 38160, "loss": 0.3336, "lr": 8.871399442277882e-06, "epoch": 19.803459119496857, "percentage": 99.02, "elapsed_time": "1:36:12", "remaining_time": "0:00:57", "throughput": 4282.07, "total_tokens": 24716768}
7577
+ {"current_steps": 37790, "total_steps": 38160, "loss": 0.2969, "lr": 8.637049222276483e-06, "epoch": 19.80607966457023, "percentage": 99.03, "elapsed_time": "1:36:12", "remaining_time": "0:00:56", "throughput": 4282.14, "total_tokens": 24720448}
7578
+ {"current_steps": 37795, "total_steps": 38160, "loss": 0.3216, "lr": 8.405835024372088e-06, "epoch": 19.808700209643607, "percentage": 99.04, "elapsed_time": "1:36:13", "remaining_time": "0:00:55", "throughput": 4282.15, "total_tokens": 24723680}
7579
+ {"current_steps": 37800, "total_steps": 38160, "loss": 0.3528, "lr": 8.177756896934341e-06, "epoch": 19.81132075471698, "percentage": 99.06, "elapsed_time": "1:36:14", "remaining_time": "0:00:54", "throughput": 4282.17, "total_tokens": 24726848}
7580
+ {"current_steps": 37805, "total_steps": 38160, "loss": 0.3532, "lr": 7.952814887675075e-06, "epoch": 19.813941299790358, "percentage": 99.07, "elapsed_time": "1:36:15", "remaining_time": "0:00:54", "throughput": 4282.19, "total_tokens": 24730016}
7581
+ {"current_steps": 37810, "total_steps": 38160, "loss": 0.4313, "lr": 7.731009043648318e-06, "epoch": 19.81656184486373, "percentage": 99.08, "elapsed_time": "1:36:15", "remaining_time": "0:00:53", "throughput": 4282.15, "total_tokens": 24732480}
7582
+ {"current_steps": 37815, "total_steps": 38160, "loss": 0.4184, "lr": 7.512339411251956e-06, "epoch": 19.819182389937108, "percentage": 99.1, "elapsed_time": "1:36:16", "remaining_time": "0:00:52", "throughput": 4282.18, "total_tokens": 24735712}
7583
+ {"current_steps": 37820, "total_steps": 38160, "loss": 0.3281, "lr": 7.296806036232728e-06, "epoch": 19.82180293501048, "percentage": 99.11, "elapsed_time": "1:36:17", "remaining_time": "0:00:51", "throughput": 4282.2, "total_tokens": 24739008}
7584
+ {"current_steps": 37825, "total_steps": 38160, "loss": 0.4272, "lr": 7.084408963674571e-06, "epoch": 19.82442348008386, "percentage": 99.12, "elapsed_time": "1:36:17", "remaining_time": "0:00:51", "throughput": 4282.24, "total_tokens": 24742432}
7585
+ {"current_steps": 37830, "total_steps": 38160, "loss": 0.3488, "lr": 6.875148238010276e-06, "epoch": 19.82704402515723, "percentage": 99.14, "elapsed_time": "1:36:18", "remaining_time": "0:00:50", "throughput": 4282.26, "total_tokens": 24745632}
7586
+ {"current_steps": 37835, "total_steps": 38160, "loss": 0.2501, "lr": 6.669023903014826e-06, "epoch": 19.82966457023061, "percentage": 99.15, "elapsed_time": "1:36:19", "remaining_time": "0:00:49", "throughput": 4282.3, "total_tokens": 24748960}
7587
+ {"current_steps": 37840, "total_steps": 38160, "loss": 0.4849, "lr": 6.466036001807062e-06, "epoch": 19.832285115303982, "percentage": 99.16, "elapsed_time": "1:36:20", "remaining_time": "0:00:48", "throughput": 4282.3, "total_tokens": 24751872}
7588
+ {"current_steps": 37845, "total_steps": 38160, "loss": 0.4126, "lr": 6.266184576848022e-06, "epoch": 19.83490566037736, "percentage": 99.17, "elapsed_time": "1:36:21", "remaining_time": "0:00:48", "throughput": 4282.61, "total_tokens": 24759232}
7589
+ {"current_steps": 37850, "total_steps": 38160, "loss": 0.4677, "lr": 6.069469669945926e-06, "epoch": 19.837526205450732, "percentage": 99.19, "elapsed_time": "1:36:22", "remaining_time": "0:00:47", "throughput": 4282.64, "total_tokens": 24762528}
7590
+ {"current_steps": 37855, "total_steps": 38160, "loss": 0.3437, "lr": 5.875891322251192e-06, "epoch": 19.84014675052411, "percentage": 99.2, "elapsed_time": "1:36:22", "remaining_time": "0:00:46", "throughput": 4282.63, "total_tokens": 24765344}
7591
+ {"current_steps": 37860, "total_steps": 38160, "loss": 0.4074, "lr": 5.685449574258095e-06, "epoch": 19.842767295597483, "percentage": 99.21, "elapsed_time": "1:36:23", "remaining_time": "0:00:45", "throughput": 4282.65, "total_tokens": 24768512}
7592
+ {"current_steps": 37865, "total_steps": 38160, "loss": 0.2465, "lr": 5.498144465804766e-06, "epoch": 19.84538784067086, "percentage": 99.23, "elapsed_time": "1:36:24", "remaining_time": "0:00:45", "throughput": 4282.68, "total_tokens": 24771776}
7593
+ {"current_steps": 37870, "total_steps": 38160, "loss": 0.2904, "lr": 5.313976036073198e-06, "epoch": 19.848008385744233, "percentage": 99.24, "elapsed_time": "1:36:24", "remaining_time": "0:00:44", "throughput": 4282.67, "total_tokens": 24774656}
7594
+ {"current_steps": 37875, "total_steps": 38160, "loss": 0.2953, "lr": 5.132944323589239e-06, "epoch": 19.85062893081761, "percentage": 99.25, "elapsed_time": "1:36:25", "remaining_time": "0:00:43", "throughput": 4282.67, "total_tokens": 24777568}
7595
+ {"current_steps": 37880, "total_steps": 38160, "loss": 0.2357, "lr": 4.955049366224262e-06, "epoch": 19.853249475890987, "percentage": 99.27, "elapsed_time": "1:36:26", "remaining_time": "0:00:42", "throughput": 4282.7, "total_tokens": 24780864}
7596
+ {"current_steps": 37885, "total_steps": 38160, "loss": 0.3016, "lr": 4.7802912011885025e-06, "epoch": 19.85587002096436, "percentage": 99.28, "elapsed_time": "1:36:27", "remaining_time": "0:00:42", "throughput": 4282.74, "total_tokens": 24784256}
7597
+ {"current_steps": 37890, "total_steps": 38160, "loss": 0.334, "lr": 4.608669865042714e-06, "epoch": 19.858490566037737, "percentage": 99.29, "elapsed_time": "1:36:27", "remaining_time": "0:00:41", "throughput": 4282.75, "total_tokens": 24787296}
7598
+ {"current_steps": 37895, "total_steps": 38160, "loss": 0.3998, "lr": 4.4401853936848474e-06, "epoch": 19.86111111111111, "percentage": 99.31, "elapsed_time": "1:36:28", "remaining_time": "0:00:40", "throughput": 4282.91, "total_tokens": 24792320}
7599
+ {"current_steps": 37900, "total_steps": 38160, "loss": 0.2747, "lr": 4.274837822363375e-06, "epoch": 19.863731656184488, "percentage": 99.32, "elapsed_time": "1:36:29", "remaining_time": "0:00:39", "throughput": 4282.9, "total_tokens": 24795072}
7600
+ {"current_steps": 37905, "total_steps": 38160, "loss": 0.4172, "lr": 4.112627185665629e-06, "epoch": 19.86635220125786, "percentage": 99.33, "elapsed_time": "1:36:30", "remaining_time": "0:00:38", "throughput": 4282.9, "total_tokens": 24798016}
7601
+ {"current_steps": 37910, "total_steps": 38160, "loss": 0.3493, "lr": 3.953553517522801e-06, "epoch": 19.868972746331238, "percentage": 99.34, "elapsed_time": "1:36:30", "remaining_time": "0:00:38", "throughput": 4282.86, "total_tokens": 24800544}
7602
+ {"current_steps": 37915, "total_steps": 38160, "loss": 0.402, "lr": 3.7976168512149356e-06, "epoch": 19.87159329140461, "percentage": 99.36, "elapsed_time": "1:36:31", "remaining_time": "0:00:37", "throughput": 4282.89, "total_tokens": 24803872}
7603
+ {"current_steps": 37920, "total_steps": 38160, "loss": 0.3434, "lr": 3.64481721935761e-06, "epoch": 19.87421383647799, "percentage": 99.37, "elapsed_time": "1:36:32", "remaining_time": "0:00:36", "throughput": 4282.87, "total_tokens": 24806464}
7604
+ {"current_steps": 37925, "total_steps": 38160, "loss": 0.3535, "lr": 3.4951546539185862e-06, "epoch": 19.87683438155136, "percentage": 99.38, "elapsed_time": "1:36:33", "remaining_time": "0:00:35", "throughput": 4283.19, "total_tokens": 24814080}
7605
+ {"current_steps": 37930, "total_steps": 38160, "loss": 0.3598, "lr": 3.3486291862028227e-06, "epoch": 19.87945492662474, "percentage": 99.4, "elapsed_time": "1:36:34", "remaining_time": "0:00:35", "throughput": 4283.21, "total_tokens": 24817248}
7606
+ {"current_steps": 37935, "total_steps": 38160, "loss": 0.2398, "lr": 3.2052408468641324e-06, "epoch": 19.882075471698112, "percentage": 99.41, "elapsed_time": "1:36:34", "remaining_time": "0:00:34", "throughput": 4283.23, "total_tokens": 24820480}
7607
+ {"current_steps": 37940, "total_steps": 38160, "loss": 0.333, "lr": 3.06498966589519e-06, "epoch": 19.88469601677149, "percentage": 99.42, "elapsed_time": "1:36:35", "remaining_time": "0:00:33", "throughput": 4283.22, "total_tokens": 24823200}
7608
+ {"current_steps": 37945, "total_steps": 38160, "loss": 0.2701, "lr": 2.9278756726375255e-06, "epoch": 19.887316561844862, "percentage": 99.44, "elapsed_time": "1:36:36", "remaining_time": "0:00:32", "throughput": 4283.22, "total_tokens": 24826112}
7609
+ {"current_steps": 37950, "total_steps": 38160, "loss": 0.3775, "lr": 2.7938988957715292e-06, "epoch": 19.88993710691824, "percentage": 99.45, "elapsed_time": "1:36:36", "remaining_time": "0:00:32", "throughput": 4283.19, "total_tokens": 24828576}
7610
+ {"current_steps": 37955, "total_steps": 38160, "loss": 0.2581, "lr": 2.6630593633264474e-06, "epoch": 19.892557651991613, "percentage": 99.46, "elapsed_time": "1:36:37", "remaining_time": "0:00:31", "throughput": 4283.28, "total_tokens": 24832672}
7611
+ {"current_steps": 37960, "total_steps": 38160, "loss": 0.3086, "lr": 2.5353571026687227e-06, "epoch": 19.89517819706499, "percentage": 99.48, "elapsed_time": "1:36:38", "remaining_time": "0:00:30", "throughput": 4283.28, "total_tokens": 24835616}
7612
+ {"current_steps": 37965, "total_steps": 38160, "loss": 0.3128, "lr": 2.4107921405153164e-06, "epoch": 19.897798742138363, "percentage": 99.49, "elapsed_time": "1:36:39", "remaining_time": "0:00:29", "throughput": 4283.33, "total_tokens": 24839328}
7613
+ {"current_steps": 37970, "total_steps": 38160, "loss": 0.2833, "lr": 2.289364502922053e-06, "epoch": 19.90041928721174, "percentage": 99.5, "elapsed_time": "1:36:39", "remaining_time": "0:00:29", "throughput": 4283.32, "total_tokens": 24842176}
7614
+ {"current_steps": 37975, "total_steps": 38160, "loss": 0.5084, "lr": 2.171074215291946e-06, "epoch": 19.903039832285117, "percentage": 99.52, "elapsed_time": "1:36:40", "remaining_time": "0:00:28", "throughput": 4283.32, "total_tokens": 24845088}
7615
+ {"current_steps": 37980, "total_steps": 38160, "loss": 0.2654, "lr": 2.055921302368535e-06, "epoch": 19.90566037735849, "percentage": 99.53, "elapsed_time": "1:36:41", "remaining_time": "0:00:27", "throughput": 4283.32, "total_tokens": 24848032}
7616
+ {"current_steps": 37985, "total_steps": 38160, "loss": 0.375, "lr": 1.9439057882392194e-06, "epoch": 19.908280922431867, "percentage": 99.54, "elapsed_time": "1:36:41", "remaining_time": "0:00:26", "throughput": 4283.37, "total_tokens": 24851584}
7617
+ {"current_steps": 37990, "total_steps": 38160, "loss": 0.3886, "lr": 1.8350276963402522e-06, "epoch": 19.91090146750524, "percentage": 99.55, "elapsed_time": "1:36:42", "remaining_time": "0:00:25", "throughput": 4283.37, "total_tokens": 24854464}
7618
+ {"current_steps": 37995, "total_steps": 38160, "loss": 0.3195, "lr": 1.7292870494434176e-06, "epoch": 19.913522012578618, "percentage": 99.57, "elapsed_time": "1:36:43", "remaining_time": "0:00:25", "throughput": 4283.35, "total_tokens": 24857152}
7619
+ {"current_steps": 38000, "total_steps": 38160, "loss": 0.3878, "lr": 1.6266838696710196e-06, "epoch": 19.91614255765199, "percentage": 99.58, "elapsed_time": "1:36:43", "remaining_time": "0:00:24", "throughput": 4283.38, "total_tokens": 24860480}
7620
+ {"current_steps": 38005, "total_steps": 38160, "loss": 0.3263, "lr": 1.5272181784858895e-06, "epoch": 19.918763102725368, "percentage": 99.59, "elapsed_time": "1:36:44", "remaining_time": "0:00:23", "throughput": 4283.39, "total_tokens": 24863680}
7621
+ {"current_steps": 38010, "total_steps": 38160, "loss": 0.3998, "lr": 1.4308899966963827e-06, "epoch": 19.92138364779874, "percentage": 99.61, "elapsed_time": "1:36:45", "remaining_time": "0:00:22", "throughput": 4283.42, "total_tokens": 24866976}
7622
+ {"current_steps": 38015, "total_steps": 38160, "loss": 0.3332, "lr": 1.3376993444497165e-06, "epoch": 19.92400419287212, "percentage": 99.62, "elapsed_time": "1:36:46", "remaining_time": "0:00:22", "throughput": 4283.45, "total_tokens": 24870240}
7623
+ {"current_steps": 38020, "total_steps": 38160, "loss": 0.5161, "lr": 1.2476462412436273e-06, "epoch": 19.92662473794549, "percentage": 99.63, "elapsed_time": "1:36:46", "remaining_time": "0:00:21", "throughput": 4283.55, "total_tokens": 24874400}
7624
+ {"current_steps": 38025, "total_steps": 38160, "loss": 0.3837, "lr": 1.1607307059163796e-06, "epoch": 19.92924528301887, "percentage": 99.65, "elapsed_time": "1:36:47", "remaining_time": "0:00:20", "throughput": 4283.64, "total_tokens": 24878368}
7625
+ {"current_steps": 38030, "total_steps": 38160, "loss": 0.2671, "lr": 1.076952756646765e-06, "epoch": 19.931865828092242, "percentage": 99.66, "elapsed_time": "1:36:48", "remaining_time": "0:00:19", "throughput": 4283.63, "total_tokens": 24881184}
7626
+ {"current_steps": 38035, "total_steps": 38160, "loss": 0.3162, "lr": 9.9631241096243e-07, "epoch": 19.93448637316562, "percentage": 99.67, "elapsed_time": "1:36:49", "remaining_time": "0:00:19", "throughput": 4283.67, "total_tokens": 24884704}
7627
+ {"current_steps": 38040, "total_steps": 38160, "loss": 0.4664, "lr": 9.188096857315475e-07, "epoch": 19.937106918238992, "percentage": 99.69, "elapsed_time": "1:36:49", "remaining_time": "0:00:18", "throughput": 4283.66, "total_tokens": 24887456}
7628
+ {"current_steps": 38045, "total_steps": 38160, "loss": 0.3454, "lr": 8.444445971678149e-07, "epoch": 19.93972746331237, "percentage": 99.7, "elapsed_time": "1:36:50", "remaining_time": "0:00:17", "throughput": 4283.68, "total_tokens": 24890592}
7629
+ {"current_steps": 38050, "total_steps": 38160, "loss": 0.3421, "lr": 7.732171608271221e-07, "epoch": 19.942348008385743, "percentage": 99.71, "elapsed_time": "1:36:51", "remaining_time": "0:00:16", "throughput": 4283.71, "total_tokens": 24893856}
7630
+ {"current_steps": 38055, "total_steps": 38160, "loss": 0.2343, "lr": 7.051273916075517e-07, "epoch": 19.94496855345912, "percentage": 99.72, "elapsed_time": "1:36:51", "remaining_time": "0:00:16", "throughput": 4283.69, "total_tokens": 24896544}
7631
+ {"current_steps": 38060, "total_steps": 38160, "loss": 0.243, "lr": 6.401753037560409e-07, "epoch": 19.947589098532493, "percentage": 99.74, "elapsed_time": "1:36:52", "remaining_time": "0:00:15", "throughput": 4283.64, "total_tokens": 24898848}
7632
+ {"current_steps": 38065, "total_steps": 38160, "loss": 0.2371, "lr": 5.783609108567233e-07, "epoch": 19.95020964360587, "percentage": 99.75, "elapsed_time": "1:36:53", "remaining_time": "0:00:14", "throughput": 4283.62, "total_tokens": 24901472}
7633
+ {"current_steps": 38070, "total_steps": 38160, "loss": 0.3038, "lr": 5.196842258425871e-07, "epoch": 19.952830188679247, "percentage": 99.76, "elapsed_time": "1:36:53", "remaining_time": "0:00:13", "throughput": 4283.65, "total_tokens": 24904768}
7634
+ {"current_steps": 38075, "total_steps": 38160, "loss": 0.3608, "lr": 4.641452609871477e-07, "epoch": 19.95545073375262, "percentage": 99.78, "elapsed_time": "1:36:54", "remaining_time": "0:00:12", "throughput": 4283.72, "total_tokens": 24908512}
7635
+ {"current_steps": 38080, "total_steps": 38160, "loss": 0.4481, "lr": 4.117440279094442e-07, "epoch": 19.958071278825997, "percentage": 99.79, "elapsed_time": "1:36:55", "remaining_time": "0:00:12", "throughput": 4283.73, "total_tokens": 24911488}
7636
+ {"current_steps": 38085, "total_steps": 38160, "loss": 0.3307, "lr": 3.624805375690432e-07, "epoch": 19.96069182389937, "percentage": 99.8, "elapsed_time": "1:36:56", "remaining_time": "0:00:11", "throughput": 4283.75, "total_tokens": 24914624}
7637
+ {"current_steps": 38090, "total_steps": 38160, "loss": 0.3636, "lr": 3.1635480027436545e-07, "epoch": 19.963312368972748, "percentage": 99.82, "elapsed_time": "1:36:56", "remaining_time": "0:00:10", "throughput": 4283.76, "total_tokens": 24917632}
7638
+ {"current_steps": 38095, "total_steps": 38160, "loss": 0.3551, "lr": 2.7336682567269397e-07, "epoch": 19.96593291404612, "percentage": 99.83, "elapsed_time": "1:36:57", "remaining_time": "0:00:09", "throughput": 4283.78, "total_tokens": 24920768}
7639
+ {"current_steps": 38100, "total_steps": 38160, "loss": 0.4413, "lr": 2.335166227551699e-07, "epoch": 19.968553459119498, "percentage": 99.84, "elapsed_time": "1:36:58", "remaining_time": "0:00:09", "throughput": 4283.79, "total_tokens": 24923936}
7640
+ {"current_steps": 38105, "total_steps": 38160, "loss": 0.2632, "lr": 1.9680419986178866e-07, "epoch": 19.97117400419287, "percentage": 99.86, "elapsed_time": "1:36:58", "remaining_time": "0:00:08", "throughput": 4283.84, "total_tokens": 24927424}
7641
+ {"current_steps": 38110, "total_steps": 38160, "loss": 0.3832, "lr": 1.6322956466807703e-07, "epoch": 19.97379454926625, "percentage": 99.87, "elapsed_time": "1:36:59", "remaining_time": "0:00:07", "throughput": 4283.83, "total_tokens": 24930080}
7642
+ {"current_steps": 38115, "total_steps": 38160, "loss": 0.3409, "lr": 1.327927242017468e-07, "epoch": 19.97641509433962, "percentage": 99.88, "elapsed_time": "1:37:00", "remaining_time": "0:00:06", "throughput": 4283.87, "total_tokens": 24933664}
7643
+ {"current_steps": 38120, "total_steps": 38160, "loss": 0.3697, "lr": 1.0549368482604126e-07, "epoch": 19.979035639413, "percentage": 99.9, "elapsed_time": "1:37:01", "remaining_time": "0:00:06", "throughput": 4283.89, "total_tokens": 24936800}
7644
+ {"current_steps": 38125, "total_steps": 38160, "loss": 0.4071, "lr": 8.133245225305784e-08, "epoch": 19.981656184486372, "percentage": 99.91, "elapsed_time": "1:37:01", "remaining_time": "0:00:05", "throughput": 4283.93, "total_tokens": 24940352}
7645
+ {"current_steps": 38130, "total_steps": 38160, "loss": 0.3685, "lr": 6.030903153875222e-08, "epoch": 19.98427672955975, "percentage": 99.92, "elapsed_time": "1:37:02", "remaining_time": "0:00:04", "throughput": 4283.97, "total_tokens": 24943808}
7646
+ {"current_steps": 38135, "total_steps": 38160, "loss": 0.3298, "lr": 4.242342707794222e-08, "epoch": 19.986897274633122, "percentage": 99.93, "elapsed_time": "1:37:03", "remaining_time": "0:00:03", "throughput": 4283.94, "total_tokens": 24946304}
7647
+ {"current_steps": 38140, "total_steps": 38160, "loss": 0.3569, "lr": 2.767564261429989e-08, "epoch": 19.9895178197065, "percentage": 99.95, "elapsed_time": "1:37:03", "remaining_time": "0:00:03", "throughput": 4283.95, "total_tokens": 24949440}
7648
+ {"current_steps": 38145, "total_steps": 38160, "loss": 0.3058, "lr": 1.6065681232024784e-08, "epoch": 19.992138364779873, "percentage": 99.96, "elapsed_time": "1:37:04", "remaining_time": "0:00:02", "throughput": 4284.02, "total_tokens": 24953376}
7649
+ {"current_steps": 38150, "total_steps": 38160, "loss": 0.3233, "lr": 7.593545359174669e-09, "epoch": 19.99475890985325, "percentage": 99.97, "elapsed_time": "1:37:05", "remaining_time": "0:00:01", "throughput": 4284.15, "total_tokens": 24957984}
7650
+ {"current_steps": 38155, "total_steps": 38160, "loss": 0.2959, "lr": 2.259236769330819e-09, "epoch": 19.997379454926623, "percentage": 99.99, "elapsed_time": "1:37:06", "remaining_time": "0:00:00", "throughput": 4284.15, "total_tokens": 24960832}
7651
+ {"current_steps": 38160, "total_steps": 38160, "loss": 0.4804, "lr": 6.275657826737557e-11, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:37:07", "remaining_time": "0:00:00", "throughput": 4284.13, "total_tokens": 24964664}
7652
+ {"current_steps": 38160, "total_steps": 38160, "eval_loss": 0.5405035018920898, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:37:20", "remaining_time": "0:00:00", "throughput": 4274.14, "total_tokens": 24964664}
7653
+ {"current_steps": 38160, "total_steps": 38160, "epoch": 20.0, "percentage": 100.0, "elapsed_time": "1:37:22", "remaining_time": "0:00:00", "throughput": 4273.06, "total_tokens": 24964664}