rbelanec commited on
Commit
6cb7394
·
verified ·
1 Parent(s): 3e11bfb

Training in progress, step 6696

Browse files
Files changed (2) hide show
  1. adapter_model.safetensors +1 -1
  2. trainer_log.jsonl +225 -0
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a4142e7cc12df8eaa0538d0084b6d8e846b1544bbe2014d8210e8bd9ca510fa
3
  size 1638528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed03ff9b1bbe6caca197c3750a59a879fcd545c9d5d7e8d407e7faf4e7fd2055
3
  size 1638528
trainer_log.jsonl CHANGED
@@ -1121,3 +1121,228 @@
1121
  {"current_steps": 5580, "total_steps": 22320, "eval_loss": 0.6111505627632141, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:17:42", "remaining_time": "0:53:08", "throughput": 2000.64, "total_tokens": 2126656}
1122
  {"current_steps": 5585, "total_steps": 22320, "loss": 0.6584, "lr": 0.0009328562256663351, "epoch": 5.004480286738351, "percentage": 25.02, "elapsed_time": "0:17:44", "remaining_time": "0:53:10", "throughput": 1998.91, "total_tokens": 2128416}
1123
  {"current_steps": 5590, "total_steps": 22320, "loss": 0.6561, "lr": 0.0009326603921852599, "epoch": 5.008960573476703, "percentage": 25.04, "elapsed_time": "0:17:45", "remaining_time": "0:53:09", "throughput": 1998.98, "total_tokens": 2130304}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1121
  {"current_steps": 5580, "total_steps": 22320, "eval_loss": 0.6111505627632141, "epoch": 5.0, "percentage": 25.0, "elapsed_time": "0:17:42", "remaining_time": "0:53:08", "throughput": 2000.64, "total_tokens": 2126656}
1122
  {"current_steps": 5585, "total_steps": 22320, "loss": 0.6584, "lr": 0.0009328562256663351, "epoch": 5.004480286738351, "percentage": 25.02, "elapsed_time": "0:17:44", "remaining_time": "0:53:10", "throughput": 1998.91, "total_tokens": 2128416}
1123
  {"current_steps": 5590, "total_steps": 22320, "loss": 0.6561, "lr": 0.0009326603921852599, "epoch": 5.008960573476703, "percentage": 25.04, "elapsed_time": "0:17:45", "remaining_time": "0:53:09", "throughput": 1998.98, "total_tokens": 2130304}
1124
+ {"current_steps": 5595, "total_steps": 22320, "loss": 0.5693, "lr": 0.0009324642941507087, "epoch": 5.013440860215054, "percentage": 25.07, "elapsed_time": "0:17:46", "remaining_time": "0:53:08", "throughput": 1999.07, "total_tokens": 2132032}
1125
+ {"current_steps": 5600, "total_steps": 22320, "loss": 0.5846, "lr": 0.0009322679316825871, "epoch": 5.017921146953405, "percentage": 25.09, "elapsed_time": "0:17:47", "remaining_time": "0:53:07", "throughput": 1999.11, "total_tokens": 2133952}
1126
+ {"current_steps": 5605, "total_steps": 22320, "loss": 0.5499, "lr": 0.0009320713049009624, "epoch": 5.022401433691757, "percentage": 25.11, "elapsed_time": "0:17:48", "remaining_time": "0:53:05", "throughput": 1999.18, "total_tokens": 2135776}
1127
+ {"current_steps": 5610, "total_steps": 22320, "loss": 0.5912, "lr": 0.0009318744139260635, "epoch": 5.026881720430108, "percentage": 25.13, "elapsed_time": "0:17:49", "remaining_time": "0:53:04", "throughput": 1999.22, "total_tokens": 2137632}
1128
+ {"current_steps": 5615, "total_steps": 22320, "loss": 0.4714, "lr": 0.0009316772588782811, "epoch": 5.031362007168458, "percentage": 25.16, "elapsed_time": "0:17:50", "remaining_time": "0:53:03", "throughput": 1999.24, "total_tokens": 2139392}
1129
+ {"current_steps": 5620, "total_steps": 22320, "loss": 0.564, "lr": 0.000931479839878167, "epoch": 5.03584229390681, "percentage": 25.18, "elapsed_time": "0:17:51", "remaining_time": "0:53:02", "throughput": 1999.33, "total_tokens": 2141312}
1130
+ {"current_steps": 5625, "total_steps": 22320, "loss": 0.4819, "lr": 0.0009312821570464347, "epoch": 5.040322580645161, "percentage": 25.2, "elapsed_time": "0:17:51", "remaining_time": "0:53:01", "throughput": 1999.4, "total_tokens": 2143200}
1131
+ {"current_steps": 5630, "total_steps": 22320, "loss": 0.5162, "lr": 0.0009310842105039587, "epoch": 5.044802867383512, "percentage": 25.22, "elapsed_time": "0:17:52", "remaining_time": "0:53:00", "throughput": 1999.55, "total_tokens": 2145280}
1132
+ {"current_steps": 5635, "total_steps": 22320, "loss": 0.6686, "lr": 0.0009308860003717749, "epoch": 5.049283154121864, "percentage": 25.25, "elapsed_time": "0:17:53", "remaining_time": "0:52:59", "throughput": 1999.61, "total_tokens": 2147168}
1133
+ {"current_steps": 5640, "total_steps": 22320, "loss": 0.7079, "lr": 0.0009306875267710803, "epoch": 5.053763440860215, "percentage": 25.27, "elapsed_time": "0:17:54", "remaining_time": "0:52:58", "throughput": 1999.73, "total_tokens": 2149184}
1134
+ {"current_steps": 5645, "total_steps": 22320, "loss": 0.5841, "lr": 0.0009304887898232333, "epoch": 5.058243727598566, "percentage": 25.29, "elapsed_time": "0:17:55", "remaining_time": "0:52:57", "throughput": 1999.83, "total_tokens": 2151168}
1135
+ {"current_steps": 5650, "total_steps": 22320, "loss": 0.6008, "lr": 0.0009302897896497532, "epoch": 5.062724014336918, "percentage": 25.31, "elapsed_time": "0:17:56", "remaining_time": "0:52:56", "throughput": 1999.95, "total_tokens": 2153216}
1136
+ {"current_steps": 5655, "total_steps": 22320, "loss": 0.5535, "lr": 0.0009300905263723198, "epoch": 5.067204301075269, "percentage": 25.34, "elapsed_time": "0:17:57", "remaining_time": "0:52:55", "throughput": 2000.13, "total_tokens": 2155264}
1137
+ {"current_steps": 5660, "total_steps": 22320, "loss": 0.6116, "lr": 0.0009298910001127744, "epoch": 5.07168458781362, "percentage": 25.36, "elapsed_time": "0:17:58", "remaining_time": "0:52:54", "throughput": 2000.17, "total_tokens": 2157056}
1138
+ {"current_steps": 5665, "total_steps": 22320, "loss": 0.6062, "lr": 0.0009296912109931188, "epoch": 5.076164874551972, "percentage": 25.38, "elapsed_time": "0:17:59", "remaining_time": "0:52:53", "throughput": 2000.23, "total_tokens": 2158880}
1139
+ {"current_steps": 5670, "total_steps": 22320, "loss": 0.5473, "lr": 0.0009294911591355156, "epoch": 5.080645161290323, "percentage": 25.4, "elapsed_time": "0:18:00", "remaining_time": "0:52:52", "throughput": 2000.3, "total_tokens": 2160768}
1140
+ {"current_steps": 5675, "total_steps": 22320, "loss": 0.5099, "lr": 0.0009292908446622881, "epoch": 5.085125448028673, "percentage": 25.43, "elapsed_time": "0:18:01", "remaining_time": "0:52:50", "throughput": 2000.31, "total_tokens": 2162528}
1141
+ {"current_steps": 5680, "total_steps": 22320, "loss": 0.5118, "lr": 0.0009290902676959197, "epoch": 5.089605734767025, "percentage": 25.45, "elapsed_time": "0:18:02", "remaining_time": "0:52:49", "throughput": 2000.46, "total_tokens": 2164544}
1142
+ {"current_steps": 5685, "total_steps": 22320, "loss": 0.5328, "lr": 0.0009288894283590552, "epoch": 5.094086021505376, "percentage": 25.47, "elapsed_time": "0:18:02", "remaining_time": "0:52:48", "throughput": 2000.58, "total_tokens": 2166560}
1143
+ {"current_steps": 5690, "total_steps": 22320, "loss": 0.6238, "lr": 0.000928688326774499, "epoch": 5.098566308243727, "percentage": 25.49, "elapsed_time": "0:18:03", "remaining_time": "0:52:47", "throughput": 2000.65, "total_tokens": 2168448}
1144
+ {"current_steps": 5695, "total_steps": 22320, "loss": 0.5527, "lr": 0.0009284869630652164, "epoch": 5.103046594982079, "percentage": 25.52, "elapsed_time": "0:18:04", "remaining_time": "0:52:46", "throughput": 2000.76, "total_tokens": 2170336}
1145
+ {"current_steps": 5700, "total_steps": 22320, "loss": 0.5412, "lr": 0.0009282853373543326, "epoch": 5.10752688172043, "percentage": 25.54, "elapsed_time": "0:18:05", "remaining_time": "0:52:45", "throughput": 2000.8, "total_tokens": 2172128}
1146
+ {"current_steps": 5705, "total_steps": 22320, "loss": 0.5753, "lr": 0.0009280834497651332, "epoch": 5.112007168458781, "percentage": 25.56, "elapsed_time": "0:18:06", "remaining_time": "0:52:44", "throughput": 2000.99, "total_tokens": 2174176}
1147
+ {"current_steps": 5710, "total_steps": 22320, "loss": 0.6867, "lr": 0.000927881300421064, "epoch": 5.116487455197133, "percentage": 25.58, "elapsed_time": "0:18:07", "remaining_time": "0:52:43", "throughput": 2001.01, "total_tokens": 2175936}
1148
+ {"current_steps": 5715, "total_steps": 22320, "loss": 0.4554, "lr": 0.0009276788894457309, "epoch": 5.120967741935484, "percentage": 25.6, "elapsed_time": "0:18:08", "remaining_time": "0:52:42", "throughput": 2001.07, "total_tokens": 2177760}
1149
+ {"current_steps": 5720, "total_steps": 22320, "loss": 0.5557, "lr": 0.0009274762169628991, "epoch": 5.125448028673835, "percentage": 25.63, "elapsed_time": "0:18:09", "remaining_time": "0:52:40", "throughput": 2001.19, "total_tokens": 2179712}
1150
+ {"current_steps": 5725, "total_steps": 22320, "loss": 0.6007, "lr": 0.0009272732830964948, "epoch": 5.129928315412187, "percentage": 25.65, "elapsed_time": "0:18:10", "remaining_time": "0:52:39", "throughput": 2001.23, "total_tokens": 2181568}
1151
+ {"current_steps": 5730, "total_steps": 22320, "loss": 0.5402, "lr": 0.000927070087970603, "epoch": 5.134408602150538, "percentage": 25.67, "elapsed_time": "0:18:11", "remaining_time": "0:52:38", "throughput": 2001.29, "total_tokens": 2183456}
1152
+ {"current_steps": 5735, "total_steps": 22320, "loss": 0.5426, "lr": 0.0009268666317094692, "epoch": 5.138888888888889, "percentage": 25.69, "elapsed_time": "0:18:11", "remaining_time": "0:52:37", "throughput": 2001.38, "total_tokens": 2185312}
1153
+ {"current_steps": 5740, "total_steps": 22320, "loss": 0.4761, "lr": 0.000926662914437498, "epoch": 5.14336917562724, "percentage": 25.72, "elapsed_time": "0:18:12", "remaining_time": "0:52:36", "throughput": 2001.47, "total_tokens": 2187232}
1154
+ {"current_steps": 5745, "total_steps": 22320, "loss": 0.5939, "lr": 0.0009264589362792543, "epoch": 5.147849462365591, "percentage": 25.74, "elapsed_time": "0:18:13", "remaining_time": "0:52:35", "throughput": 2001.61, "total_tokens": 2189280}
1155
+ {"current_steps": 5750, "total_steps": 22320, "loss": 0.5147, "lr": 0.0009262546973594617, "epoch": 5.152329749103942, "percentage": 25.76, "elapsed_time": "0:18:14", "remaining_time": "0:52:34", "throughput": 2001.68, "total_tokens": 2191168}
1156
+ {"current_steps": 5755, "total_steps": 22320, "loss": 0.5272, "lr": 0.0009260501978030038, "epoch": 5.156810035842294, "percentage": 25.78, "elapsed_time": "0:18:15", "remaining_time": "0:52:33", "throughput": 2001.79, "total_tokens": 2192992}
1157
+ {"current_steps": 5760, "total_steps": 22320, "loss": 0.5273, "lr": 0.0009258454377349233, "epoch": 5.161290322580645, "percentage": 25.81, "elapsed_time": "0:18:16", "remaining_time": "0:52:32", "throughput": 2001.86, "total_tokens": 2194816}
1158
+ {"current_steps": 5765, "total_steps": 22320, "loss": 0.6803, "lr": 0.0009256404172804224, "epoch": 5.165770609318996, "percentage": 25.83, "elapsed_time": "0:18:17", "remaining_time": "0:52:31", "throughput": 2001.93, "total_tokens": 2196768}
1159
+ {"current_steps": 5770, "total_steps": 22320, "loss": 0.663, "lr": 0.0009254351365648623, "epoch": 5.170250896057348, "percentage": 25.85, "elapsed_time": "0:18:18", "remaining_time": "0:52:30", "throughput": 2002.0, "total_tokens": 2198656}
1160
+ {"current_steps": 5775, "total_steps": 22320, "loss": 0.5354, "lr": 0.0009252295957137637, "epoch": 5.174731182795699, "percentage": 25.87, "elapsed_time": "0:18:19", "remaining_time": "0:52:28", "throughput": 2002.04, "total_tokens": 2200512}
1161
+ {"current_steps": 5780, "total_steps": 22320, "loss": 0.5608, "lr": 0.0009250237948528057, "epoch": 5.17921146953405, "percentage": 25.9, "elapsed_time": "0:18:20", "remaining_time": "0:52:27", "throughput": 2002.11, "total_tokens": 2202400}
1162
+ {"current_steps": 5785, "total_steps": 22320, "loss": 0.5331, "lr": 0.0009248177341078272, "epoch": 5.183691756272402, "percentage": 25.92, "elapsed_time": "0:18:20", "remaining_time": "0:52:26", "throughput": 2002.16, "total_tokens": 2204256}
1163
+ {"current_steps": 5790, "total_steps": 22320, "loss": 0.6531, "lr": 0.0009246114136048254, "epoch": 5.188172043010753, "percentage": 25.94, "elapsed_time": "0:18:21", "remaining_time": "0:52:25", "throughput": 2002.23, "total_tokens": 2206144}
1164
+ {"current_steps": 5795, "total_steps": 22320, "loss": 0.5665, "lr": 0.0009244048334699567, "epoch": 5.192652329749104, "percentage": 25.96, "elapsed_time": "0:18:22", "remaining_time": "0:52:24", "throughput": 2002.36, "total_tokens": 2208256}
1165
+ {"current_steps": 5800, "total_steps": 22320, "loss": 0.5843, "lr": 0.0009241979938295358, "epoch": 5.197132616487456, "percentage": 25.99, "elapsed_time": "0:18:23", "remaining_time": "0:52:23", "throughput": 2002.43, "total_tokens": 2210080}
1166
+ {"current_steps": 5805, "total_steps": 22320, "loss": 0.5066, "lr": 0.0009239908948100364, "epoch": 5.201612903225806, "percentage": 26.01, "elapsed_time": "0:18:24", "remaining_time": "0:52:22", "throughput": 2002.44, "total_tokens": 2211904}
1167
+ {"current_steps": 5810, "total_steps": 22320, "loss": 0.5934, "lr": 0.0009237835365380912, "epoch": 5.206093189964157, "percentage": 26.03, "elapsed_time": "0:18:25", "remaining_time": "0:52:21", "throughput": 2002.49, "total_tokens": 2213760}
1168
+ {"current_steps": 5815, "total_steps": 22320, "loss": 0.478, "lr": 0.0009235759191404904, "epoch": 5.210573476702509, "percentage": 26.05, "elapsed_time": "0:18:26", "remaining_time": "0:52:20", "throughput": 2002.6, "total_tokens": 2215648}
1169
+ {"current_steps": 5820, "total_steps": 22320, "loss": 0.5445, "lr": 0.0009233680427441836, "epoch": 5.21505376344086, "percentage": 26.08, "elapsed_time": "0:18:27", "remaining_time": "0:52:19", "throughput": 2002.67, "total_tokens": 2217408}
1170
+ {"current_steps": 5825, "total_steps": 22320, "loss": 0.6139, "lr": 0.0009231599074762784, "epoch": 5.219534050179211, "percentage": 26.1, "elapsed_time": "0:18:28", "remaining_time": "0:52:18", "throughput": 2002.74, "total_tokens": 2219360}
1171
+ {"current_steps": 5830, "total_steps": 22320, "loss": 0.4555, "lr": 0.0009229515134640405, "epoch": 5.224014336917563, "percentage": 26.12, "elapsed_time": "0:18:29", "remaining_time": "0:52:16", "throughput": 2002.8, "total_tokens": 2221120}
1172
+ {"current_steps": 5835, "total_steps": 22320, "loss": 0.5687, "lr": 0.000922742860834894, "epoch": 5.228494623655914, "percentage": 26.14, "elapsed_time": "0:18:29", "remaining_time": "0:52:15", "throughput": 2002.87, "total_tokens": 2223072}
1173
+ {"current_steps": 5840, "total_steps": 22320, "loss": 0.507, "lr": 0.0009225339497164214, "epoch": 5.232974910394265, "percentage": 26.16, "elapsed_time": "0:18:30", "remaining_time": "0:52:14", "throughput": 2003.04, "total_tokens": 2225184}
1174
+ {"current_steps": 5845, "total_steps": 22320, "loss": 0.5913, "lr": 0.0009223247802363628, "epoch": 5.237455197132617, "percentage": 26.19, "elapsed_time": "0:18:31", "remaining_time": "0:52:13", "throughput": 2003.14, "total_tokens": 2227136}
1175
+ {"current_steps": 5850, "total_steps": 22320, "loss": 0.5344, "lr": 0.0009221153525226164, "epoch": 5.241935483870968, "percentage": 26.21, "elapsed_time": "0:18:32", "remaining_time": "0:52:12", "throughput": 2003.18, "total_tokens": 2228928}
1176
+ {"current_steps": 5855, "total_steps": 22320, "loss": 0.5172, "lr": 0.0009219056667032384, "epoch": 5.246415770609319, "percentage": 26.23, "elapsed_time": "0:18:33", "remaining_time": "0:52:11", "throughput": 2003.3, "total_tokens": 2230976}
1177
+ {"current_steps": 5860, "total_steps": 22320, "loss": 0.6204, "lr": 0.0009216957229064429, "epoch": 5.250896057347671, "percentage": 26.25, "elapsed_time": "0:18:34", "remaining_time": "0:52:10", "throughput": 2003.42, "total_tokens": 2232960}
1178
+ {"current_steps": 5865, "total_steps": 22320, "loss": 0.4276, "lr": 0.0009214855212606015, "epoch": 5.255376344086022, "percentage": 26.28, "elapsed_time": "0:18:35", "remaining_time": "0:52:09", "throughput": 2003.49, "total_tokens": 2234848}
1179
+ {"current_steps": 5870, "total_steps": 22320, "loss": 0.5381, "lr": 0.0009212750618942436, "epoch": 5.259856630824372, "percentage": 26.3, "elapsed_time": "0:18:36", "remaining_time": "0:52:08", "throughput": 2003.61, "total_tokens": 2236736}
1180
+ {"current_steps": 5875, "total_steps": 22320, "loss": 0.7751, "lr": 0.0009210643449360563, "epoch": 5.264336917562724, "percentage": 26.32, "elapsed_time": "0:18:37", "remaining_time": "0:52:07", "throughput": 2003.7, "total_tokens": 2238592}
1181
+ {"current_steps": 5880, "total_steps": 22320, "loss": 0.5327, "lr": 0.000920853370514884, "epoch": 5.268817204301075, "percentage": 26.34, "elapsed_time": "0:18:38", "remaining_time": "0:52:06", "throughput": 2003.73, "total_tokens": 2240384}
1182
+ {"current_steps": 5885, "total_steps": 22320, "loss": 0.5629, "lr": 0.0009206421387597286, "epoch": 5.273297491039426, "percentage": 26.37, "elapsed_time": "0:18:39", "remaining_time": "0:52:05", "throughput": 2003.85, "total_tokens": 2242336}
1183
+ {"current_steps": 5890, "total_steps": 22320, "loss": 0.6514, "lr": 0.0009204306497997492, "epoch": 5.277777777777778, "percentage": 26.39, "elapsed_time": "0:18:39", "remaining_time": "0:52:04", "throughput": 2003.96, "total_tokens": 2244288}
1184
+ {"current_steps": 5895, "total_steps": 22320, "loss": 0.5809, "lr": 0.0009202189037642625, "epoch": 5.282258064516129, "percentage": 26.41, "elapsed_time": "0:18:40", "remaining_time": "0:52:02", "throughput": 2004.03, "total_tokens": 2246176}
1185
+ {"current_steps": 5900, "total_steps": 22320, "loss": 0.6584, "lr": 0.0009200069007827424, "epoch": 5.28673835125448, "percentage": 26.43, "elapsed_time": "0:18:41", "remaining_time": "0:52:01", "throughput": 2004.12, "total_tokens": 2248032}
1186
+ {"current_steps": 5905, "total_steps": 22320, "loss": 0.548, "lr": 0.0009197946409848195, "epoch": 5.291218637992832, "percentage": 26.46, "elapsed_time": "0:18:42", "remaining_time": "0:52:00", "throughput": 2004.16, "total_tokens": 2249952}
1187
+ {"current_steps": 5910, "total_steps": 22320, "loss": 0.6614, "lr": 0.0009195821245002816, "epoch": 5.295698924731183, "percentage": 26.48, "elapsed_time": "0:18:43", "remaining_time": "0:51:59", "throughput": 2004.27, "total_tokens": 2251840}
1188
+ {"current_steps": 5915, "total_steps": 22320, "loss": 0.5563, "lr": 0.0009193693514590736, "epoch": 5.300179211469534, "percentage": 26.5, "elapsed_time": "0:18:44", "remaining_time": "0:51:58", "throughput": 2004.31, "total_tokens": 2253632}
1189
+ {"current_steps": 5920, "total_steps": 22320, "loss": 0.4957, "lr": 0.0009191563219912973, "epoch": 5.304659498207886, "percentage": 26.52, "elapsed_time": "0:18:45", "remaining_time": "0:51:57", "throughput": 2004.42, "total_tokens": 2255584}
1190
+ {"current_steps": 5925, "total_steps": 22320, "loss": 0.6065, "lr": 0.000918943036227211, "epoch": 5.309139784946237, "percentage": 26.55, "elapsed_time": "0:18:46", "remaining_time": "0:51:56", "throughput": 2004.52, "total_tokens": 2257504}
1191
+ {"current_steps": 5930, "total_steps": 22320, "loss": 0.5179, "lr": 0.00091872949429723, "epoch": 5.313620071684587, "percentage": 26.57, "elapsed_time": "0:18:47", "remaining_time": "0:51:55", "throughput": 2004.57, "total_tokens": 2259392}
1192
+ {"current_steps": 5935, "total_steps": 22320, "loss": 0.5454, "lr": 0.0009185156963319261, "epoch": 5.318100358422939, "percentage": 26.59, "elapsed_time": "0:18:48", "remaining_time": "0:51:54", "throughput": 2004.64, "total_tokens": 2261408}
1193
+ {"current_steps": 5940, "total_steps": 22320, "loss": 0.645, "lr": 0.0009183016424620276, "epoch": 5.32258064516129, "percentage": 26.61, "elapsed_time": "0:18:48", "remaining_time": "0:51:53", "throughput": 2004.7, "total_tokens": 2263296}
1194
+ {"current_steps": 5945, "total_steps": 22320, "loss": 0.5136, "lr": 0.0009180873328184196, "epoch": 5.327060931899641, "percentage": 26.64, "elapsed_time": "0:18:49", "remaining_time": "0:51:52", "throughput": 2004.74, "total_tokens": 2265024}
1195
+ {"current_steps": 5950, "total_steps": 22320, "loss": 0.6691, "lr": 0.0009178727675321432, "epoch": 5.331541218637993, "percentage": 26.66, "elapsed_time": "0:18:50", "remaining_time": "0:51:51", "throughput": 2004.88, "total_tokens": 2267072}
1196
+ {"current_steps": 5955, "total_steps": 22320, "loss": 0.5742, "lr": 0.000917657946734396, "epoch": 5.336021505376344, "percentage": 26.68, "elapsed_time": "0:18:51", "remaining_time": "0:51:49", "throughput": 2004.94, "total_tokens": 2268960}
1197
+ {"current_steps": 5960, "total_steps": 22320, "loss": 0.6519, "lr": 0.0009174428705565318, "epoch": 5.340501792114695, "percentage": 26.7, "elapsed_time": "0:18:52", "remaining_time": "0:51:48", "throughput": 2005.03, "total_tokens": 2270880}
1198
+ {"current_steps": 5965, "total_steps": 22320, "loss": 0.5159, "lr": 0.0009172275391300604, "epoch": 5.344982078853047, "percentage": 26.72, "elapsed_time": "0:18:53", "remaining_time": "0:51:47", "throughput": 2005.02, "total_tokens": 2272672}
1199
+ {"current_steps": 5970, "total_steps": 22320, "loss": 0.6516, "lr": 0.000917011952586648, "epoch": 5.349462365591398, "percentage": 26.75, "elapsed_time": "0:18:54", "remaining_time": "0:51:46", "throughput": 2005.13, "total_tokens": 2274688}
1200
+ {"current_steps": 5975, "total_steps": 22320, "loss": 0.5377, "lr": 0.0009167961110581168, "epoch": 5.353942652329749, "percentage": 26.77, "elapsed_time": "0:18:55", "remaining_time": "0:51:45", "throughput": 2005.24, "total_tokens": 2276640}
1201
+ {"current_steps": 5980, "total_steps": 22320, "loss": 0.5377, "lr": 0.0009165800146764445, "epoch": 5.358422939068101, "percentage": 26.79, "elapsed_time": "0:18:56", "remaining_time": "0:51:44", "throughput": 2005.3, "total_tokens": 2278592}
1202
+ {"current_steps": 5985, "total_steps": 22320, "loss": 0.5905, "lr": 0.0009163636635737647, "epoch": 5.362903225806452, "percentage": 26.81, "elapsed_time": "0:18:57", "remaining_time": "0:51:43", "throughput": 2005.32, "total_tokens": 2280416}
1203
+ {"current_steps": 5990, "total_steps": 22320, "loss": 0.5351, "lr": 0.0009161470578823674, "epoch": 5.367383512544803, "percentage": 26.84, "elapsed_time": "0:18:58", "remaining_time": "0:51:42", "throughput": 2005.41, "total_tokens": 2282336}
1204
+ {"current_steps": 5995, "total_steps": 22320, "loss": 0.6138, "lr": 0.0009159301977346975, "epoch": 5.371863799283154, "percentage": 26.86, "elapsed_time": "0:18:58", "remaining_time": "0:51:41", "throughput": 2005.5, "total_tokens": 2284224}
1205
+ {"current_steps": 6000, "total_steps": 22320, "loss": 0.4187, "lr": 0.0009157130832633556, "epoch": 5.376344086021505, "percentage": 26.88, "elapsed_time": "0:18:59", "remaining_time": "0:51:40", "throughput": 2005.56, "total_tokens": 2286112}
1206
+ {"current_steps": 6005, "total_steps": 22320, "loss": 0.5738, "lr": 0.0009154957146010982, "epoch": 5.380824372759856, "percentage": 26.9, "elapsed_time": "0:19:00", "remaining_time": "0:51:39", "throughput": 2005.62, "total_tokens": 2288000}
1207
+ {"current_steps": 6010, "total_steps": 22320, "loss": 0.5565, "lr": 0.0009152780918808372, "epoch": 5.385304659498208, "percentage": 26.93, "elapsed_time": "0:19:01", "remaining_time": "0:51:38", "throughput": 2005.77, "total_tokens": 2290016}
1208
+ {"current_steps": 6015, "total_steps": 22320, "loss": 0.4386, "lr": 0.0009150602152356394, "epoch": 5.389784946236559, "percentage": 26.95, "elapsed_time": "0:19:02", "remaining_time": "0:51:37", "throughput": 2005.85, "total_tokens": 2291936}
1209
+ {"current_steps": 6020, "total_steps": 22320, "loss": 0.4475, "lr": 0.0009148420847987272, "epoch": 5.39426523297491, "percentage": 26.97, "elapsed_time": "0:19:03", "remaining_time": "0:51:36", "throughput": 2005.89, "total_tokens": 2293792}
1210
+ {"current_steps": 6025, "total_steps": 22320, "loss": 0.4607, "lr": 0.0009146237007034781, "epoch": 5.398745519713262, "percentage": 26.99, "elapsed_time": "0:19:04", "remaining_time": "0:51:35", "throughput": 2005.99, "total_tokens": 2295808}
1211
+ {"current_steps": 6030, "total_steps": 22320, "loss": 0.5872, "lr": 0.0009144050630834248, "epoch": 5.403225806451613, "percentage": 27.02, "elapsed_time": "0:19:05", "remaining_time": "0:51:34", "throughput": 2006.08, "total_tokens": 2297792}
1212
+ {"current_steps": 6035, "total_steps": 22320, "loss": 0.7203, "lr": 0.0009141861720722549, "epoch": 5.407706093189964, "percentage": 27.04, "elapsed_time": "0:19:06", "remaining_time": "0:51:33", "throughput": 2006.19, "total_tokens": 2299744}
1213
+ {"current_steps": 6040, "total_steps": 22320, "loss": 0.4725, "lr": 0.0009139670278038108, "epoch": 5.412186379928316, "percentage": 27.06, "elapsed_time": "0:19:07", "remaining_time": "0:51:32", "throughput": 2006.25, "total_tokens": 2301632}
1214
+ {"current_steps": 6045, "total_steps": 22320, "loss": 0.478, "lr": 0.00091374763041209, "epoch": 5.416666666666667, "percentage": 27.08, "elapsed_time": "0:19:08", "remaining_time": "0:51:31", "throughput": 2006.34, "total_tokens": 2303616}
1215
+ {"current_steps": 6050, "total_steps": 22320, "loss": 0.4537, "lr": 0.0009135279800312449, "epoch": 5.421146953405018, "percentage": 27.11, "elapsed_time": "0:19:09", "remaining_time": "0:51:29", "throughput": 2006.42, "total_tokens": 2305408}
1216
+ {"current_steps": 6055, "total_steps": 22320, "loss": 0.5724, "lr": 0.000913308076795582, "epoch": 5.425627240143369, "percentage": 27.13, "elapsed_time": "0:19:09", "remaining_time": "0:51:29", "throughput": 2006.53, "total_tokens": 2307424}
1217
+ {"current_steps": 6060, "total_steps": 22320, "loss": 0.84, "lr": 0.0009130879208395632, "epoch": 5.43010752688172, "percentage": 27.15, "elapsed_time": "0:19:10", "remaining_time": "0:51:27", "throughput": 2006.71, "total_tokens": 2309472}
1218
+ {"current_steps": 6065, "total_steps": 22320, "loss": 0.5557, "lr": 0.0009128675122978043, "epoch": 5.434587813620071, "percentage": 27.17, "elapsed_time": "0:19:11", "remaining_time": "0:51:26", "throughput": 2006.83, "total_tokens": 2311456}
1219
+ {"current_steps": 6070, "total_steps": 22320, "loss": 0.7179, "lr": 0.0009126468513050758, "epoch": 5.439068100358423, "percentage": 27.2, "elapsed_time": "0:19:12", "remaining_time": "0:51:25", "throughput": 2006.87, "total_tokens": 2313312}
1220
+ {"current_steps": 6075, "total_steps": 22320, "loss": 0.5903, "lr": 0.0009124259379963027, "epoch": 5.443548387096774, "percentage": 27.22, "elapsed_time": "0:19:13", "remaining_time": "0:51:24", "throughput": 2006.96, "total_tokens": 2315232}
1221
+ {"current_steps": 6080, "total_steps": 22320, "loss": 0.5944, "lr": 0.0009122047725065638, "epoch": 5.448028673835125, "percentage": 27.24, "elapsed_time": "0:19:14", "remaining_time": "0:51:23", "throughput": 2007.04, "total_tokens": 2317088}
1222
+ {"current_steps": 6085, "total_steps": 22320, "loss": 0.5907, "lr": 0.0009119833549710928, "epoch": 5.452508960573477, "percentage": 27.26, "elapsed_time": "0:19:15", "remaining_time": "0:51:22", "throughput": 2007.12, "total_tokens": 2318880}
1223
+ {"current_steps": 6090, "total_steps": 22320, "loss": 0.6034, "lr": 0.0009117616855252768, "epoch": 5.456989247311828, "percentage": 27.28, "elapsed_time": "0:19:16", "remaining_time": "0:51:21", "throughput": 2007.16, "total_tokens": 2320672}
1224
+ {"current_steps": 6095, "total_steps": 22320, "loss": 0.5585, "lr": 0.0009115397643046575, "epoch": 5.461469534050179, "percentage": 27.31, "elapsed_time": "0:19:17", "remaining_time": "0:51:20", "throughput": 2007.14, "total_tokens": 2322400}
1225
+ {"current_steps": 6100, "total_steps": 22320, "loss": 0.4896, "lr": 0.0009113175914449301, "epoch": 5.465949820788531, "percentage": 27.33, "elapsed_time": "0:19:18", "remaining_time": "0:51:19", "throughput": 2007.37, "total_tokens": 2324544}
1226
+ {"current_steps": 6105, "total_steps": 22320, "loss": 0.522, "lr": 0.0009110951670819441, "epoch": 5.470430107526882, "percentage": 27.35, "elapsed_time": "0:19:18", "remaining_time": "0:51:18", "throughput": 2007.39, "total_tokens": 2326432}
1227
+ {"current_steps": 6110, "total_steps": 22320, "loss": 0.646, "lr": 0.0009108724913517022, "epoch": 5.474910394265233, "percentage": 27.37, "elapsed_time": "0:19:19", "remaining_time": "0:51:17", "throughput": 2007.55, "total_tokens": 2328544}
1228
+ {"current_steps": 6115, "total_steps": 22320, "loss": 0.6986, "lr": 0.0009106495643903616, "epoch": 5.479390681003585, "percentage": 27.4, "elapsed_time": "0:19:20", "remaining_time": "0:51:16", "throughput": 2007.64, "total_tokens": 2330464}
1229
+ {"current_steps": 6120, "total_steps": 22320, "loss": 0.5425, "lr": 0.0009104263863342326, "epoch": 5.483870967741936, "percentage": 27.42, "elapsed_time": "0:19:21", "remaining_time": "0:51:15", "throughput": 2007.85, "total_tokens": 2332576}
1230
+ {"current_steps": 6125, "total_steps": 22320, "loss": 0.5344, "lr": 0.0009102029573197787, "epoch": 5.488351254480286, "percentage": 27.44, "elapsed_time": "0:19:22", "remaining_time": "0:51:14", "throughput": 2007.96, "total_tokens": 2334592}
1231
+ {"current_steps": 6130, "total_steps": 22320, "loss": 0.6401, "lr": 0.0009099792774836178, "epoch": 5.492831541218638, "percentage": 27.46, "elapsed_time": "0:19:23", "remaining_time": "0:51:13", "throughput": 2008.13, "total_tokens": 2336640}
1232
+ {"current_steps": 6135, "total_steps": 22320, "loss": 0.6721, "lr": 0.0009097553469625204, "epoch": 5.497311827956989, "percentage": 27.49, "elapsed_time": "0:19:24", "remaining_time": "0:51:12", "throughput": 2008.24, "total_tokens": 2338528}
1233
+ {"current_steps": 6140, "total_steps": 22320, "loss": 0.6099, "lr": 0.0009095311658934104, "epoch": 5.50179211469534, "percentage": 27.51, "elapsed_time": "0:19:25", "remaining_time": "0:51:10", "throughput": 2008.3, "total_tokens": 2340416}
1234
+ {"current_steps": 6145, "total_steps": 22320, "loss": 0.5172, "lr": 0.0009093067344133652, "epoch": 5.506272401433692, "percentage": 27.53, "elapsed_time": "0:19:26", "remaining_time": "0:51:09", "throughput": 2008.37, "total_tokens": 2342240}
1235
+ {"current_steps": 6150, "total_steps": 22320, "loss": 0.6191, "lr": 0.000909082052659615, "epoch": 5.510752688172043, "percentage": 27.55, "elapsed_time": "0:19:27", "remaining_time": "0:51:08", "throughput": 2008.5, "total_tokens": 2344096}
1236
+ {"current_steps": 6155, "total_steps": 22320, "loss": 0.4529, "lr": 0.0009088571207695433, "epoch": 5.515232974910394, "percentage": 27.58, "elapsed_time": "0:19:28", "remaining_time": "0:51:07", "throughput": 2008.52, "total_tokens": 2345984}
1237
+ {"current_steps": 6160, "total_steps": 22320, "loss": 0.5742, "lr": 0.0009086319388806863, "epoch": 5.519713261648746, "percentage": 27.6, "elapsed_time": "0:19:28", "remaining_time": "0:51:06", "throughput": 2008.6, "total_tokens": 2347904}
1238
+ {"current_steps": 6165, "total_steps": 22320, "loss": 0.4964, "lr": 0.0009084065071307333, "epoch": 5.524193548387097, "percentage": 27.62, "elapsed_time": "0:19:29", "remaining_time": "0:51:05", "throughput": 2008.64, "total_tokens": 2349824}
1239
+ {"current_steps": 6170, "total_steps": 22320, "loss": 0.4898, "lr": 0.0009081808256575259, "epoch": 5.528673835125448, "percentage": 27.64, "elapsed_time": "0:19:30", "remaining_time": "0:51:04", "throughput": 2008.77, "total_tokens": 2351840}
1240
+ {"current_steps": 6175, "total_steps": 22320, "loss": 0.5547, "lr": 0.0009079548945990592, "epoch": 5.5331541218638, "percentage": 27.67, "elapsed_time": "0:19:31", "remaining_time": "0:51:03", "throughput": 2008.9, "total_tokens": 2353824}
1241
+ {"current_steps": 6180, "total_steps": 22320, "loss": 0.6459, "lr": 0.0009077287140934802, "epoch": 5.53763440860215, "percentage": 27.69, "elapsed_time": "0:19:32", "remaining_time": "0:51:02", "throughput": 2008.91, "total_tokens": 2355776}
1242
+ {"current_steps": 6185, "total_steps": 22320, "loss": 0.6945, "lr": 0.0009075022842790888, "epoch": 5.542114695340501, "percentage": 27.71, "elapsed_time": "0:19:33", "remaining_time": "0:51:01", "throughput": 2008.95, "total_tokens": 2357632}
1243
+ {"current_steps": 6190, "total_steps": 22320, "loss": 0.6876, "lr": 0.000907275605294337, "epoch": 5.546594982078853, "percentage": 27.73, "elapsed_time": "0:19:34", "remaining_time": "0:51:00", "throughput": 2008.96, "total_tokens": 2359392}
1244
+ {"current_steps": 6195, "total_steps": 22320, "loss": 0.6219, "lr": 0.0009070486772778297, "epoch": 5.551075268817204, "percentage": 27.76, "elapsed_time": "0:19:35", "remaining_time": "0:50:59", "throughput": 2009.06, "total_tokens": 2361216}
1245
+ {"current_steps": 6200, "total_steps": 22320, "loss": 0.6179, "lr": 0.0009068215003683235, "epoch": 5.555555555555555, "percentage": 27.78, "elapsed_time": "0:19:36", "remaining_time": "0:50:58", "throughput": 2009.22, "total_tokens": 2363328}
1246
+ {"current_steps": 6205, "total_steps": 22320, "loss": 0.5654, "lr": 0.0009065940747047277, "epoch": 5.560035842293907, "percentage": 27.8, "elapsed_time": "0:19:37", "remaining_time": "0:50:56", "throughput": 2009.25, "total_tokens": 2365056}
1247
+ {"current_steps": 6210, "total_steps": 22320, "loss": 0.5172, "lr": 0.0009063664004261032, "epoch": 5.564516129032258, "percentage": 27.82, "elapsed_time": "0:19:38", "remaining_time": "0:50:56", "throughput": 2009.36, "total_tokens": 2367072}
1248
+ {"current_steps": 6215, "total_steps": 22320, "loss": 0.5537, "lr": 0.0009061384776716632, "epoch": 5.568996415770609, "percentage": 27.84, "elapsed_time": "0:19:38", "remaining_time": "0:50:55", "throughput": 2009.35, "total_tokens": 2368992}
1249
+ {"current_steps": 6220, "total_steps": 22320, "loss": 0.5442, "lr": 0.000905910306580773, "epoch": 5.573476702508961, "percentage": 27.87, "elapsed_time": "0:19:39", "remaining_time": "0:50:54", "throughput": 2009.53, "total_tokens": 2371104}
1250
+ {"current_steps": 6225, "total_steps": 22320, "loss": 0.498, "lr": 0.0009056818872929493, "epoch": 5.577956989247312, "percentage": 27.89, "elapsed_time": "0:19:40", "remaining_time": "0:50:53", "throughput": 2009.59, "total_tokens": 2372928}
1251
+ {"current_steps": 6230, "total_steps": 22320, "loss": 0.4947, "lr": 0.0009054532199478609, "epoch": 5.582437275985663, "percentage": 27.91, "elapsed_time": "0:19:41", "remaining_time": "0:50:52", "throughput": 2009.65, "total_tokens": 2374944}
1252
+ {"current_steps": 6235, "total_steps": 22320, "loss": 0.4915, "lr": 0.0009052243046853283, "epoch": 5.586917562724015, "percentage": 27.93, "elapsed_time": "0:19:42", "remaining_time": "0:50:51", "throughput": 2009.66, "total_tokens": 2376832}
1253
+ {"current_steps": 6240, "total_steps": 22320, "loss": 0.4175, "lr": 0.0009049951416453233, "epoch": 5.591397849462366, "percentage": 27.96, "elapsed_time": "0:19:43", "remaining_time": "0:50:50", "throughput": 2009.77, "total_tokens": 2378848}
1254
+ {"current_steps": 6245, "total_steps": 22320, "loss": 0.477, "lr": 0.0009047657309679693, "epoch": 5.595878136200717, "percentage": 27.98, "elapsed_time": "0:19:44", "remaining_time": "0:50:49", "throughput": 2009.78, "total_tokens": 2380608}
1255
+ {"current_steps": 6250, "total_steps": 22320, "loss": 0.7753, "lr": 0.0009045360727935414, "epoch": 5.600358422939068, "percentage": 28.0, "elapsed_time": "0:19:45", "remaining_time": "0:50:47", "throughput": 2009.88, "total_tokens": 2382496}
1256
+ {"current_steps": 6255, "total_steps": 22320, "loss": 0.5576, "lr": 0.0009043061672624659, "epoch": 5.604838709677419, "percentage": 28.02, "elapsed_time": "0:19:46", "remaining_time": "0:50:46", "throughput": 2009.91, "total_tokens": 2384288}
1257
+ {"current_steps": 6260, "total_steps": 22320, "loss": 0.5977, "lr": 0.0009040760145153199, "epoch": 5.60931899641577, "percentage": 28.05, "elapsed_time": "0:19:47", "remaining_time": "0:50:45", "throughput": 2010.02, "total_tokens": 2386112}
1258
+ {"current_steps": 6265, "total_steps": 22320, "loss": 0.6446, "lr": 0.0009038456146928325, "epoch": 5.613799283154122, "percentage": 28.07, "elapsed_time": "0:19:47", "remaining_time": "0:50:44", "throughput": 2010.1, "total_tokens": 2387968}
1259
+ {"current_steps": 6270, "total_steps": 22320, "loss": 0.5726, "lr": 0.0009036149679358832, "epoch": 5.618279569892473, "percentage": 28.09, "elapsed_time": "0:19:48", "remaining_time": "0:50:43", "throughput": 2010.13, "total_tokens": 2389888}
1260
+ {"current_steps": 6275, "total_steps": 22320, "loss": 0.672, "lr": 0.0009033840743855027, "epoch": 5.622759856630824, "percentage": 28.11, "elapsed_time": "0:19:49", "remaining_time": "0:50:42", "throughput": 2010.26, "total_tokens": 2391808}
1261
+ {"current_steps": 6280, "total_steps": 22320, "loss": 0.6333, "lr": 0.0009031529341828724, "epoch": 5.627240143369176, "percentage": 28.14, "elapsed_time": "0:19:50", "remaining_time": "0:50:41", "throughput": 2010.46, "total_tokens": 2393888}
1262
+ {"current_steps": 6285, "total_steps": 22320, "loss": 0.576, "lr": 0.0009029215474693251, "epoch": 5.631720430107527, "percentage": 28.16, "elapsed_time": "0:19:51", "remaining_time": "0:50:40", "throughput": 2010.61, "total_tokens": 2395840}
1263
+ {"current_steps": 6290, "total_steps": 22320, "loss": 0.4852, "lr": 0.0009026899143863438, "epoch": 5.636200716845878, "percentage": 28.18, "elapsed_time": "0:19:52", "remaining_time": "0:50:39", "throughput": 2010.71, "total_tokens": 2397856}
1264
+ {"current_steps": 6295, "total_steps": 22320, "loss": 0.6768, "lr": 0.0009024580350755623, "epoch": 5.64068100358423, "percentage": 28.2, "elapsed_time": "0:19:53", "remaining_time": "0:50:38", "throughput": 2010.77, "total_tokens": 2399744}
1265
+ {"current_steps": 6300, "total_steps": 22320, "loss": 0.6471, "lr": 0.000902225909678765, "epoch": 5.645161290322581, "percentage": 28.23, "elapsed_time": "0:19:54", "remaining_time": "0:50:36", "throughput": 2010.81, "total_tokens": 2401536}
1266
+ {"current_steps": 6305, "total_steps": 22320, "loss": 0.5734, "lr": 0.0009019935383378868, "epoch": 5.649641577060932, "percentage": 28.25, "elapsed_time": "0:19:55", "remaining_time": "0:50:36", "throughput": 2010.89, "total_tokens": 2403520}
1267
+ {"current_steps": 6310, "total_steps": 22320, "loss": 0.461, "lr": 0.0009017609211950127, "epoch": 5.654121863799283, "percentage": 28.27, "elapsed_time": "0:19:56", "remaining_time": "0:50:34", "throughput": 2010.99, "total_tokens": 2405408}
1268
+ {"current_steps": 6315, "total_steps": 22320, "loss": 0.4994, "lr": 0.0009015280583923784, "epoch": 5.658602150537634, "percentage": 28.29, "elapsed_time": "0:19:56", "remaining_time": "0:50:33", "throughput": 2011.09, "total_tokens": 2407232}
1269
+ {"current_steps": 6320, "total_steps": 22320, "loss": 0.576, "lr": 0.0009012949500723695, "epoch": 5.663082437275985, "percentage": 28.32, "elapsed_time": "0:19:57", "remaining_time": "0:50:32", "throughput": 2011.17, "total_tokens": 2409152}
1270
+ {"current_steps": 6325, "total_steps": 22320, "loss": 0.6925, "lr": 0.0009010615963775219, "epoch": 5.667562724014337, "percentage": 28.34, "elapsed_time": "0:19:58", "remaining_time": "0:50:31", "throughput": 2011.25, "total_tokens": 2411072}
1271
+ {"current_steps": 6330, "total_steps": 22320, "loss": 0.4843, "lr": 0.0009008279974505216, "epoch": 5.672043010752688, "percentage": 28.36, "elapsed_time": "0:19:59", "remaining_time": "0:50:30", "throughput": 2011.33, "total_tokens": 2413056}
1272
+ {"current_steps": 6335, "total_steps": 22320, "loss": 0.5932, "lr": 0.0009005941534342043, "epoch": 5.676523297491039, "percentage": 28.38, "elapsed_time": "0:20:00", "remaining_time": "0:50:29", "throughput": 2011.41, "total_tokens": 2414976}
1273
+ {"current_steps": 6340, "total_steps": 22320, "loss": 0.5693, "lr": 0.0009003600644715557, "epoch": 5.681003584229391, "percentage": 28.41, "elapsed_time": "0:20:01", "remaining_time": "0:50:28", "throughput": 2011.46, "total_tokens": 2416864}
1274
+ {"current_steps": 6345, "total_steps": 22320, "loss": 0.4313, "lr": 0.0009001257307057113, "epoch": 5.685483870967742, "percentage": 28.43, "elapsed_time": "0:20:02", "remaining_time": "0:50:27", "throughput": 2011.56, "total_tokens": 2418688}
1275
+ {"current_steps": 6350, "total_steps": 22320, "loss": 0.4751, "lr": 0.0008998911522799562, "epoch": 5.689964157706093, "percentage": 28.45, "elapsed_time": "0:20:03", "remaining_time": "0:50:26", "throughput": 2011.61, "total_tokens": 2420512}
1276
+ {"current_steps": 6355, "total_steps": 22320, "loss": 0.6511, "lr": 0.0008996563293377254, "epoch": 5.694444444444445, "percentage": 28.47, "elapsed_time": "0:20:04", "remaining_time": "0:50:25", "throughput": 2011.67, "total_tokens": 2422400}
1277
+ {"current_steps": 6360, "total_steps": 22320, "loss": 0.6145, "lr": 0.0008994212620226028, "epoch": 5.698924731182796, "percentage": 28.49, "elapsed_time": "0:20:05", "remaining_time": "0:50:24", "throughput": 2011.74, "total_tokens": 2424384}
1278
+ {"current_steps": 6365, "total_steps": 22320, "loss": 0.5589, "lr": 0.0008991859504783224, "epoch": 5.703405017921147, "percentage": 28.52, "elapsed_time": "0:20:06", "remaining_time": "0:50:23", "throughput": 2011.8, "total_tokens": 2426336}
1279
+ {"current_steps": 6370, "total_steps": 22320, "loss": 0.6855, "lr": 0.000898950394848767, "epoch": 5.707885304659499, "percentage": 28.54, "elapsed_time": "0:20:06", "remaining_time": "0:50:22", "throughput": 2011.91, "total_tokens": 2428288}
1280
+ {"current_steps": 6375, "total_steps": 22320, "loss": 0.6265, "lr": 0.0008987145952779691, "epoch": 5.71236559139785, "percentage": 28.56, "elapsed_time": "0:20:07", "remaining_time": "0:50:21", "throughput": 2012.08, "total_tokens": 2430368}
1281
+ {"current_steps": 6380, "total_steps": 22320, "loss": 0.5077, "lr": 0.0008984785519101099, "epoch": 5.7168458781362, "percentage": 28.58, "elapsed_time": "0:20:08", "remaining_time": "0:50:20", "throughput": 2012.16, "total_tokens": 2432288}
1282
+ {"current_steps": 6385, "total_steps": 22320, "loss": 0.6051, "lr": 0.00089824226488952, "epoch": 5.721326164874552, "percentage": 28.61, "elapsed_time": "0:20:09", "remaining_time": "0:50:18", "throughput": 2012.24, "total_tokens": 2434144}
1283
+ {"current_steps": 6390, "total_steps": 22320, "loss": 0.6002, "lr": 0.0008980057343606789, "epoch": 5.725806451612903, "percentage": 28.63, "elapsed_time": "0:20:10", "remaining_time": "0:50:17", "throughput": 2012.29, "total_tokens": 2436032}
1284
+ {"current_steps": 6395, "total_steps": 22320, "loss": 0.5968, "lr": 0.0008977689604682151, "epoch": 5.730286738351254, "percentage": 28.65, "elapsed_time": "0:20:11", "remaining_time": "0:50:16", "throughput": 2012.35, "total_tokens": 2437856}
1285
+ {"current_steps": 6400, "total_steps": 22320, "loss": 0.5882, "lr": 0.0008975319433569055, "epoch": 5.734767025089606, "percentage": 28.67, "elapsed_time": "0:20:12", "remaining_time": "0:50:15", "throughput": 2012.38, "total_tokens": 2439776}
1286
+ {"current_steps": 6405, "total_steps": 22320, "loss": 0.6293, "lr": 0.0008972946831716764, "epoch": 5.739247311827957, "percentage": 28.7, "elapsed_time": "0:20:13", "remaining_time": "0:50:14", "throughput": 2012.51, "total_tokens": 2441760}
1287
+ {"current_steps": 6410, "total_steps": 22320, "loss": 0.5523, "lr": 0.0008970571800576022, "epoch": 5.743727598566308, "percentage": 28.72, "elapsed_time": "0:20:14", "remaining_time": "0:50:13", "throughput": 2012.66, "total_tokens": 2443776}
1288
+ {"current_steps": 6415, "total_steps": 22320, "loss": 0.6456, "lr": 0.0008968194341599056, "epoch": 5.74820788530466, "percentage": 28.74, "elapsed_time": "0:20:15", "remaining_time": "0:50:12", "throughput": 2012.69, "total_tokens": 2445632}
1289
+ {"current_steps": 6420, "total_steps": 22320, "loss": 0.5742, "lr": 0.0008965814456239588, "epoch": 5.752688172043011, "percentage": 28.76, "elapsed_time": "0:20:16", "remaining_time": "0:50:11", "throughput": 2012.7, "total_tokens": 2447456}
1290
+ {"current_steps": 6425, "total_steps": 22320, "loss": 0.6042, "lr": 0.000896343214595281, "epoch": 5.757168458781362, "percentage": 28.79, "elapsed_time": "0:20:16", "remaining_time": "0:50:10", "throughput": 2012.8, "total_tokens": 2449472}
1291
+ {"current_steps": 6430, "total_steps": 22320, "loss": 0.5586, "lr": 0.0008961047412195409, "epoch": 5.761648745519714, "percentage": 28.81, "elapsed_time": "0:20:17", "remaining_time": "0:50:09", "throughput": 2012.85, "total_tokens": 2451232}
1292
+ {"current_steps": 6435, "total_steps": 22320, "loss": 0.5513, "lr": 0.0008958660256425546, "epoch": 5.766129032258064, "percentage": 28.83, "elapsed_time": "0:20:18", "remaining_time": "0:50:08", "throughput": 2013.0, "total_tokens": 2453312}
1293
+ {"current_steps": 6440, "total_steps": 22320, "loss": 0.5017, "lr": 0.0008956270680102866, "epoch": 5.770609318996415, "percentage": 28.85, "elapsed_time": "0:20:19", "remaining_time": "0:50:07", "throughput": 2013.06, "total_tokens": 2455328}
1294
+ {"current_steps": 6445, "total_steps": 22320, "loss": 0.5017, "lr": 0.0008953878684688492, "epoch": 5.775089605734767, "percentage": 28.88, "elapsed_time": "0:20:20", "remaining_time": "0:50:06", "throughput": 2013.09, "total_tokens": 2457120}
1295
+ {"current_steps": 6450, "total_steps": 22320, "loss": 0.5033, "lr": 0.0008951484271645032, "epoch": 5.779569892473118, "percentage": 28.9, "elapsed_time": "0:20:21", "remaining_time": "0:50:05", "throughput": 2013.17, "total_tokens": 2458912}
1296
+ {"current_steps": 6455, "total_steps": 22320, "loss": 0.5604, "lr": 0.0008949087442436564, "epoch": 5.784050179211469, "percentage": 28.92, "elapsed_time": "0:20:22", "remaining_time": "0:50:04", "throughput": 2013.29, "total_tokens": 2460896}
1297
+ {"current_steps": 6460, "total_steps": 22320, "loss": 0.5533, "lr": 0.000894668819852865, "epoch": 5.788530465949821, "percentage": 28.94, "elapsed_time": "0:20:23", "remaining_time": "0:50:03", "throughput": 2013.29, "total_tokens": 2462592}
1298
+ {"current_steps": 6465, "total_steps": 22320, "loss": 0.5559, "lr": 0.0008944286541388322, "epoch": 5.793010752688172, "percentage": 28.97, "elapsed_time": "0:20:24", "remaining_time": "0:50:01", "throughput": 2013.32, "total_tokens": 2464384}
1299
+ {"current_steps": 6470, "total_steps": 22320, "loss": 0.5613, "lr": 0.0008941882472484097, "epoch": 5.797491039426523, "percentage": 28.99, "elapsed_time": "0:20:24", "remaining_time": "0:50:00", "throughput": 2013.38, "total_tokens": 2466272}
1300
+ {"current_steps": 6475, "total_steps": 22320, "loss": 0.5956, "lr": 0.0008939475993285956, "epoch": 5.801971326164875, "percentage": 29.01, "elapsed_time": "0:20:25", "remaining_time": "0:49:59", "throughput": 2013.45, "total_tokens": 2468192}
1301
+ {"current_steps": 6480, "total_steps": 22320, "loss": 0.565, "lr": 0.0008937067105265362, "epoch": 5.806451612903226, "percentage": 29.03, "elapsed_time": "0:20:26", "remaining_time": "0:49:58", "throughput": 2013.52, "total_tokens": 2469984}
1302
+ {"current_steps": 6485, "total_steps": 22320, "loss": 0.4049, "lr": 0.0008934655809895247, "epoch": 5.810931899641577, "percentage": 29.05, "elapsed_time": "0:20:27", "remaining_time": "0:49:57", "throughput": 2013.6, "total_tokens": 2471776}
1303
+ {"current_steps": 6490, "total_steps": 22320, "loss": 0.6723, "lr": 0.0008932242108650015, "epoch": 5.815412186379929, "percentage": 29.08, "elapsed_time": "0:20:28", "remaining_time": "0:49:56", "throughput": 2013.65, "total_tokens": 2473600}
1304
+ {"current_steps": 6495, "total_steps": 22320, "loss": 0.5624, "lr": 0.0008929826003005543, "epoch": 5.81989247311828, "percentage": 29.1, "elapsed_time": "0:20:29", "remaining_time": "0:49:55", "throughput": 2013.68, "total_tokens": 2475392}
1305
+ {"current_steps": 6500, "total_steps": 22320, "loss": 0.5655, "lr": 0.0008927407494439178, "epoch": 5.824372759856631, "percentage": 29.12, "elapsed_time": "0:20:30", "remaining_time": "0:49:54", "throughput": 2013.73, "total_tokens": 2477216}
1306
+ {"current_steps": 6505, "total_steps": 22320, "loss": 0.5136, "lr": 0.0008924986584429732, "epoch": 5.828853046594982, "percentage": 29.14, "elapsed_time": "0:20:31", "remaining_time": "0:49:53", "throughput": 2013.72, "total_tokens": 2479072}
1307
+ {"current_steps": 6510, "total_steps": 22320, "loss": 0.4831, "lr": 0.0008922563274457494, "epoch": 5.833333333333333, "percentage": 29.17, "elapsed_time": "0:20:31", "remaining_time": "0:49:51", "throughput": 2013.79, "total_tokens": 2480992}
1308
+ {"current_steps": 6515, "total_steps": 22320, "loss": 0.6075, "lr": 0.0008920137566004211, "epoch": 5.837813620071684, "percentage": 29.19, "elapsed_time": "0:20:32", "remaining_time": "0:49:50", "throughput": 2013.89, "total_tokens": 2482976}
1309
+ {"current_steps": 6520, "total_steps": 22320, "loss": 0.5557, "lr": 0.0008917709460553101, "epoch": 5.842293906810036, "percentage": 29.21, "elapsed_time": "0:20:33", "remaining_time": "0:49:49", "throughput": 2013.97, "total_tokens": 2484832}
1310
+ {"current_steps": 6525, "total_steps": 22320, "loss": 0.475, "lr": 0.0008915278959588849, "epoch": 5.846774193548387, "percentage": 29.23, "elapsed_time": "0:20:34", "remaining_time": "0:49:48", "throughput": 2014.02, "total_tokens": 2486720}
1311
+ {"current_steps": 6530, "total_steps": 22320, "loss": 0.4882, "lr": 0.0008912846064597603, "epoch": 5.851254480286738, "percentage": 29.26, "elapsed_time": "0:20:35", "remaining_time": "0:49:47", "throughput": 2014.05, "total_tokens": 2488448}
1312
+ {"current_steps": 6535, "total_steps": 22320, "loss": 0.5821, "lr": 0.0008910410777066975, "epoch": 5.85573476702509, "percentage": 29.28, "elapsed_time": "0:20:36", "remaining_time": "0:49:46", "throughput": 2014.08, "total_tokens": 2490240}
1313
+ {"current_steps": 6540, "total_steps": 22320, "loss": 0.6207, "lr": 0.0008907973098486039, "epoch": 5.860215053763441, "percentage": 29.3, "elapsed_time": "0:20:37", "remaining_time": "0:49:45", "throughput": 2014.2, "total_tokens": 2492160}
1314
+ {"current_steps": 6545, "total_steps": 22320, "loss": 0.508, "lr": 0.0008905533030345335, "epoch": 5.864695340501792, "percentage": 29.32, "elapsed_time": "0:20:38", "remaining_time": "0:49:44", "throughput": 2014.35, "total_tokens": 2494336}
1315
+ {"current_steps": 6550, "total_steps": 22320, "loss": 0.5796, "lr": 0.0008903090574136858, "epoch": 5.869175627240144, "percentage": 29.35, "elapsed_time": "0:20:39", "remaining_time": "0:49:43", "throughput": 2014.4, "total_tokens": 2496224}
1316
+ {"current_steps": 6555, "total_steps": 22320, "loss": 0.6666, "lr": 0.0008900645731354066, "epoch": 5.873655913978495, "percentage": 29.37, "elapsed_time": "0:20:40", "remaining_time": "0:49:42", "throughput": 2014.47, "total_tokens": 2498144}
1317
+ {"current_steps": 6560, "total_steps": 22320, "loss": 0.6724, "lr": 0.0008898198503491881, "epoch": 5.878136200716845, "percentage": 29.39, "elapsed_time": "0:20:41", "remaining_time": "0:49:41", "throughput": 2014.5, "total_tokens": 2500000}
1318
+ {"current_steps": 6565, "total_steps": 22320, "loss": 0.5256, "lr": 0.0008895748892046674, "epoch": 5.882616487455197, "percentage": 29.41, "elapsed_time": "0:20:42", "remaining_time": "0:49:40", "throughput": 2014.67, "total_tokens": 2502240}
1319
+ {"current_steps": 6570, "total_steps": 22320, "loss": 0.6395, "lr": 0.0008893296898516281, "epoch": 5.887096774193548, "percentage": 29.44, "elapsed_time": "0:20:42", "remaining_time": "0:49:39", "throughput": 2014.71, "total_tokens": 2504096}
1320
+ {"current_steps": 6575, "total_steps": 22320, "loss": 0.5529, "lr": 0.0008890842524399992, "epoch": 5.891577060931899, "percentage": 29.46, "elapsed_time": "0:20:43", "remaining_time": "0:49:38", "throughput": 2014.74, "total_tokens": 2505952}
1321
+ {"current_steps": 6580, "total_steps": 22320, "loss": 0.5311, "lr": 0.0008888385771198552, "epoch": 5.896057347670251, "percentage": 29.48, "elapsed_time": "0:20:44", "remaining_time": "0:49:37", "throughput": 2014.81, "total_tokens": 2507808}
1322
+ {"current_steps": 6585, "total_steps": 22320, "loss": 0.5576, "lr": 0.0008885926640414162, "epoch": 5.900537634408602, "percentage": 29.5, "elapsed_time": "0:20:45", "remaining_time": "0:49:36", "throughput": 2014.81, "total_tokens": 2509632}
1323
+ {"current_steps": 6590, "total_steps": 22320, "loss": 0.639, "lr": 0.0008883465133550475, "epoch": 5.905017921146953, "percentage": 29.53, "elapsed_time": "0:20:46", "remaining_time": "0:49:35", "throughput": 2014.89, "total_tokens": 2511552}
1324
+ {"current_steps": 6595, "total_steps": 22320, "loss": 0.5362, "lr": 0.0008881001252112599, "epoch": 5.909498207885305, "percentage": 29.55, "elapsed_time": "0:20:47", "remaining_time": "0:49:34", "throughput": 2014.94, "total_tokens": 2513504}
1325
+ {"current_steps": 6600, "total_steps": 22320, "loss": 0.5491, "lr": 0.0008878534997607093, "epoch": 5.913978494623656, "percentage": 29.57, "elapsed_time": "0:20:48", "remaining_time": "0:49:33", "throughput": 2015.08, "total_tokens": 2515520}
1326
+ {"current_steps": 6605, "total_steps": 22320, "loss": 0.5466, "lr": 0.0008876066371541968, "epoch": 5.918458781362007, "percentage": 29.59, "elapsed_time": "0:20:49", "remaining_time": "0:49:32", "throughput": 2015.2, "total_tokens": 2517504}
1327
+ {"current_steps": 6610, "total_steps": 22320, "loss": 0.6619, "lr": 0.0008873595375426681, "epoch": 5.922939068100359, "percentage": 29.61, "elapsed_time": "0:20:50", "remaining_time": "0:49:31", "throughput": 2015.28, "total_tokens": 2519424}
1328
+ {"current_steps": 6615, "total_steps": 22320, "loss": 0.532, "lr": 0.0008871122010772146, "epoch": 5.92741935483871, "percentage": 29.64, "elapsed_time": "0:20:51", "remaining_time": "0:49:30", "throughput": 2015.35, "total_tokens": 2521280}
1329
+ {"current_steps": 6620, "total_steps": 22320, "loss": 0.4322, "lr": 0.0008868646279090715, "epoch": 5.931899641577061, "percentage": 29.66, "elapsed_time": "0:20:51", "remaining_time": "0:49:29", "throughput": 2015.4, "total_tokens": 2523232}
1330
+ {"current_steps": 6625, "total_steps": 22320, "loss": 0.6362, "lr": 0.0008866168181896197, "epoch": 5.936379928315413, "percentage": 29.68, "elapsed_time": "0:20:52", "remaining_time": "0:49:28", "throughput": 2015.46, "total_tokens": 2525120}
1331
+ {"current_steps": 6630, "total_steps": 22320, "loss": 0.516, "lr": 0.0008863687720703841, "epoch": 5.940860215053764, "percentage": 29.7, "elapsed_time": "0:20:53", "remaining_time": "0:49:27", "throughput": 2015.47, "total_tokens": 2527008}
1332
+ {"current_steps": 6635, "total_steps": 22320, "loss": 0.5612, "lr": 0.0008861204897030346, "epoch": 5.945340501792114, "percentage": 29.73, "elapsed_time": "0:20:54", "remaining_time": "0:49:26", "throughput": 2015.5, "total_tokens": 2528864}
1333
+ {"current_steps": 6640, "total_steps": 22320, "loss": 0.5025, "lr": 0.0008858719712393851, "epoch": 5.949820788530466, "percentage": 29.75, "elapsed_time": "0:20:55", "remaining_time": "0:49:25", "throughput": 2015.57, "total_tokens": 2530912}
1334
+ {"current_steps": 6645, "total_steps": 22320, "loss": 0.4546, "lr": 0.0008856232168313943, "epoch": 5.954301075268817, "percentage": 29.77, "elapsed_time": "0:20:56", "remaining_time": "0:49:24", "throughput": 2015.62, "total_tokens": 2532800}
1335
+ {"current_steps": 6650, "total_steps": 22320, "loss": 0.4764, "lr": 0.0008853742266311649, "epoch": 5.958781362007168, "percentage": 29.79, "elapsed_time": "0:20:57", "remaining_time": "0:49:23", "throughput": 2015.79, "total_tokens": 2534816}
1336
+ {"current_steps": 6655, "total_steps": 22320, "loss": 0.5446, "lr": 0.0008851250007909439, "epoch": 5.96326164874552, "percentage": 29.82, "elapsed_time": "0:20:58", "remaining_time": "0:49:22", "throughput": 2015.89, "total_tokens": 2536800}
1337
+ {"current_steps": 6660, "total_steps": 22320, "loss": 0.7217, "lr": 0.0008848755394631221, "epoch": 5.967741935483871, "percentage": 29.84, "elapsed_time": "0:20:59", "remaining_time": "0:49:21", "throughput": 2015.94, "total_tokens": 2538688}
1338
+ {"current_steps": 6665, "total_steps": 22320, "loss": 0.4813, "lr": 0.0008846258428002348, "epoch": 5.972222222222222, "percentage": 29.86, "elapsed_time": "0:21:00", "remaining_time": "0:49:20", "throughput": 2016.03, "total_tokens": 2540608}
1339
+ {"current_steps": 6670, "total_steps": 22320, "loss": 0.4075, "lr": 0.0008843759109549606, "epoch": 5.976702508960574, "percentage": 29.88, "elapsed_time": "0:21:01", "remaining_time": "0:49:18", "throughput": 2016.08, "total_tokens": 2542432}
1340
+ {"current_steps": 6675, "total_steps": 22320, "loss": 0.5559, "lr": 0.0008841257440801224, "epoch": 5.981182795698925, "percentage": 29.91, "elapsed_time": "0:21:01", "remaining_time": "0:49:17", "throughput": 2016.13, "total_tokens": 2544192}
1341
+ {"current_steps": 6680, "total_steps": 22320, "loss": 0.3617, "lr": 0.0008838753423286869, "epoch": 5.985663082437276, "percentage": 29.93, "elapsed_time": "0:21:02", "remaining_time": "0:49:16", "throughput": 2016.17, "total_tokens": 2545952}
1342
+ {"current_steps": 6685, "total_steps": 22320, "loss": 0.5103, "lr": 0.0008836247058537638, "epoch": 5.990143369175628, "percentage": 29.95, "elapsed_time": "0:21:03", "remaining_time": "0:49:15", "throughput": 2016.23, "total_tokens": 2547904}
1343
+ {"current_steps": 6690, "total_steps": 22320, "loss": 0.4206, "lr": 0.0008833738348086067, "epoch": 5.994623655913978, "percentage": 29.97, "elapsed_time": "0:21:04", "remaining_time": "0:49:14", "throughput": 2016.37, "total_tokens": 2549952}
1344
+ {"current_steps": 6695, "total_steps": 22320, "loss": 0.3895, "lr": 0.0008831227293466128, "epoch": 5.999103942652329, "percentage": 30.0, "elapsed_time": "0:21:05", "remaining_time": "0:49:13", "throughput": 2016.48, "total_tokens": 2551904}
1345
+ {"current_steps": 6696, "total_steps": 22320, "eval_loss": 0.5720012187957764, "epoch": 6.0, "percentage": 30.0, "elapsed_time": "0:21:15", "remaining_time": "0:49:36", "throughput": 2000.45, "total_tokens": 2551992}
1346
+ {"current_steps": 6700, "total_steps": 22320, "loss": 0.6547, "lr": 0.0008828713896213222, "epoch": 6.003584229390681, "percentage": 30.02, "elapsed_time": "0:21:17", "remaining_time": "0:49:37", "throughput": 1999.01, "total_tokens": 2553400}
1347
+ {"current_steps": 6705, "total_steps": 22320, "loss": 0.4619, "lr": 0.0008826198157864186, "epoch": 6.008064516129032, "percentage": 30.04, "elapsed_time": "0:21:18", "remaining_time": "0:49:36", "throughput": 1999.03, "total_tokens": 2555288}
1348
+ {"current_steps": 6710, "total_steps": 22320, "loss": 0.6339, "lr": 0.0008823680079957287, "epoch": 6.012544802867383, "percentage": 30.06, "elapsed_time": "0:21:19", "remaining_time": "0:49:35", "throughput": 1999.09, "total_tokens": 2557112}