Training in progress, step 1400
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +41 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 257072
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4692d2476193299efb0eb472ca3a6a3182de51f294ca59121f94238c8c84626
|
| 3 |
size 257072
|
trainer_log.jsonl
CHANGED
|
@@ -244,3 +244,44 @@
|
|
| 244 |
{"current_steps": 1195, "total_steps": 40000, "loss": 15.4037, "lr": 4.989015503897561e-05, "epoch": 0.15299916778695347, "percentage": 2.99, "elapsed_time": "0:29:29", "remaining_time": "15:57:27", "throughput": 3961.76, "total_tokens": 7008704}
|
| 245 |
{"current_steps": 1200, "total_steps": 40000, "loss": 14.5867, "lr": 4.988923381955383e-05, "epoch": 0.15363933166890725, "percentage": 3.0, "elapsed_time": "0:29:32", "remaining_time": "15:55:11", "throughput": 3970.78, "total_tokens": 7038272}
|
| 246 |
{"current_steps": 1200, "total_steps": 40000, "eval_loss": 14.924871444702148, "epoch": 0.15363933166890725, "percentage": 3.0, "elapsed_time": "0:32:39", "remaining_time": "17:36:09", "throughput": 3591.19, "total_tokens": 7038272}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
{"current_steps": 1195, "total_steps": 40000, "loss": 15.4037, "lr": 4.989015503897561e-05, "epoch": 0.15299916778695347, "percentage": 2.99, "elapsed_time": "0:29:29", "remaining_time": "15:57:27", "throughput": 3961.76, "total_tokens": 7008704}
|
| 245 |
{"current_steps": 1200, "total_steps": 40000, "loss": 14.5867, "lr": 4.988923381955383e-05, "epoch": 0.15363933166890725, "percentage": 3.0, "elapsed_time": "0:29:32", "remaining_time": "15:55:11", "throughput": 3970.78, "total_tokens": 7038272}
|
| 246 |
{"current_steps": 1200, "total_steps": 40000, "eval_loss": 14.924871444702148, "epoch": 0.15363933166890725, "percentage": 3.0, "elapsed_time": "0:32:39", "remaining_time": "17:36:09", "throughput": 3591.19, "total_tokens": 7038272}
|
| 247 |
+
{"current_steps": 1205, "total_steps": 40000, "loss": 15.0996, "lr": 4.988830876189942e-05, "epoch": 0.154279495550861, "percentage": 3.01, "elapsed_time": "0:32:45", "remaining_time": "17:34:23", "throughput": 3596.74, "total_tokens": 7067680}
|
| 248 |
+
{"current_steps": 1210, "total_steps": 40000, "loss": 14.347, "lr": 4.988737986615503e-05, "epoch": 0.1549196594328148, "percentage": 3.02, "elapsed_time": "0:32:48", "remaining_time": "17:31:43", "throughput": 3605.04, "total_tokens": 7096192}
|
| 249 |
+
{"current_steps": 1215, "total_steps": 40000, "loss": 14.8178, "lr": 4.988644713246391e-05, "epoch": 0.15555982331476859, "percentage": 3.04, "elapsed_time": "0:32:51", "remaining_time": "17:29:04", "throughput": 3613.59, "total_tokens": 7125376}
|
| 250 |
+
{"current_steps": 1220, "total_steps": 40000, "loss": 14.7585, "lr": 4.988551056096991e-05, "epoch": 0.15619998719672237, "percentage": 3.05, "elapsed_time": "0:32:55", "remaining_time": "17:26:26", "throughput": 3621.88, "total_tokens": 7154016}
|
| 251 |
+
{"current_steps": 1225, "total_steps": 40000, "loss": 14.1274, "lr": 4.988457015181743e-05, "epoch": 0.15684015107867613, "percentage": 3.06, "elapsed_time": "0:32:58", "remaining_time": "17:23:53", "throughput": 3631.5, "total_tokens": 7185888}
|
| 252 |
+
{"current_steps": 1230, "total_steps": 40000, "loss": 15.081, "lr": 4.988362590515153e-05, "epoch": 0.15748031496062992, "percentage": 3.08, "elapsed_time": "0:33:02", "remaining_time": "17:21:18", "throughput": 3639.9, "total_tokens": 7214912}
|
| 253 |
+
{"current_steps": 1235, "total_steps": 40000, "loss": 14.3095, "lr": 4.9882677821117805e-05, "epoch": 0.1581204788425837, "percentage": 3.09, "elapsed_time": "0:33:05", "remaining_time": "17:18:45", "throughput": 3648.5, "total_tokens": 7244448}
|
| 254 |
+
{"current_steps": 1240, "total_steps": 40000, "loss": 15.617, "lr": 4.988172589986246e-05, "epoch": 0.1587606427245375, "percentage": 3.1, "elapsed_time": "0:33:09", "remaining_time": "17:16:13", "throughput": 3657.23, "total_tokens": 7274336}
|
| 255 |
+
{"current_steps": 1245, "total_steps": 40000, "loss": 15.1727, "lr": 4.9880770141532304e-05, "epoch": 0.15940080660649125, "percentage": 3.11, "elapsed_time": "0:33:12", "remaining_time": "17:13:42", "throughput": 3666.05, "total_tokens": 7304512}
|
| 256 |
+
{"current_steps": 1250, "total_steps": 40000, "loss": 14.7478, "lr": 4.987981054627472e-05, "epoch": 0.16004097048844504, "percentage": 3.12, "elapsed_time": "0:33:15", "remaining_time": "17:11:13", "throughput": 3674.51, "total_tokens": 7333984}
|
| 257 |
+
{"current_steps": 1255, "total_steps": 40000, "loss": 14.6592, "lr": 4.987884711423769e-05, "epoch": 0.16068113437039883, "percentage": 3.14, "elapsed_time": "0:33:19", "remaining_time": "17:08:45", "throughput": 3683.3, "total_tokens": 7364320}
|
| 258 |
+
{"current_steps": 1260, "total_steps": 40000, "loss": 14.5911, "lr": 4.9877879845569784e-05, "epoch": 0.1613212982523526, "percentage": 3.15, "elapsed_time": "0:33:22", "remaining_time": "17:06:18", "throughput": 3691.58, "total_tokens": 7393504}
|
| 259 |
+
{"current_steps": 1265, "total_steps": 40000, "loss": 14.8758, "lr": 4.9876908740420175e-05, "epoch": 0.16196146213430637, "percentage": 3.16, "elapsed_time": "0:33:26", "remaining_time": "17:03:51", "throughput": 3699.67, "total_tokens": 7422304}
|
| 260 |
+
{"current_steps": 1270, "total_steps": 40000, "loss": 15.2936, "lr": 4.987593379893861e-05, "epoch": 0.16260162601626016, "percentage": 3.17, "elapsed_time": "0:33:29", "remaining_time": "17:01:26", "throughput": 3707.99, "total_tokens": 7451744}
|
| 261 |
+
{"current_steps": 1275, "total_steps": 40000, "loss": 14.7761, "lr": 4.987495502127545e-05, "epoch": 0.16324178989821395, "percentage": 3.19, "elapsed_time": "0:33:33", "remaining_time": "16:59:02", "throughput": 3716.39, "total_tokens": 7481408}
|
| 262 |
+
{"current_steps": 1280, "total_steps": 40000, "loss": 14.5276, "lr": 4.987397240758162e-05, "epoch": 0.16388195378016773, "percentage": 3.2, "elapsed_time": "0:33:36", "remaining_time": "16:56:40", "throughput": 3724.76, "total_tokens": 7511136}
|
| 263 |
+
{"current_steps": 1285, "total_steps": 40000, "loss": 14.3979, "lr": 4.9872985958008664e-05, "epoch": 0.1645221176621215, "percentage": 3.21, "elapsed_time": "0:33:39", "remaining_time": "16:54:19", "throughput": 3733.21, "total_tokens": 7541056}
|
| 264 |
+
{"current_steps": 1290, "total_steps": 40000, "loss": 14.6138, "lr": 4.987199567270871e-05, "epoch": 0.16516228154407528, "percentage": 3.23, "elapsed_time": "0:33:43", "remaining_time": "16:51:58", "throughput": 3741.33, "total_tokens": 7570336}
|
| 265 |
+
{"current_steps": 1295, "total_steps": 40000, "loss": 14.1238, "lr": 4.9871001551834444e-05, "epoch": 0.16580244542602907, "percentage": 3.24, "elapsed_time": "0:33:46", "remaining_time": "16:49:38", "throughput": 3749.18, "total_tokens": 7598976}
|
| 266 |
+
{"current_steps": 1300, "total_steps": 40000, "loss": 14.9749, "lr": 4.98700035955392e-05, "epoch": 0.16644260930798285, "percentage": 3.25, "elapsed_time": "0:33:50", "remaining_time": "16:47:19", "throughput": 3757.42, "total_tokens": 7628544}
|
| 267 |
+
{"current_steps": 1305, "total_steps": 40000, "loss": 14.4559, "lr": 4.986900180397686e-05, "epoch": 0.1670827731899366, "percentage": 3.26, "elapsed_time": "0:33:53", "remaining_time": "16:45:01", "throughput": 3765.55, "total_tokens": 7657920}
|
| 268 |
+
{"current_steps": 1310, "total_steps": 40000, "loss": 14.6495, "lr": 4.9867996177301926e-05, "epoch": 0.1677229370718904, "percentage": 3.28, "elapsed_time": "0:33:57", "remaining_time": "16:42:44", "throughput": 3773.69, "total_tokens": 7687360}
|
| 269 |
+
{"current_steps": 1315, "total_steps": 40000, "loss": 15.424, "lr": 4.9866986715669464e-05, "epoch": 0.16836310095384419, "percentage": 3.29, "elapsed_time": "0:34:00", "remaining_time": "16:40:27", "throughput": 3781.07, "total_tokens": 7715200}
|
| 270 |
+
{"current_steps": 1320, "total_steps": 40000, "loss": 14.7766, "lr": 4.9865973419235155e-05, "epoch": 0.16900326483579797, "percentage": 3.3, "elapsed_time": "0:34:03", "remaining_time": "16:38:13", "throughput": 3789.52, "total_tokens": 7745568}
|
| 271 |
+
{"current_steps": 1325, "total_steps": 40000, "loss": 14.3843, "lr": 4.986495628815526e-05, "epoch": 0.16964342871775173, "percentage": 3.31, "elapsed_time": "0:34:07", "remaining_time": "16:35:58", "throughput": 3797.14, "total_tokens": 7773984}
|
| 272 |
+
{"current_steps": 1330, "total_steps": 40000, "loss": 14.7862, "lr": 4.986393532258663e-05, "epoch": 0.17028359259970552, "percentage": 3.33, "elapsed_time": "0:34:10", "remaining_time": "16:33:45", "throughput": 3805.04, "total_tokens": 7803136}
|
| 273 |
+
{"current_steps": 1335, "total_steps": 40000, "loss": 16.6248, "lr": 4.986291052268671e-05, "epoch": 0.1709237564816593, "percentage": 3.34, "elapsed_time": "0:34:14", "remaining_time": "16:31:33", "throughput": 3812.68, "total_tokens": 7831808}
|
| 274 |
+
{"current_steps": 1340, "total_steps": 40000, "loss": 15.705, "lr": 4.986188188861355e-05, "epoch": 0.1715639203636131, "percentage": 3.35, "elapsed_time": "0:34:17", "remaining_time": "16:29:21", "throughput": 3820.21, "total_tokens": 7860224}
|
| 275 |
+
{"current_steps": 1345, "total_steps": 40000, "loss": 14.2646, "lr": 4.9860849420525766e-05, "epoch": 0.17220408424556685, "percentage": 3.36, "elapsed_time": "0:34:20", "remaining_time": "16:27:10", "throughput": 3827.86, "total_tokens": 7888960}
|
| 276 |
+
{"current_steps": 1350, "total_steps": 40000, "loss": 15.5638, "lr": 4.9859813118582575e-05, "epoch": 0.17284424812752064, "percentage": 3.38, "elapsed_time": "0:34:24", "remaining_time": "16:25:02", "throughput": 3835.7, "total_tokens": 7918304}
|
| 277 |
+
{"current_steps": 1355, "total_steps": 40000, "loss": 14.5794, "lr": 4.98587729829438e-05, "epoch": 0.17348441200947443, "percentage": 3.39, "elapsed_time": "0:34:27", "remaining_time": "16:22:53", "throughput": 3843.24, "total_tokens": 7946912}
|
| 278 |
+
{"current_steps": 1360, "total_steps": 40000, "loss": 14.1284, "lr": 4.985772901376983e-05, "epoch": 0.1741245758914282, "percentage": 3.4, "elapsed_time": "0:34:31", "remaining_time": "16:20:46", "throughput": 3850.78, "total_tokens": 7975680}
|
| 279 |
+
{"current_steps": 1365, "total_steps": 40000, "loss": 14.6585, "lr": 4.9856681211221666e-05, "epoch": 0.174764739773382, "percentage": 3.41, "elapsed_time": "0:34:34", "remaining_time": "16:18:39", "throughput": 3858.53, "total_tokens": 8004864}
|
| 280 |
+
{"current_steps": 1370, "total_steps": 40000, "loss": 15.578, "lr": 4.985562957546089e-05, "epoch": 0.17540490365533576, "percentage": 3.43, "elapsed_time": "0:34:38", "remaining_time": "16:16:33", "throughput": 3866.23, "total_tokens": 8034048}
|
| 281 |
+
{"current_steps": 1375, "total_steps": 40000, "loss": 14.0787, "lr": 4.9854574106649686e-05, "epoch": 0.17604506753728955, "percentage": 3.44, "elapsed_time": "0:34:41", "remaining_time": "16:14:29", "throughput": 3873.95, "total_tokens": 8063392}
|
| 282 |
+
{"current_steps": 1380, "total_steps": 40000, "loss": 14.3822, "lr": 4.985351480495081e-05, "epoch": 0.17668523141924333, "percentage": 3.45, "elapsed_time": "0:34:44", "remaining_time": "16:12:25", "throughput": 3881.41, "total_tokens": 8092192}
|
| 283 |
+
{"current_steps": 1385, "total_steps": 40000, "loss": 15.4682, "lr": 4.985245167052762e-05, "epoch": 0.17732539530119712, "percentage": 3.46, "elapsed_time": "0:34:48", "remaining_time": "16:10:22", "throughput": 3888.98, "total_tokens": 8121184}
|
| 284 |
+
{"current_steps": 1390, "total_steps": 40000, "loss": 13.7816, "lr": 4.9851384703544066e-05, "epoch": 0.17796555918315088, "percentage": 3.48, "elapsed_time": "0:34:51", "remaining_time": "16:08:19", "throughput": 3896.61, "total_tokens": 8150368}
|
| 285 |
+
{"current_steps": 1395, "total_steps": 40000, "loss": 14.1934, "lr": 4.985031390416469e-05, "epoch": 0.17860572306510467, "percentage": 3.49, "elapsed_time": "0:34:55", "remaining_time": "16:06:19", "throughput": 3904.18, "total_tokens": 8179648}
|
| 286 |
+
{"current_steps": 1400, "total_steps": 40000, "loss": 14.5693, "lr": 4.984923927255461e-05, "epoch": 0.17924588694705845, "percentage": 3.5, "elapsed_time": "0:34:58", "remaining_time": "16:04:18", "throughput": 3911.43, "total_tokens": 8208128}
|
| 287 |
+
{"current_steps": 1400, "total_steps": 40000, "eval_loss": 14.9047269821167, "epoch": 0.17924588694705845, "percentage": 3.5, "elapsed_time": "0:38:05", "remaining_time": "17:30:25", "throughput": 3590.79, "total_tokens": 8208128}
|