Training in progress, step 1400
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +41 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 18124968
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b049d5651cc3c510e8db7cf03abb995e40228a85051d1de3907b1478e216e713
|
| 3 |
size 18124968
|
trainer_log.jsonl
CHANGED
|
@@ -244,3 +244,44 @@
|
|
| 244 |
{"current_steps": 1195, "total_steps": 40000, "loss": 0.5034, "lr": 4.989015503897561e-05, "epoch": 0.30599833557390693, "percentage": 2.99, "elapsed_time": "0:55:57", "remaining_time": "1 day, 6:17:14", "throughput": 4164.43, "total_tokens": 13983040}
|
| 245 |
{"current_steps": 1200, "total_steps": 40000, "loss": 0.5224, "lr": 4.988923381955383e-05, "epoch": 0.3072786633378145, "percentage": 3.0, "elapsed_time": "0:56:06", "remaining_time": "1 day, 6:14:16", "throughput": 4170.48, "total_tokens": 14040704}
|
| 246 |
{"current_steps": 1200, "total_steps": 40000, "eval_loss": 0.5356107950210571, "epoch": 0.3072786633378145, "percentage": 3.0, "elapsed_time": "1:00:04", "remaining_time": "1 day, 8:22:26", "throughput": 3895.3, "total_tokens": 14040704}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 244 |
{"current_steps": 1195, "total_steps": 40000, "loss": 0.5034, "lr": 4.989015503897561e-05, "epoch": 0.30599833557390693, "percentage": 2.99, "elapsed_time": "0:55:57", "remaining_time": "1 day, 6:17:14", "throughput": 4164.43, "total_tokens": 13983040}
|
| 245 |
{"current_steps": 1200, "total_steps": 40000, "loss": 0.5224, "lr": 4.988923381955383e-05, "epoch": 0.3072786633378145, "percentage": 3.0, "elapsed_time": "0:56:06", "remaining_time": "1 day, 6:14:16", "throughput": 4170.48, "total_tokens": 14040704}
|
| 246 |
{"current_steps": 1200, "total_steps": 40000, "eval_loss": 0.5356107950210571, "epoch": 0.3072786633378145, "percentage": 3.0, "elapsed_time": "1:00:04", "remaining_time": "1 day, 8:22:26", "throughput": 3895.3, "total_tokens": 14040704}
|
| 247 |
+
{"current_steps": 1205, "total_steps": 40000, "loss": 0.4913, "lr": 4.988830876189942e-05, "epoch": 0.308558991101722, "percentage": 3.01, "elapsed_time": "1:00:15", "remaining_time": "1 day, 8:20:14", "throughput": 3899.16, "total_tokens": 14099072}
|
| 248 |
+
{"current_steps": 1210, "total_steps": 40000, "loss": 0.5034, "lr": 4.988737986615503e-05, "epoch": 0.3098393188656296, "percentage": 3.02, "elapsed_time": "1:00:24", "remaining_time": "1 day, 8:16:45", "throughput": 3905.99, "total_tokens": 14158688}
|
| 249 |
+
{"current_steps": 1215, "total_steps": 40000, "loss": 0.5239, "lr": 4.988644713246391e-05, "epoch": 0.31111964662953717, "percentage": 3.04, "elapsed_time": "1:00:33", "remaining_time": "1 day, 8:13:18", "throughput": 3912.36, "total_tokens": 14216800}
|
| 250 |
+
{"current_steps": 1220, "total_steps": 40000, "loss": 0.5506, "lr": 4.988551056096991e-05, "epoch": 0.31239997439344475, "percentage": 3.05, "elapsed_time": "1:00:42", "remaining_time": "1 day, 8:09:50", "throughput": 3918.75, "total_tokens": 14274880}
|
| 251 |
+
{"current_steps": 1225, "total_steps": 40000, "loss": 0.5362, "lr": 4.988457015181743e-05, "epoch": 0.31368030215735226, "percentage": 3.06, "elapsed_time": "1:00:51", "remaining_time": "1 day, 8:06:28", "throughput": 3925.03, "total_tokens": 14333120}
|
| 252 |
+
{"current_steps": 1230, "total_steps": 40000, "loss": 0.4998, "lr": 4.988362590515153e-05, "epoch": 0.31496062992125984, "percentage": 3.08, "elapsed_time": "1:01:00", "remaining_time": "1 day, 8:03:06", "throughput": 3931.15, "total_tokens": 14390752}
|
| 253 |
+
{"current_steps": 1235, "total_steps": 40000, "loss": 0.5682, "lr": 4.9882677821117805e-05, "epoch": 0.3162409576851674, "percentage": 3.09, "elapsed_time": "1:01:09", "remaining_time": "1 day, 7:59:46", "throughput": 3937.89, "total_tokens": 14450784}
|
| 254 |
+
{"current_steps": 1240, "total_steps": 40000, "loss": 0.4529, "lr": 4.988172589986246e-05, "epoch": 0.317521285449075, "percentage": 3.1, "elapsed_time": "1:01:19", "remaining_time": "1 day, 7:56:41", "throughput": 3944.64, "total_tokens": 14512672}
|
| 255 |
+
{"current_steps": 1245, "total_steps": 40000, "loss": 0.4684, "lr": 4.9880770141532304e-05, "epoch": 0.3188016132129825, "percentage": 3.11, "elapsed_time": "1:01:28", "remaining_time": "1 day, 7:53:25", "throughput": 3950.72, "total_tokens": 14570688}
|
| 256 |
+
{"current_steps": 1250, "total_steps": 40000, "loss": 0.5747, "lr": 4.987981054627472e-05, "epoch": 0.3200819409768901, "percentage": 3.12, "elapsed_time": "1:01:37", "remaining_time": "1 day, 7:50:09", "throughput": 3957.07, "total_tokens": 14629600}
|
| 257 |
+
{"current_steps": 1255, "total_steps": 40000, "loss": 0.5077, "lr": 4.987884711423769e-05, "epoch": 0.32136226874079765, "percentage": 3.14, "elapsed_time": "1:01:46", "remaining_time": "1 day, 7:46:54", "throughput": 3963.51, "total_tokens": 14688896}
|
| 258 |
+
{"current_steps": 1260, "total_steps": 40000, "loss": 0.5368, "lr": 4.9877879845569784e-05, "epoch": 0.3226425965047052, "percentage": 3.15, "elapsed_time": "1:01:55", "remaining_time": "1 day, 7:43:41", "throughput": 3969.62, "total_tokens": 14747168}
|
| 259 |
+
{"current_steps": 1265, "total_steps": 40000, "loss": 0.5205, "lr": 4.9876908740420175e-05, "epoch": 0.32392292426861274, "percentage": 3.16, "elapsed_time": "1:02:04", "remaining_time": "1 day, 7:40:32", "throughput": 3975.64, "total_tokens": 14805440}
|
| 260 |
+
{"current_steps": 1270, "total_steps": 40000, "loss": 0.4988, "lr": 4.987593379893861e-05, "epoch": 0.3252032520325203, "percentage": 3.17, "elapsed_time": "1:02:13", "remaining_time": "1 day, 7:37:22", "throughput": 3981.38, "total_tokens": 14862624}
|
| 261 |
+
{"current_steps": 1275, "total_steps": 40000, "loss": 0.5342, "lr": 4.987495502127545e-05, "epoch": 0.3264835797964279, "percentage": 3.19, "elapsed_time": "1:02:22", "remaining_time": "1 day, 7:34:14", "throughput": 3987.76, "total_tokens": 14922208}
|
| 262 |
+
{"current_steps": 1280, "total_steps": 40000, "loss": 0.5396, "lr": 4.987397240758162e-05, "epoch": 0.32776390756033547, "percentage": 3.2, "elapsed_time": "1:02:30", "remaining_time": "1 day, 7:31:05", "throughput": 3993.67, "total_tokens": 14979936}
|
| 263 |
+
{"current_steps": 1285, "total_steps": 40000, "loss": 0.5141, "lr": 4.9872985958008664e-05, "epoch": 0.329044235324243, "percentage": 3.21, "elapsed_time": "1:02:39", "remaining_time": "1 day, 7:27:59", "throughput": 3999.57, "total_tokens": 15037984}
|
| 264 |
+
{"current_steps": 1290, "total_steps": 40000, "loss": 0.5612, "lr": 4.987199567270871e-05, "epoch": 0.33032456308815056, "percentage": 3.23, "elapsed_time": "1:02:48", "remaining_time": "1 day, 7:24:56", "throughput": 4005.42, "total_tokens": 15096032}
|
| 265 |
+
{"current_steps": 1295, "total_steps": 40000, "loss": 0.523, "lr": 4.9871001551834444e-05, "epoch": 0.33160489085205813, "percentage": 3.24, "elapsed_time": "1:02:57", "remaining_time": "1 day, 7:21:53", "throughput": 4011.28, "total_tokens": 15154176}
|
| 266 |
+
{"current_steps": 1300, "total_steps": 40000, "loss": 0.573, "lr": 4.98700035955392e-05, "epoch": 0.3328852186159657, "percentage": 3.25, "elapsed_time": "1:03:06", "remaining_time": "1 day, 7:18:52", "throughput": 4017.22, "total_tokens": 15212736}
|
| 267 |
+
{"current_steps": 1305, "total_steps": 40000, "loss": 0.5518, "lr": 4.986900180397686e-05, "epoch": 0.3341655463798732, "percentage": 3.26, "elapsed_time": "1:03:15", "remaining_time": "1 day, 7:15:52", "throughput": 4022.89, "total_tokens": 15270368}
|
| 268 |
+
{"current_steps": 1310, "total_steps": 40000, "loss": 0.486, "lr": 4.9867996177301926e-05, "epoch": 0.3354458741437808, "percentage": 3.28, "elapsed_time": "1:03:24", "remaining_time": "1 day, 7:12:53", "throughput": 4028.86, "total_tokens": 15329152}
|
| 269 |
+
{"current_steps": 1315, "total_steps": 40000, "loss": 0.567, "lr": 4.9866986715669464e-05, "epoch": 0.33672620190768837, "percentage": 3.29, "elapsed_time": "1:03:33", "remaining_time": "1 day, 7:09:55", "throughput": 4034.92, "total_tokens": 15388416}
|
| 270 |
+
{"current_steps": 1320, "total_steps": 40000, "loss": 0.4572, "lr": 4.9865973419235155e-05, "epoch": 0.33800652967159595, "percentage": 3.3, "elapsed_time": "1:03:42", "remaining_time": "1 day, 7:06:58", "throughput": 4041.23, "total_tokens": 15448608}
|
| 271 |
+
{"current_steps": 1325, "total_steps": 40000, "loss": 0.5503, "lr": 4.986495628815526e-05, "epoch": 0.33928685743550346, "percentage": 3.31, "elapsed_time": "1:03:51", "remaining_time": "1 day, 7:04:04", "throughput": 4047.06, "total_tokens": 15507424}
|
| 272 |
+
{"current_steps": 1330, "total_steps": 40000, "loss": 0.5289, "lr": 4.986393532258663e-05, "epoch": 0.34056718519941104, "percentage": 3.33, "elapsed_time": "1:04:00", "remaining_time": "1 day, 7:01:13", "throughput": 4052.69, "total_tokens": 15565760}
|
| 273 |
+
{"current_steps": 1335, "total_steps": 40000, "loss": 0.5213, "lr": 4.986291052268671e-05, "epoch": 0.3418475129633186, "percentage": 3.34, "elapsed_time": "1:04:09", "remaining_time": "1 day, 6:58:19", "throughput": 4058.3, "total_tokens": 15623648}
|
| 274 |
+
{"current_steps": 1340, "total_steps": 40000, "loss": 0.5089, "lr": 4.986188188861355e-05, "epoch": 0.3431278407272262, "percentage": 3.35, "elapsed_time": "1:04:18", "remaining_time": "1 day, 6:55:29", "throughput": 4064.28, "total_tokens": 15683296}
|
| 275 |
+
{"current_steps": 1345, "total_steps": 40000, "loss": 0.5044, "lr": 4.9860849420525766e-05, "epoch": 0.3444081684911337, "percentage": 3.36, "elapsed_time": "1:04:27", "remaining_time": "1 day, 6:52:40", "throughput": 4069.98, "total_tokens": 15741920}
|
| 276 |
+
{"current_steps": 1350, "total_steps": 40000, "loss": 0.4744, "lr": 4.9859813118582575e-05, "epoch": 0.3456884962550413, "percentage": 3.38, "elapsed_time": "1:04:36", "remaining_time": "1 day, 6:49:51", "throughput": 4075.18, "total_tokens": 15798656}
|
| 277 |
+
{"current_steps": 1355, "total_steps": 40000, "loss": 0.5213, "lr": 4.98587729829438e-05, "epoch": 0.34696882401894885, "percentage": 3.39, "elapsed_time": "1:04:45", "remaining_time": "1 day, 6:47:03", "throughput": 4080.54, "total_tokens": 15856128}
|
| 278 |
+
{"current_steps": 1360, "total_steps": 40000, "loss": 0.5909, "lr": 4.985772901376983e-05, "epoch": 0.3482491517828564, "percentage": 3.4, "elapsed_time": "1:04:54", "remaining_time": "1 day, 6:44:15", "throughput": 4086.23, "total_tokens": 15914656}
|
| 279 |
+
{"current_steps": 1365, "total_steps": 40000, "loss": 0.4517, "lr": 4.9856681211221666e-05, "epoch": 0.349529479546764, "percentage": 3.41, "elapsed_time": "1:05:03", "remaining_time": "1 day, 6:41:32", "throughput": 4091.7, "total_tokens": 15973024}
|
| 280 |
+
{"current_steps": 1370, "total_steps": 40000, "loss": 0.5244, "lr": 4.985562957546089e-05, "epoch": 0.3508098073106715, "percentage": 3.43, "elapsed_time": "1:05:12", "remaining_time": "1 day, 6:38:48", "throughput": 4097.28, "total_tokens": 16031616}
|
| 281 |
+
{"current_steps": 1375, "total_steps": 40000, "loss": 0.5822, "lr": 4.9854574106649686e-05, "epoch": 0.3520901350745791, "percentage": 3.44, "elapsed_time": "1:05:21", "remaining_time": "1 day, 6:36:03", "throughput": 4102.75, "total_tokens": 16089728}
|
| 282 |
+
{"current_steps": 1380, "total_steps": 40000, "loss": 0.4936, "lr": 4.985351480495081e-05, "epoch": 0.35337046283848667, "percentage": 3.45, "elapsed_time": "1:05:30", "remaining_time": "1 day, 6:33:23", "throughput": 4107.93, "total_tokens": 16147136}
|
| 283 |
+
{"current_steps": 1385, "total_steps": 40000, "loss": 0.5207, "lr": 4.985245167052762e-05, "epoch": 0.35465079060239424, "percentage": 3.46, "elapsed_time": "1:05:39", "remaining_time": "1 day, 6:30:42", "throughput": 4113.27, "total_tokens": 16205056}
|
| 284 |
+
{"current_steps": 1390, "total_steps": 40000, "loss": 0.4917, "lr": 4.9851384703544066e-05, "epoch": 0.35593111836630176, "percentage": 3.48, "elapsed_time": "1:05:48", "remaining_time": "1 day, 6:28:01", "throughput": 4118.99, "total_tokens": 16264416}
|
| 285 |
+
{"current_steps": 1395, "total_steps": 40000, "loss": 0.4778, "lr": 4.985031390416469e-05, "epoch": 0.35721144613020933, "percentage": 3.49, "elapsed_time": "1:05:57", "remaining_time": "1 day, 6:25:23", "throughput": 4124.22, "total_tokens": 16322176}
|
| 286 |
+
{"current_steps": 1400, "total_steps": 40000, "loss": 0.5305, "lr": 4.984923927255461e-05, "epoch": 0.3584917738941169, "percentage": 3.5, "elapsed_time": "1:06:06", "remaining_time": "1 day, 6:22:47", "throughput": 4129.2, "total_tokens": 16379264}
|
| 287 |
+
{"current_steps": 1400, "total_steps": 40000, "eval_loss": 0.5197495222091675, "epoch": 0.3584917738941169, "percentage": 3.5, "elapsed_time": "1:10:05", "remaining_time": "1 day, 8:12:23", "throughput": 3895.01, "total_tokens": 16379264}
|