Training in progress, step 5360
Browse files- adapter_model.safetensors +1 -1
- trainer_log.jsonl +51 -0
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1638528
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e170aa5e4831cbcb8ba20e2234044d395266262f7c15e3147337251252ae0fdf
|
| 3 |
size 1638528
|
trainer_log.jsonl
CHANGED
|
@@ -1040,3 +1040,54 @@
|
|
| 1040 |
{"current_steps": 5105, "total_steps": 5360, "loss": 0.8389, "lr": 3.4663221854105423e-07, "epoch": 9.524253731343283, "percentage": 95.24, "elapsed_time": "0:16:17", "remaining_time": "0:00:48", "throughput": 1484.58, "total_tokens": 1451024}
|
| 1041 |
{"current_steps": 5110, "total_steps": 5360, "loss": 0.4868, "lr": 3.332539971966836e-07, "epoch": 9.533582089552239, "percentage": 95.34, "elapsed_time": "0:16:18", "remaining_time": "0:00:47", "throughput": 1484.66, "total_tokens": 1452304}
|
| 1042 |
{"current_steps": 5115, "total_steps": 5360, "loss": 0.4929, "lr": 3.201373149577247e-07, "epoch": 9.542910447761194, "percentage": 95.43, "elapsed_time": "0:16:19", "remaining_time": "0:00:46", "throughput": 1485.01, "total_tokens": 1453968}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1040 |
{"current_steps": 5105, "total_steps": 5360, "loss": 0.8389, "lr": 3.4663221854105423e-07, "epoch": 9.524253731343283, "percentage": 95.24, "elapsed_time": "0:16:17", "remaining_time": "0:00:48", "throughput": 1484.58, "total_tokens": 1451024}
|
| 1041 |
{"current_steps": 5110, "total_steps": 5360, "loss": 0.4868, "lr": 3.332539971966836e-07, "epoch": 9.533582089552239, "percentage": 95.34, "elapsed_time": "0:16:18", "remaining_time": "0:00:47", "throughput": 1484.66, "total_tokens": 1452304}
|
| 1042 |
{"current_steps": 5115, "total_steps": 5360, "loss": 0.4929, "lr": 3.201373149577247e-07, "epoch": 9.542910447761194, "percentage": 95.43, "elapsed_time": "0:16:19", "remaining_time": "0:00:46", "throughput": 1485.01, "total_tokens": 1453968}
|
| 1043 |
+
{"current_steps": 5120, "total_steps": 5360, "loss": 0.4344, "lr": 3.0728231089907634e-07, "epoch": 9.552238805970148, "percentage": 95.52, "elapsed_time": "0:16:19", "remaining_time": "0:00:45", "throughput": 1485.2, "total_tokens": 1455376}
|
| 1044 |
+
{"current_steps": 5125, "total_steps": 5360, "loss": 0.3555, "lr": 2.946891213211012e-07, "epoch": 9.561567164179104, "percentage": 95.62, "elapsed_time": "0:16:20", "remaining_time": "0:00:44", "throughput": 1485.38, "total_tokens": 1456816}
|
| 1045 |
+
{"current_steps": 5130, "total_steps": 5360, "loss": 0.4273, "lr": 2.823578797481574e-07, "epoch": 9.57089552238806, "percentage": 95.71, "elapsed_time": "0:16:21", "remaining_time": "0:00:44", "throughput": 1485.64, "total_tokens": 1458320}
|
| 1046 |
+
{"current_steps": 5135, "total_steps": 5360, "loss": 0.5191, "lr": 2.7028871692720003e-07, "epoch": 9.580223880597014, "percentage": 95.8, "elapsed_time": "0:16:22", "remaining_time": "0:00:43", "throughput": 1485.73, "total_tokens": 1459632}
|
| 1047 |
+
{"current_steps": 5140, "total_steps": 5360, "loss": 0.4946, "lr": 2.5848176082639007e-07, "epoch": 9.58955223880597, "percentage": 95.9, "elapsed_time": "0:16:23", "remaining_time": "0:00:42", "throughput": 1485.86, "total_tokens": 1461008}
|
| 1048 |
+
{"current_steps": 5145, "total_steps": 5360, "loss": 0.2777, "lr": 2.4693713663372644e-07, "epoch": 9.598880597014926, "percentage": 95.99, "elapsed_time": "0:16:24", "remaining_time": "0:00:41", "throughput": 1485.88, "total_tokens": 1462224}
|
| 1049 |
+
{"current_steps": 5150, "total_steps": 5360, "loss": 0.5296, "lr": 2.3565496675574118e-07, "epoch": 9.60820895522388, "percentage": 96.08, "elapsed_time": "0:16:24", "remaining_time": "0:00:40", "throughput": 1486.06, "total_tokens": 1463664}
|
| 1050 |
+
{"current_steps": 5155, "total_steps": 5360, "loss": 0.2898, "lr": 2.246353708161758e-07, "epoch": 9.617537313432836, "percentage": 96.18, "elapsed_time": "0:16:25", "remaining_time": "0:00:39", "throughput": 1486.27, "total_tokens": 1465168}
|
| 1051 |
+
{"current_steps": 5160, "total_steps": 5360, "loss": 0.4143, "lr": 2.1387846565474045e-07, "epoch": 9.626865671641792, "percentage": 96.27, "elapsed_time": "0:16:26", "remaining_time": "0:00:38", "throughput": 1486.41, "total_tokens": 1466544}
|
| 1052 |
+
{"current_steps": 5165, "total_steps": 5360, "loss": 0.6039, "lr": 2.0338436532584827e-07, "epoch": 9.636194029850746, "percentage": 96.36, "elapsed_time": "0:16:27", "remaining_time": "0:00:37", "throughput": 1486.7, "total_tokens": 1468176}
|
| 1053 |
+
{"current_steps": 5170, "total_steps": 5360, "loss": 0.7007, "lr": 1.9315318109742465e-07, "epoch": 9.645522388059701, "percentage": 96.46, "elapsed_time": "0:16:28", "remaining_time": "0:00:36", "throughput": 1486.95, "total_tokens": 1469680}
|
| 1054 |
+
{"current_steps": 5175, "total_steps": 5360, "loss": 0.5124, "lr": 1.831850214497194e-07, "epoch": 9.654850746268657, "percentage": 96.55, "elapsed_time": "0:16:29", "remaining_time": "0:00:35", "throughput": 1487.06, "total_tokens": 1471024}
|
| 1055 |
+
{"current_steps": 5180, "total_steps": 5360, "loss": 0.2848, "lr": 1.7347999207415478e-07, "epoch": 9.664179104477611, "percentage": 96.64, "elapsed_time": "0:16:30", "remaining_time": "0:00:34", "throughput": 1487.23, "total_tokens": 1472464}
|
| 1056 |
+
{"current_steps": 5185, "total_steps": 5360, "loss": 0.4261, "lr": 1.6403819587221814e-07, "epoch": 9.673507462686567, "percentage": 96.74, "elapsed_time": "0:16:30", "remaining_time": "0:00:33", "throughput": 1487.41, "total_tokens": 1473904}
|
| 1057 |
+
{"current_steps": 5190, "total_steps": 5360, "loss": 0.575, "lr": 1.5485973295434885e-07, "epoch": 9.682835820895523, "percentage": 96.83, "elapsed_time": "0:16:31", "remaining_time": "0:00:32", "throughput": 1487.66, "total_tokens": 1475408}
|
| 1058 |
+
{"current_steps": 5195, "total_steps": 5360, "loss": 0.3295, "lr": 1.4594470063890308e-07, "epoch": 9.692164179104477, "percentage": 96.92, "elapsed_time": "0:16:32", "remaining_time": "0:00:31", "throughput": 1487.8, "total_tokens": 1476784}
|
| 1059 |
+
{"current_steps": 5200, "total_steps": 5360, "loss": 0.5133, "lr": 1.3729319345109348e-07, "epoch": 9.701492537313433, "percentage": 97.01, "elapsed_time": "0:16:33", "remaining_time": "0:00:30", "throughput": 1487.93, "total_tokens": 1478160}
|
| 1060 |
+
{"current_steps": 5205, "total_steps": 5360, "loss": 0.3318, "lr": 1.2890530312200945e-07, "epoch": 9.710820895522389, "percentage": 97.11, "elapsed_time": "0:16:34", "remaining_time": "0:00:29", "throughput": 1488.11, "total_tokens": 1479600}
|
| 1061 |
+
{"current_steps": 5210, "total_steps": 5360, "loss": 0.2828, "lr": 1.207811185876373e-07, "epoch": 9.720149253731343, "percentage": 97.2, "elapsed_time": "0:16:35", "remaining_time": "0:00:28", "throughput": 1488.22, "total_tokens": 1480944}
|
| 1062 |
+
{"current_steps": 5215, "total_steps": 5360, "loss": 0.3973, "lr": 1.1292072598791114e-07, "epoch": 9.729477611940299, "percentage": 97.29, "elapsed_time": "0:16:35", "remaining_time": "0:00:27", "throughput": 1488.45, "total_tokens": 1482448}
|
| 1063 |
+
{"current_steps": 5220, "total_steps": 5360, "loss": 0.4369, "lr": 1.0532420866581072e-07, "epoch": 9.738805970149254, "percentage": 97.39, "elapsed_time": "0:16:36", "remaining_time": "0:00:26", "throughput": 1488.65, "total_tokens": 1483920}
|
| 1064 |
+
{"current_steps": 5225, "total_steps": 5360, "loss": 0.3919, "lr": 9.799164716646769e-08, "epoch": 9.748134328358208, "percentage": 97.48, "elapsed_time": "0:16:37", "remaining_time": "0:00:25", "throughput": 1488.75, "total_tokens": 1485232}
|
| 1065 |
+
{"current_steps": 5230, "total_steps": 5360, "loss": 0.364, "lr": 9.092311923632191e-08, "epoch": 9.757462686567164, "percentage": 97.57, "elapsed_time": "0:16:38", "remaining_time": "0:00:24", "throughput": 1488.95, "total_tokens": 1486640}
|
| 1066 |
+
{"current_steps": 5235, "total_steps": 5360, "loss": 0.6139, "lr": 8.411869982228038e-08, "epoch": 9.76679104477612, "percentage": 97.67, "elapsed_time": "0:16:39", "remaining_time": "0:00:23", "throughput": 1489.15, "total_tokens": 1488080}
|
| 1067 |
+
{"current_steps": 5240, "total_steps": 5360, "loss": 0.3338, "lr": 7.757846107094291e-08, "epoch": 9.776119402985074, "percentage": 97.76, "elapsed_time": "0:16:40", "remaining_time": "0:00:22", "throughput": 1489.29, "total_tokens": 1489456}
|
| 1068 |
+
{"current_steps": 5245, "total_steps": 5360, "loss": 0.3959, "lr": 7.130247232782216e-08, "epoch": 9.78544776119403, "percentage": 97.85, "elapsed_time": "0:16:40", "remaining_time": "0:00:21", "throughput": 1489.57, "total_tokens": 1491024}
|
| 1069 |
+
{"current_steps": 5250, "total_steps": 5360, "loss": 0.3863, "lr": 6.529080013661648e-08, "epoch": 9.794776119402986, "percentage": 97.95, "elapsed_time": "0:16:41", "remaining_time": "0:00:20", "throughput": 1489.74, "total_tokens": 1492432}
|
| 1070 |
+
{"current_steps": 5255, "total_steps": 5360, "loss": 0.3354, "lr": 5.954350823850208e-08, "epoch": 9.80410447761194, "percentage": 98.04, "elapsed_time": "0:16:42", "remaining_time": "0:00:20", "throughput": 1490.01, "total_tokens": 1494064}
|
| 1071 |
+
{"current_steps": 5260, "total_steps": 5360, "loss": 0.3973, "lr": 5.4060657571453064e-08, "epoch": 9.813432835820896, "percentage": 98.13, "elapsed_time": "0:16:43", "remaining_time": "0:00:19", "throughput": 1490.2, "total_tokens": 1495536}
|
| 1072 |
+
{"current_steps": 5265, "total_steps": 5360, "loss": 0.4862, "lr": 4.884230626960307e-08, "epoch": 9.822761194029852, "percentage": 98.23, "elapsed_time": "0:16:44", "remaining_time": "0:00:18", "throughput": 1490.28, "total_tokens": 1496848}
|
| 1073 |
+
{"current_steps": 5270, "total_steps": 5360, "loss": 0.3312, "lr": 4.388850966261793e-08, "epoch": 9.832089552238806, "percentage": 98.32, "elapsed_time": "0:16:45", "remaining_time": "0:00:17", "throughput": 1490.5, "total_tokens": 1498352}
|
| 1074 |
+
{"current_steps": 5275, "total_steps": 5360, "loss": 0.477, "lr": 3.919932027512674e-08, "epoch": 9.841417910447761, "percentage": 98.41, "elapsed_time": "0:16:46", "remaining_time": "0:00:16", "throughput": 1490.71, "total_tokens": 1499824}
|
| 1075 |
+
{"current_steps": 5280, "total_steps": 5360, "loss": 0.5223, "lr": 3.477478782614452e-08, "epoch": 9.850746268656717, "percentage": 98.51, "elapsed_time": "0:16:46", "remaining_time": "0:00:15", "throughput": 1490.94, "total_tokens": 1501328}
|
| 1076 |
+
{"current_steps": 5285, "total_steps": 5360, "loss": 0.4075, "lr": 3.061495922855873e-08, "epoch": 9.860074626865671, "percentage": 98.6, "elapsed_time": "0:16:47", "remaining_time": "0:00:14", "throughput": 1491.03, "total_tokens": 1502640}
|
| 1077 |
+
{"current_steps": 5290, "total_steps": 5360, "loss": 0.4224, "lr": 2.67198785886269e-08, "epoch": 9.869402985074627, "percentage": 98.69, "elapsed_time": "0:16:48", "remaining_time": "0:00:13", "throughput": 1491.28, "total_tokens": 1504144}
|
| 1078 |
+
{"current_steps": 5295, "total_steps": 5360, "loss": 0.531, "lr": 2.3089587205507578e-08, "epoch": 9.878731343283581, "percentage": 98.79, "elapsed_time": "0:16:49", "remaining_time": "0:00:12", "throughput": 1491.48, "total_tokens": 1505584}
|
| 1079 |
+
{"current_steps": 5300, "total_steps": 5360, "loss": 0.4155, "lr": 1.972412357083009e-08, "epoch": 9.888059701492537, "percentage": 98.88, "elapsed_time": "0:16:50", "remaining_time": "0:00:11", "throughput": 1491.8, "total_tokens": 1507184}
|
| 1080 |
+
{"current_steps": 5305, "total_steps": 5360, "loss": 0.4789, "lr": 1.662352336827544e-08, "epoch": 9.897388059701493, "percentage": 98.97, "elapsed_time": "0:16:51", "remaining_time": "0:00:10", "throughput": 1492.0, "total_tokens": 1508656}
|
| 1081 |
+
{"current_steps": 5310, "total_steps": 5360, "loss": 0.5689, "lr": 1.3787819473207176e-08, "epoch": 9.906716417910447, "percentage": 99.07, "elapsed_time": "0:16:52", "remaining_time": "0:00:09", "throughput": 1492.22, "total_tokens": 1510192}
|
| 1082 |
+
{"current_steps": 5315, "total_steps": 5360, "loss": 0.2944, "lr": 1.1217041952313323e-08, "epoch": 9.916044776119403, "percentage": 99.16, "elapsed_time": "0:16:52", "remaining_time": "0:00:08", "throughput": 1492.39, "total_tokens": 1511632}
|
| 1083 |
+
{"current_steps": 5320, "total_steps": 5360, "loss": 0.2992, "lr": 8.91121806330386e-09, "epoch": 9.925373134328359, "percentage": 99.25, "elapsed_time": "0:16:53", "remaining_time": "0:00:07", "throughput": 1492.52, "total_tokens": 1513008}
|
| 1084 |
+
{"current_steps": 5325, "total_steps": 5360, "loss": 0.3149, "lr": 6.870372254602631e-09, "epoch": 9.934701492537313, "percentage": 99.35, "elapsed_time": "0:16:54", "remaining_time": "0:00:06", "throughput": 1492.68, "total_tokens": 1514416}
|
| 1085 |
+
{"current_steps": 5330, "total_steps": 5360, "loss": 0.6028, "lr": 5.09452616509476e-09, "epoch": 9.944029850746269, "percentage": 99.44, "elapsed_time": "0:16:55", "remaining_time": "0:00:05", "throughput": 1492.87, "total_tokens": 1515856}
|
| 1086 |
+
{"current_steps": 5335, "total_steps": 5360, "loss": 0.4039, "lr": 3.58369862391017e-09, "epoch": 9.953358208955224, "percentage": 99.53, "elapsed_time": "0:16:56", "remaining_time": "0:00:04", "throughput": 1493.07, "total_tokens": 1517328}
|
| 1087 |
+
{"current_steps": 5340, "total_steps": 5360, "loss": 0.3266, "lr": 2.3379056502015327e-09, "epoch": 9.962686567164178, "percentage": 99.63, "elapsed_time": "0:16:57", "remaining_time": "0:00:03", "throughput": 1493.25, "total_tokens": 1518800}
|
| 1088 |
+
{"current_steps": 5345, "total_steps": 5360, "loss": 0.4888, "lr": 1.357160452988837e-09, "epoch": 9.972014925373134, "percentage": 99.72, "elapsed_time": "0:16:57", "remaining_time": "0:00:02", "throughput": 1493.4, "total_tokens": 1520208}
|
| 1089 |
+
{"current_steps": 5350, "total_steps": 5360, "loss": 0.319, "lr": 6.414734310233872e-10, "epoch": 9.98134328358209, "percentage": 99.81, "elapsed_time": "0:16:58", "remaining_time": "0:00:01", "throughput": 1493.54, "total_tokens": 1521616}
|
| 1090 |
+
{"current_steps": 5355, "total_steps": 5360, "loss": 0.4874, "lr": 1.9085217266290312e-10, "epoch": 9.990671641791044, "percentage": 99.91, "elapsed_time": "0:16:59", "remaining_time": "0:00:00", "throughput": 1493.66, "total_tokens": 1522992}
|
| 1091 |
+
{"current_steps": 5360, "total_steps": 5360, "loss": 0.2695, "lr": 5.3014557993558144e-12, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:17:00", "remaining_time": "0:00:00", "throughput": 1493.58, "total_tokens": 1524216}
|
| 1092 |
+
{"current_steps": 5360, "total_steps": 5360, "eval_loss": 0.6314803957939148, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:17:05", "remaining_time": "0:00:00", "throughput": 1486.91, "total_tokens": 1524216}
|
| 1093 |
+
{"current_steps": 5360, "total_steps": 5360, "epoch": 10.0, "percentage": 100.0, "elapsed_time": "0:17:06", "remaining_time": "0:00:00", "throughput": 1484.55, "total_tokens": 1524216}
|