Training in progress, step 7600
Browse files- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +60 -0
- training_args.bin +1 -1
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4902257696
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0e50885aaed73a8dfc4ac1b06e08fba79fa5503477defc3e6eeee34f6d045b7
|
| 3 |
size 4902257696
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915960368
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e205cb24f15ca99742001709518e9a613878a0cc3829f6056bc58713cb37c61
|
| 3 |
size 4915960368
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4983068496
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:16e0bbb7eadc2c252853f9b37184b857d4577e550def9ec6b5bd8c3bfa42ff34
|
| 3 |
size 4983068496
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1580230264
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:538ab5256ac540e508280020dc51511c1044696e75ee5557226078b50c1862bf
|
| 3 |
size 1580230264
|
trainer_log.jsonl
CHANGED
|
@@ -2182,3 +2182,63 @@
|
|
| 2182 |
{"current_steps": 7390, "total_steps": 8169, "loss": 0.1345, "lr": 1.1006613505585783e-06, "epoch": 6.332476435304199, "percentage": 90.46, "elapsed_time": "1 day, 23:29:39", "remaining_time": "5:00:23"}
|
| 2183 |
{"current_steps": 7395, "total_steps": 8169, "loss": 0.1336, "lr": 1.0867243311592079e-06, "epoch": 6.336760925449871, "percentage": 90.53, "elapsed_time": "1 day, 23:32:23", "remaining_time": "4:58:32"}
|
| 2184 |
{"current_steps": 7400, "total_steps": 8169, "loss": 0.1375, "lr": 1.072873648439936e-06, "epoch": 6.341045415595544, "percentage": 90.59, "elapsed_time": "1 day, 23:35:10", "remaining_time": "4:56:42"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2182 |
{"current_steps": 7390, "total_steps": 8169, "loss": 0.1345, "lr": 1.1006613505585783e-06, "epoch": 6.332476435304199, "percentage": 90.46, "elapsed_time": "1 day, 23:29:39", "remaining_time": "5:00:23"}
|
| 2183 |
{"current_steps": 7395, "total_steps": 8169, "loss": 0.1336, "lr": 1.0867243311592079e-06, "epoch": 6.336760925449871, "percentage": 90.53, "elapsed_time": "1 day, 23:32:23", "remaining_time": "4:58:32"}
|
| 2184 |
{"current_steps": 7400, "total_steps": 8169, "loss": 0.1375, "lr": 1.072873648439936e-06, "epoch": 6.341045415595544, "percentage": 90.59, "elapsed_time": "1 day, 23:35:10", "remaining_time": "4:56:42"}
|
| 2185 |
+
{"current_steps": 7405, "total_steps": 8169, "loss": 0.1205, "lr": 1.059109365627351e-06, "epoch": 6.345329905741217, "percentage": 90.65, "elapsed_time": "1 day, 23:39:16", "remaining_time": "4:55:00"}
|
| 2186 |
+
{"current_steps": 7410, "total_steps": 8169, "loss": 0.1379, "lr": 1.0454315455536436e-06, "epoch": 6.349614395886889, "percentage": 90.71, "elapsed_time": "1 day, 23:42:04", "remaining_time": "4:53:09"}
|
| 2187 |
+
{"current_steps": 7415, "total_steps": 8169, "loss": 0.1253, "lr": 1.0318402506563062e-06, "epoch": 6.353898886032562, "percentage": 90.77, "elapsed_time": "1 day, 23:45:04", "remaining_time": "4:51:20"}
|
| 2188 |
+
{"current_steps": 7420, "total_steps": 8169, "loss": 0.1406, "lr": 1.0183355429778595e-06, "epoch": 6.358183376178235, "percentage": 90.83, "elapsed_time": "1 day, 23:47:46", "remaining_time": "4:49:29"}
|
| 2189 |
+
{"current_steps": 7425, "total_steps": 8169, "loss": 0.1323, "lr": 1.0049174841655685e-06, "epoch": 6.362467866323907, "percentage": 90.89, "elapsed_time": "1 day, 23:50:35", "remaining_time": "4:47:38"}
|
| 2190 |
+
{"current_steps": 7430, "total_steps": 8169, "loss": 0.1147, "lr": 9.915861354711498e-07, "epoch": 6.36675235646958, "percentage": 90.95, "elapsed_time": "1 day, 23:53:20", "remaining_time": "4:45:47"}
|
| 2191 |
+
{"current_steps": 7435, "total_steps": 8169, "loss": 0.1457, "lr": 9.783415577505018e-07, "epoch": 6.371036846615253, "percentage": 91.01, "elapsed_time": "1 day, 23:56:12", "remaining_time": "4:43:56"}
|
| 2192 |
+
{"current_steps": 7405, "total_steps": 8169, "loss": 0.1332, "lr": 1.059109365627351e-06, "epoch": 6.345329905741217, "percentage": 90.65, "elapsed_time": "0:02:54", "remaining_time": "0:00:18"}
|
| 2193 |
+
{"current_steps": 7410, "total_steps": 8169, "loss": 0.1159, "lr": 1.0454315455536436e-06, "epoch": 6.349614395886889, "percentage": 90.71, "elapsed_time": "0:05:52", "remaining_time": "0:00:36"}
|
| 2194 |
+
{"current_steps": 7415, "total_steps": 8169, "loss": 0.1253, "lr": 1.0318402506563062e-06, "epoch": 6.353898886032562, "percentage": 90.77, "elapsed_time": "0:08:47", "remaining_time": "0:00:53"}
|
| 2195 |
+
{"current_steps": 7420, "total_steps": 8169, "loss": 0.1313, "lr": 1.0183355429778595e-06, "epoch": 6.358183376178235, "percentage": 90.83, "elapsed_time": "0:11:36", "remaining_time": "0:01:10"}
|
| 2196 |
+
{"current_steps": 7425, "total_steps": 8169, "loss": 0.1325, "lr": 1.0049174841655685e-06, "epoch": 6.362467866323907, "percentage": 90.89, "elapsed_time": "0:14:27", "remaining_time": "0:01:26"}
|
| 2197 |
+
{"current_steps": 7430, "total_steps": 8169, "loss": 0.1399, "lr": 9.915861354711498e-07, "epoch": 6.36675235646958, "percentage": 90.95, "elapsed_time": "0:17:17", "remaining_time": "0:01:43"}
|
| 2198 |
+
{"current_steps": 7435, "total_steps": 8169, "loss": 0.134, "lr": 9.783415577505018e-07, "epoch": 6.371036846615253, "percentage": 91.01, "elapsed_time": "0:20:12", "remaining_time": "0:01:59"}
|
| 2199 |
+
{"current_steps": 7440, "total_steps": 8169, "loss": 0.1249, "lr": 9.651838114634216e-07, "epoch": 6.375321336760925, "percentage": 91.08, "elapsed_time": "0:23:06", "remaining_time": "0:02:15"}
|
| 2200 |
+
{"current_steps": 7445, "total_steps": 8169, "loss": 0.117, "lr": 9.521129566733389e-07, "epoch": 6.379605826906598, "percentage": 91.14, "elapsed_time": "0:26:07", "remaining_time": "0:02:32"}
|
| 2201 |
+
{"current_steps": 7450, "total_steps": 8169, "loss": 0.1332, "lr": 9.391290530470277e-07, "epoch": 6.383890317052271, "percentage": 91.2, "elapsed_time": "0:29:00", "remaining_time": "0:02:47"}
|
| 2202 |
+
{"current_steps": 7455, "total_steps": 8169, "loss": 0.1269, "lr": 9.2623215985435e-07, "epoch": 6.388174807197943, "percentage": 91.26, "elapsed_time": "0:31:54", "remaining_time": "0:03:03"}
|
| 2203 |
+
{"current_steps": 7460, "total_steps": 8169, "loss": 0.1305, "lr": 9.134223359679683e-07, "epoch": 6.392459297343616, "percentage": 91.32, "elapsed_time": "0:34:54", "remaining_time": "0:03:19"}
|
| 2204 |
+
{"current_steps": 7465, "total_steps": 8169, "loss": 0.1558, "lr": 9.006996398630851e-07, "epoch": 6.396743787489289, "percentage": 91.38, "elapsed_time": "0:37:38", "remaining_time": "0:03:33"}
|
| 2205 |
+
{"current_steps": 7470, "total_steps": 8169, "loss": 0.1394, "lr": 8.88064129617181e-07, "epoch": 6.401028277634961, "percentage": 91.44, "elapsed_time": "0:40:27", "remaining_time": "0:03:47"}
|
| 2206 |
+
{"current_steps": 7475, "total_steps": 8169, "loss": 0.1281, "lr": 8.755158629097393e-07, "epoch": 6.405312767780634, "percentage": 91.5, "elapsed_time": "0:43:08", "remaining_time": "0:04:00"}
|
| 2207 |
+
{"current_steps": 7480, "total_steps": 8169, "loss": 0.1327, "lr": 8.630548970219888e-07, "epoch": 6.409597257926307, "percentage": 91.57, "elapsed_time": "0:45:55", "remaining_time": "0:04:13"}
|
| 2208 |
+
{"current_steps": 7485, "total_steps": 8169, "loss": 0.1291, "lr": 8.506812888366412e-07, "epoch": 6.413881748071979, "percentage": 91.63, "elapsed_time": "0:48:47", "remaining_time": "0:04:27"}
|
| 2209 |
+
{"current_steps": 7490, "total_steps": 8169, "loss": 0.1342, "lr": 8.383950948376385e-07, "epoch": 6.418166238217652, "percentage": 91.69, "elapsed_time": "0:51:38", "remaining_time": "0:04:40"}
|
| 2210 |
+
{"current_steps": 7495, "total_steps": 8169, "loss": 0.1163, "lr": 8.261963711098798e-07, "epoch": 6.422450728363325, "percentage": 91.75, "elapsed_time": "0:54:36", "remaining_time": "0:04:54"}
|
| 2211 |
+
{"current_steps": 7500, "total_steps": 8169, "loss": 0.1332, "lr": 8.140851733389743e-07, "epoch": 6.426735218508997, "percentage": 91.81, "elapsed_time": "0:57:21", "remaining_time": "0:05:07"}
|
| 2212 |
+
{"current_steps": 7505, "total_steps": 8169, "loss": 0.1304, "lr": 8.020615568109868e-07, "epoch": 6.43101970865467, "percentage": 91.87, "elapsed_time": "1:00:05", "remaining_time": "0:05:18"}
|
| 2213 |
+
{"current_steps": 7510, "total_steps": 8169, "loss": 0.1159, "lr": 7.901255764121862e-07, "epoch": 6.435304198800343, "percentage": 91.93, "elapsed_time": "1:02:54", "remaining_time": "0:05:31"}
|
| 2214 |
+
{"current_steps": 7515, "total_steps": 8169, "loss": 0.1312, "lr": 7.782772866287968e-07, "epoch": 6.439588688946015, "percentage": 91.99, "elapsed_time": "1:05:47", "remaining_time": "0:05:43"}
|
| 2215 |
+
{"current_steps": 7520, "total_steps": 8169, "loss": 0.1203, "lr": 7.66516741546739e-07, "epoch": 6.443873179091688, "percentage": 92.06, "elapsed_time": "1:08:40", "remaining_time": "0:05:55"}
|
| 2216 |
+
{"current_steps": 7525, "total_steps": 8169, "loss": 0.1268, "lr": 7.548439948514019e-07, "epoch": 6.448157669237361, "percentage": 92.12, "elapsed_time": "1:11:40", "remaining_time": "0:06:08"}
|
| 2217 |
+
{"current_steps": 7530, "total_steps": 8169, "loss": 0.1321, "lr": 7.432590998273714e-07, "epoch": 6.4524421593830334, "percentage": 92.18, "elapsed_time": "1:14:34", "remaining_time": "0:06:19"}
|
| 2218 |
+
{"current_steps": 7535, "total_steps": 8169, "loss": 0.1334, "lr": 7.317621093582117e-07, "epoch": 6.456726649528706, "percentage": 92.24, "elapsed_time": "1:17:28", "remaining_time": "0:06:31"}
|
| 2219 |
+
{"current_steps": 7540, "total_steps": 8169, "loss": 0.1234, "lr": 7.2035307592621e-07, "epoch": 6.461011139674379, "percentage": 92.3, "elapsed_time": "1:20:19", "remaining_time": "0:06:42"}
|
| 2220 |
+
{"current_steps": 7545, "total_steps": 8169, "loss": 0.1252, "lr": 7.090320516121418e-07, "epoch": 6.4652956298200515, "percentage": 92.36, "elapsed_time": "1:23:11", "remaining_time": "0:06:52"}
|
| 2221 |
+
{"current_steps": 7550, "total_steps": 8169, "loss": 0.1265, "lr": 6.977990880950348e-07, "epoch": 6.469580119965724, "percentage": 92.42, "elapsed_time": "1:25:49", "remaining_time": "0:07:02"}
|
| 2222 |
+
{"current_steps": 7555, "total_steps": 8169, "loss": 0.1306, "lr": 6.866542366519247e-07, "epoch": 6.473864610111397, "percentage": 92.48, "elapsed_time": "1:28:39", "remaining_time": "0:07:12"}
|
| 2223 |
+
{"current_steps": 7560, "total_steps": 8169, "loss": 0.1346, "lr": 6.755975481576338e-07, "epoch": 6.4781491002570695, "percentage": 92.54, "elapsed_time": "1:31:27", "remaining_time": "0:07:22"}
|
| 2224 |
+
{"current_steps": 7565, "total_steps": 8169, "loss": 0.1303, "lr": 6.646290730845285e-07, "epoch": 6.482433590402742, "percentage": 92.61, "elapsed_time": "1:34:22", "remaining_time": "0:07:32"}
|
| 2225 |
+
{"current_steps": 7570, "total_steps": 8169, "loss": 0.1324, "lr": 6.537488615022902e-07, "epoch": 6.486718080548415, "percentage": 92.67, "elapsed_time": "1:37:06", "remaining_time": "0:07:41"}
|
| 2226 |
+
{"current_steps": 7575, "total_steps": 8169, "loss": 0.1281, "lr": 6.429569630776899e-07, "epoch": 6.4910025706940875, "percentage": 92.73, "elapsed_time": "1:39:58", "remaining_time": "0:07:50"}
|
| 2227 |
+
{"current_steps": 7580, "total_steps": 8169, "loss": 0.1409, "lr": 6.322534270743653e-07, "epoch": 6.49528706083976, "percentage": 92.79, "elapsed_time": "1:42:48", "remaining_time": "0:07:59"}
|
| 2228 |
+
{"current_steps": 7585, "total_steps": 8169, "loss": 0.1331, "lr": 6.216383023525829e-07, "epoch": 6.499571550985433, "percentage": 92.85, "elapsed_time": "1:45:33", "remaining_time": "0:08:07"}
|
| 2229 |
+
{"current_steps": 7590, "total_steps": 8169, "loss": 0.1302, "lr": 6.111116373690262e-07, "epoch": 6.5038560411311055, "percentage": 92.91, "elapsed_time": "1:48:09", "remaining_time": "0:08:15"}
|
| 2230 |
+
{"current_steps": 7595, "total_steps": 8169, "loss": 0.1218, "lr": 6.006734801765746e-07, "epoch": 6.508140531276778, "percentage": 92.97, "elapsed_time": "1:51:09", "remaining_time": "0:08:24"}
|
| 2231 |
+
{"current_steps": 7600, "total_steps": 8169, "loss": 0.1373, "lr": 5.903238784240794e-07, "epoch": 6.512425021422451, "percentage": 93.03, "elapsed_time": "1:53:53", "remaining_time": "0:08:31"}
|
| 2232 |
+
{"current_steps": 7605, "total_steps": 8169, "loss": 0.1196, "lr": 5.800628793561447e-07, "epoch": 6.5167095115681235, "percentage": 93.1, "elapsed_time": "1:57:50", "remaining_time": "0:08:44"}
|
| 2233 |
+
{"current_steps": 7610, "total_steps": 8169, "loss": 0.122, "lr": 5.698905298129154e-07, "epoch": 6.520994001713796, "percentage": 93.16, "elapsed_time": "2:00:39", "remaining_time": "0:08:51"}
|
| 2234 |
+
{"current_steps": 7615, "total_steps": 8169, "loss": 0.1366, "lr": 5.598068762298647e-07, "epoch": 6.525278491859469, "percentage": 93.22, "elapsed_time": "2:03:22", "remaining_time": "0:08:58"}
|
| 2235 |
+
{"current_steps": 7620, "total_steps": 8169, "loss": 0.1385, "lr": 5.49811964637581e-07, "epoch": 6.5295629820051415, "percentage": 93.28, "elapsed_time": "2:06:03", "remaining_time": "0:09:04"}
|
| 2236 |
+
{"current_steps": 7625, "total_steps": 8169, "loss": 0.1364, "lr": 5.399058406615498e-07, "epoch": 6.533847472150814, "percentage": 93.34, "elapsed_time": "2:08:47", "remaining_time": "0:09:11"}
|
| 2237 |
+
{"current_steps": 7630, "total_steps": 8169, "loss": 0.1172, "lr": 5.300885495219654e-07, "epoch": 6.538131962296487, "percentage": 93.4, "elapsed_time": "2:11:36", "remaining_time": "0:09:17"}
|
| 2238 |
+
{"current_steps": 7635, "total_steps": 8169, "loss": 0.1524, "lr": 5.203601360334998e-07, "epoch": 6.5424164524421595, "percentage": 93.46, "elapsed_time": "2:14:20", "remaining_time": "0:09:23"}
|
| 2239 |
+
{"current_steps": 7640, "total_steps": 8169, "loss": 0.1387, "lr": 5.107206446051138e-07, "epoch": 6.546700942587832, "percentage": 93.52, "elapsed_time": "2:17:11", "remaining_time": "0:09:29"}
|
| 2240 |
+
{"current_steps": 7645, "total_steps": 8169, "loss": 0.1278, "lr": 5.01170119239851e-07, "epoch": 6.550985432733505, "percentage": 93.59, "elapsed_time": "2:20:05", "remaining_time": "0:09:36"}
|
| 2241 |
+
{"current_steps": 7650, "total_steps": 8169, "loss": 0.1261, "lr": 4.917086035346374e-07, "epoch": 6.5552699228791775, "percentage": 93.65, "elapsed_time": "2:22:55", "remaining_time": "0:09:41"}
|
| 2242 |
+
{"current_steps": 7655, "total_steps": 8169, "loss": 0.1288, "lr": 4.823361406800775e-07, "epoch": 6.55955441302485, "percentage": 93.71, "elapsed_time": "2:25:45", "remaining_time": "0:09:47"}
|
| 2243 |
+
{"current_steps": 7660, "total_steps": 8169, "loss": 0.1205, "lr": 4.7305277346026523e-07, "epoch": 6.563838903170523, "percentage": 93.77, "elapsed_time": "2:28:36", "remaining_time": "0:09:52"}
|
| 2244 |
+
{"current_steps": 7665, "total_steps": 8169, "loss": 0.1328, "lr": 4.6385854425258225e-07, "epoch": 6.5681233933161955, "percentage": 93.83, "elapsed_time": "2:31:24", "remaining_time": "0:09:57"}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 8657
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8206f94d6f67781788a52ab2bb2cbb83583e35512bad021d03fbf94e9bfe829c
|
| 3 |
size 8657
|