Training in progress, epoch 1
Browse files
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4877660776
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fb93b2d60f815ceb62cd5006844138788a0a0e3c074ca8391dd94ddcdaf730c
|
| 3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4932751008
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:61c71d3142b1620c9dcbb8fd84c9768a16cfb760509c8e0a951b662fcd73cf77
|
| 3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4330865200
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d78d885f188fcf24c2ac142a7bd264134a1a709bcd12c011bf3ccd69f526f89
|
| 3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1089994880
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:dd3ac08dd319f49242740f96a6b6fa3a478ffb335c72eddd28865ce8861f11b6
|
| 3 |
size 1089994880
|
trainer_log.jsonl
CHANGED
|
@@ -115,3 +115,74 @@
|
|
| 115 |
{"current_steps": 115, "total_steps": 216, "loss": 0.2917, "lr": 5.323649091872179e-06, "epoch": 1.5898617511520738, "percentage": 53.24, "elapsed_time": "0:07:36", "remaining_time": "0:06:40"}
|
| 116 |
{"current_steps": 116, "total_steps": 216, "loss": 0.2357, "lr": 5.242811110572243e-06, "epoch": 1.6036866359447006, "percentage": 53.7, "elapsed_time": "0:07:39", "remaining_time": "0:06:36"}
|
| 117 |
{"current_steps": 117, "total_steps": 216, "loss": 0.2259, "lr": 5.161909456266781e-06, "epoch": 1.6175115207373272, "percentage": 54.17, "elapsed_time": "0:07:43", "remaining_time": "0:06:31"}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 115 |
{"current_steps": 115, "total_steps": 216, "loss": 0.2917, "lr": 5.323649091872179e-06, "epoch": 1.5898617511520738, "percentage": 53.24, "elapsed_time": "0:07:36", "remaining_time": "0:06:40"}
|
| 116 |
{"current_steps": 116, "total_steps": 216, "loss": 0.2357, "lr": 5.242811110572243e-06, "epoch": 1.6036866359447006, "percentage": 53.7, "elapsed_time": "0:07:39", "remaining_time": "0:06:36"}
|
| 117 |
{"current_steps": 117, "total_steps": 216, "loss": 0.2259, "lr": 5.161909456266781e-06, "epoch": 1.6175115207373272, "percentage": 54.17, "elapsed_time": "0:07:43", "remaining_time": "0:06:31"}
|
| 118 |
+
{"current_steps": 118, "total_steps": 216, "loss": 0.2918, "lr": 5.080965344012509e-06, "epoch": 1.631336405529954, "percentage": 54.63, "elapsed_time": "0:07:46", "remaining_time": "0:06:27"}
|
| 119 |
+
{"current_steps": 119, "total_steps": 216, "loss": 0.2057, "lr": 5e-06, "epoch": 1.6451612903225805, "percentage": 55.09, "elapsed_time": "0:07:49", "remaining_time": "0:06:23"}
|
| 120 |
+
{"current_steps": 120, "total_steps": 216, "loss": 0.2469, "lr": 4.919034655987493e-06, "epoch": 1.6589861751152073, "percentage": 55.56, "elapsed_time": "0:07:53", "remaining_time": "0:06:18"}
|
| 121 |
+
{"current_steps": 121, "total_steps": 216, "loss": 0.2977, "lr": 4.838090543733222e-06, "epoch": 1.672811059907834, "percentage": 56.02, "elapsed_time": "0:07:56", "remaining_time": "0:06:14"}
|
| 122 |
+
{"current_steps": 122, "total_steps": 216, "loss": 0.2664, "lr": 4.757188889427761e-06, "epoch": 1.6866359447004609, "percentage": 56.48, "elapsed_time": "0:08:00", "remaining_time": "0:06:10"}
|
| 123 |
+
{"current_steps": 123, "total_steps": 216, "loss": 0.2938, "lr": 4.6763509081278215e-06, "epoch": 1.7004608294930876, "percentage": 56.94, "elapsed_time": "0:08:03", "remaining_time": "0:06:05"}
|
| 124 |
+
{"current_steps": 124, "total_steps": 216, "loss": 0.2918, "lr": 4.59559779819298e-06, "epoch": 1.7142857142857144, "percentage": 57.41, "elapsed_time": "0:08:07", "remaining_time": "0:06:01"}
|
| 125 |
+
{"current_steps": 125, "total_steps": 216, "loss": 0.2593, "lr": 4.51495073572676e-06, "epoch": 1.728110599078341, "percentage": 57.87, "elapsed_time": "0:08:10", "remaining_time": "0:05:57"}
|
| 126 |
+
{"current_steps": 126, "total_steps": 216, "loss": 0.2565, "lr": 4.434430869023579e-06, "epoch": 1.7419354838709677, "percentage": 58.33, "elapsed_time": "0:08:13", "remaining_time": "0:05:52"}
|
| 127 |
+
{"current_steps": 127, "total_steps": 216, "loss": 0.2542, "lr": 4.3540593130229695e-06, "epoch": 1.7557603686635943, "percentage": 58.8, "elapsed_time": "0:08:17", "remaining_time": "0:05:48"}
|
| 128 |
+
{"current_steps": 128, "total_steps": 216, "loss": 0.2809, "lr": 4.27385714377255e-06, "epoch": 1.769585253456221, "percentage": 59.26, "elapsed_time": "0:08:20", "remaining_time": "0:05:44"}
|
| 129 |
+
{"current_steps": 129, "total_steps": 216, "loss": 0.2656, "lr": 4.1938453929012014e-06, "epoch": 1.7834101382488479, "percentage": 59.72, "elapsed_time": "0:08:24", "remaining_time": "0:05:40"}
|
| 130 |
+
{"current_steps": 130, "total_steps": 216, "loss": 0.2602, "lr": 4.1140450421038865e-06, "epoch": 1.7972350230414746, "percentage": 60.19, "elapsed_time": "0:08:27", "remaining_time": "0:05:35"}
|
| 131 |
+
{"current_steps": 131, "total_steps": 216, "loss": 0.2708, "lr": 4.034477017639561e-06, "epoch": 1.8110599078341014, "percentage": 60.65, "elapsed_time": "0:08:31", "remaining_time": "0:05:31"}
|
| 132 |
+
{"current_steps": 132, "total_steps": 216, "loss": 0.2728, "lr": 3.955162184843625e-06, "epoch": 1.8248847926267282, "percentage": 61.11, "elapsed_time": "0:08:34", "remaining_time": "0:05:27"}
|
| 133 |
+
{"current_steps": 133, "total_steps": 216, "loss": 0.2763, "lr": 3.8761213426563546e-06, "epoch": 1.838709677419355, "percentage": 61.57, "elapsed_time": "0:08:37", "remaining_time": "0:05:23"}
|
| 134 |
+
{"current_steps": 134, "total_steps": 216, "loss": 0.2574, "lr": 3.7973752181687336e-06, "epoch": 1.8525345622119815, "percentage": 62.04, "elapsed_time": "0:08:41", "remaining_time": "0:05:18"}
|
| 135 |
+
{"current_steps": 135, "total_steps": 216, "loss": 0.2768, "lr": 3.7189444611871383e-06, "epoch": 1.8663594470046083, "percentage": 62.5, "elapsed_time": "0:08:44", "remaining_time": "0:05:14"}
|
| 136 |
+
{"current_steps": 136, "total_steps": 216, "loss": 0.3172, "lr": 3.6408496388182857e-06, "epoch": 1.8801843317972349, "percentage": 62.96, "elapsed_time": "0:08:48", "remaining_time": "0:05:10"}
|
| 137 |
+
{"current_steps": 137, "total_steps": 216, "loss": 0.2375, "lr": 3.5631112300758595e-06, "epoch": 1.8940092165898617, "percentage": 63.43, "elapsed_time": "0:08:51", "remaining_time": "0:05:06"}
|
| 138 |
+
{"current_steps": 138, "total_steps": 216, "loss": 0.2889, "lr": 3.4857496205102475e-06, "epoch": 1.9078341013824884, "percentage": 63.89, "elapsed_time": "0:08:54", "remaining_time": "0:05:02"}
|
| 139 |
+
{"current_steps": 139, "total_steps": 216, "loss": 0.2668, "lr": 3.4087850968627823e-06, "epoch": 1.9216589861751152, "percentage": 64.35, "elapsed_time": "0:08:58", "remaining_time": "0:04:58"}
|
| 140 |
+
{"current_steps": 140, "total_steps": 216, "loss": 0.2675, "lr": 3.3322378417458985e-06, "epoch": 1.935483870967742, "percentage": 64.81, "elapsed_time": "0:09:01", "remaining_time": "0:04:54"}
|
| 141 |
+
{"current_steps": 141, "total_steps": 216, "loss": 0.2711, "lr": 3.2561279283505888e-06, "epoch": 1.9493087557603688, "percentage": 65.28, "elapsed_time": "0:09:05", "remaining_time": "0:04:49"}
|
| 142 |
+
{"current_steps": 142, "total_steps": 216, "loss": 0.2665, "lr": 3.180475315182563e-06, "epoch": 1.9631336405529956, "percentage": 65.74, "elapsed_time": "0:09:08", "remaining_time": "0:04:45"}
|
| 143 |
+
{"current_steps": 143, "total_steps": 216, "loss": 0.2708, "lr": 3.1052998408284664e-06, "epoch": 1.976958525345622, "percentage": 66.2, "elapsed_time": "0:09:12", "remaining_time": "0:04:41"}
|
| 144 |
+
{"current_steps": 144, "total_steps": 216, "loss": 0.2798, "lr": 3.0306212187535653e-06, "epoch": 1.9907834101382489, "percentage": 66.67, "elapsed_time": "0:09:15", "remaining_time": "0:04:37"}
|
| 145 |
+
{"current_steps": 145, "total_steps": 216, "loss": 0.3732, "lr": 2.9564590321322206e-06, "epoch": 2.0046082949308754, "percentage": 67.13, "elapsed_time": "0:10:34", "remaining_time": "0:05:10"}
|
| 146 |
+
{"current_steps": 146, "total_steps": 216, "loss": 0.2203, "lr": 2.882832728712551e-06, "epoch": 2.0184331797235022, "percentage": 67.59, "elapsed_time": "0:10:38", "remaining_time": "0:05:05"}
|
| 147 |
+
{"current_steps": 147, "total_steps": 216, "loss": 0.196, "lr": 2.8097616157165886e-06, "epoch": 2.032258064516129, "percentage": 68.06, "elapsed_time": "0:10:41", "remaining_time": "0:05:01"}
|
| 148 |
+
{"current_steps": 148, "total_steps": 216, "loss": 0.2146, "lr": 2.7372648547773063e-06, "epoch": 2.046082949308756, "percentage": 68.52, "elapsed_time": "0:10:45", "remaining_time": "0:04:56"}
|
| 149 |
+
{"current_steps": 149, "total_steps": 216, "loss": 0.2404, "lr": 2.665361456913797e-06, "epoch": 2.0599078341013826, "percentage": 68.98, "elapsed_time": "0:10:48", "remaining_time": "0:04:51"}
|
| 150 |
+
{"current_steps": 150, "total_steps": 216, "loss": 0.2051, "lr": 2.594070277545975e-06, "epoch": 2.0737327188940093, "percentage": 69.44, "elapsed_time": "0:10:51", "remaining_time": "0:04:46"}
|
| 151 |
+
{"current_steps": 151, "total_steps": 216, "loss": 0.2648, "lr": 2.5234100115500643e-06, "epoch": 2.087557603686636, "percentage": 69.91, "elapsed_time": "0:10:55", "remaining_time": "0:04:42"}
|
| 152 |
+
{"current_steps": 152, "total_steps": 216, "loss": 0.2245, "lr": 2.4533991883561868e-06, "epoch": 2.1013824884792625, "percentage": 70.37, "elapsed_time": "0:10:58", "remaining_time": "0:04:37"}
|
| 153 |
+
{"current_steps": 153, "total_steps": 216, "loss": 0.2162, "lr": 2.38405616708935e-06, "epoch": 2.1152073732718892, "percentage": 70.83, "elapsed_time": "0:11:02", "remaining_time": "0:04:32"}
|
| 154 |
+
{"current_steps": 154, "total_steps": 216, "loss": 0.3197, "lr": 2.315399131755081e-06, "epoch": 2.129032258064516, "percentage": 71.3, "elapsed_time": "0:11:05", "remaining_time": "0:04:28"}
|
| 155 |
+
{"current_steps": 155, "total_steps": 216, "loss": 0.2537, "lr": 2.2474460864709825e-06, "epoch": 2.142857142857143, "percentage": 71.76, "elapsed_time": "0:11:09", "remaining_time": "0:04:23"}
|
| 156 |
+
{"current_steps": 156, "total_steps": 216, "loss": 0.245, "lr": 2.1802148507454675e-06, "epoch": 2.1566820276497696, "percentage": 72.22, "elapsed_time": "0:11:12", "remaining_time": "0:04:18"}
|
| 157 |
+
{"current_steps": 157, "total_steps": 216, "loss": 0.2074, "lr": 2.1137230548049042e-06, "epoch": 2.1705069124423964, "percentage": 72.69, "elapsed_time": "0:11:16", "remaining_time": "0:04:14"}
|
| 158 |
+
{"current_steps": 158, "total_steps": 216, "loss": 0.1891, "lr": 2.0479881349703885e-06, "epoch": 2.184331797235023, "percentage": 73.15, "elapsed_time": "0:11:19", "remaining_time": "0:04:09"}
|
| 159 |
+
{"current_steps": 159, "total_steps": 216, "loss": 0.2141, "lr": 1.983027329085377e-06, "epoch": 2.19815668202765, "percentage": 73.61, "elapsed_time": "0:11:23", "remaining_time": "0:04:05"}
|
| 160 |
+
{"current_steps": 160, "total_steps": 216, "loss": 0.2395, "lr": 1.9188576719953635e-06, "epoch": 2.2119815668202767, "percentage": 74.07, "elapsed_time": "0:11:26", "remaining_time": "0:04:00"}
|
| 161 |
+
{"current_steps": 161, "total_steps": 216, "loss": 0.2325, "lr": 1.8554959910807773e-06, "epoch": 2.225806451612903, "percentage": 74.54, "elapsed_time": "0:11:30", "remaining_time": "0:03:55"}
|
| 162 |
+
{"current_steps": 162, "total_steps": 216, "loss": 0.2189, "lr": 1.7929589018443016e-06, "epoch": 2.23963133640553, "percentage": 75.0, "elapsed_time": "0:11:33", "remaining_time": "0:03:51"}
|
| 163 |
+
{"current_steps": 163, "total_steps": 216, "loss": 0.1941, "lr": 1.7312628035537388e-06, "epoch": 2.2534562211981566, "percentage": 75.46, "elapsed_time": "0:11:37", "remaining_time": "0:03:46"}
|
| 164 |
+
{"current_steps": 164, "total_steps": 216, "loss": 0.2274, "lr": 1.6704238749415958e-06, "epoch": 2.2672811059907834, "percentage": 75.93, "elapsed_time": "0:11:40", "remaining_time": "0:03:42"}
|
| 165 |
+
{"current_steps": 165, "total_steps": 216, "loss": 0.2582, "lr": 1.6104580699624839e-06, "epoch": 2.28110599078341, "percentage": 76.39, "elapsed_time": "0:11:44", "remaining_time": "0:03:37"}
|
| 166 |
+
{"current_steps": 166, "total_steps": 216, "loss": 0.2518, "lr": 1.5513811136094786e-06, "epoch": 2.294930875576037, "percentage": 76.85, "elapsed_time": "0:11:47", "remaining_time": "0:03:33"}
|
| 167 |
+
{"current_steps": 167, "total_steps": 216, "loss": 0.1982, "lr": 1.4932084977905043e-06, "epoch": 2.3087557603686637, "percentage": 77.31, "elapsed_time": "0:11:51", "remaining_time": "0:03:28"}
|
| 168 |
+
{"current_steps": 168, "total_steps": 216, "loss": 0.2607, "lr": 1.4359554772658551e-06, "epoch": 2.3225806451612905, "percentage": 77.78, "elapsed_time": "0:11:54", "remaining_time": "0:03:24"}
|
| 169 |
+
{"current_steps": 169, "total_steps": 216, "loss": 0.2072, "lr": 1.3796370656478936e-06, "epoch": 2.3364055299539173, "percentage": 78.24, "elapsed_time": "0:11:58", "remaining_time": "0:03:19"}
|
| 170 |
+
{"current_steps": 170, "total_steps": 216, "loss": 0.2143, "lr": 1.3242680314639995e-06, "epoch": 2.3502304147465436, "percentage": 78.7, "elapsed_time": "0:12:01", "remaining_time": "0:03:15"}
|
| 171 |
+
{"current_steps": 171, "total_steps": 216, "loss": 0.1718, "lr": 1.2698628942837698e-06, "epoch": 2.3640552995391704, "percentage": 79.17, "elapsed_time": "0:12:05", "remaining_time": "0:03:10"}
|
| 172 |
+
{"current_steps": 172, "total_steps": 216, "loss": 0.1923, "lr": 1.2164359209115235e-06, "epoch": 2.377880184331797, "percentage": 79.63, "elapsed_time": "0:12:08", "remaining_time": "0:03:06"}
|
| 173 |
+
{"current_steps": 173, "total_steps": 216, "loss": 0.2524, "lr": 1.164001121645069e-06, "epoch": 2.391705069124424, "percentage": 80.09, "elapsed_time": "0:12:11", "remaining_time": "0:03:01"}
|
| 174 |
+
{"current_steps": 174, "total_steps": 216, "loss": 0.2125, "lr": 1.1125722466017547e-06, "epoch": 2.4055299539170507, "percentage": 80.56, "elapsed_time": "0:12:15", "remaining_time": "0:02:57"}
|
| 175 |
+
{"current_steps": 175, "total_steps": 216, "loss": 0.2456, "lr": 1.062162782112729e-06, "epoch": 2.4193548387096775, "percentage": 81.02, "elapsed_time": "0:12:18", "remaining_time": "0:02:53"}
|
| 176 |
+
{"current_steps": 176, "total_steps": 216, "loss": 0.1834, "lr": 1.012785947186397e-06, "epoch": 2.4331797235023043, "percentage": 81.48, "elapsed_time": "0:12:22", "remaining_time": "0:02:48"}
|
| 177 |
+
{"current_steps": 177, "total_steps": 216, "loss": 0.2056, "lr": 9.644546900419533e-07, "epoch": 2.447004608294931, "percentage": 81.94, "elapsed_time": "0:12:25", "remaining_time": "0:02:44"}
|
| 178 |
+
{"current_steps": 178, "total_steps": 216, "loss": 0.2044, "lr": 9.171816847139447e-07, "epoch": 2.460829493087558, "percentage": 82.41, "elapsed_time": "0:12:29", "remaining_time": "0:02:39"}
|
| 179 |
+
{"current_steps": 179, "total_steps": 216, "loss": 0.2334, "lr": 8.709793277287182e-07, "epoch": 2.474654377880184, "percentage": 82.87, "elapsed_time": "0:12:32", "remaining_time": "0:02:35"}
|
| 180 |
+
{"current_steps": 180, "total_steps": 216, "loss": 0.1885, "lr": 8.258597348536452e-07, "epoch": 2.488479262672811, "percentage": 83.33, "elapsed_time": "0:12:36", "remaining_time": "0:02:31"}
|
| 181 |
+
{"current_steps": 181, "total_steps": 216, "loss": 0.2601, "lr": 7.818347379199781e-07, "epoch": 2.5023041474654377, "percentage": 83.8, "elapsed_time": "0:12:39", "remaining_time": "0:02:26"}
|
| 182 |
+
{"current_steps": 182, "total_steps": 216, "loss": 0.1937, "lr": 7.389158817201541e-07, "epoch": 2.5161290322580645, "percentage": 84.26, "elapsed_time": "0:12:43", "remaining_time": "0:02:22"}
|
| 183 |
+
{"current_steps": 183, "total_steps": 216, "loss": 0.2487, "lr": 6.971144209803738e-07, "epoch": 2.5299539170506913, "percentage": 84.72, "elapsed_time": "0:12:46", "remaining_time": "0:02:18"}
|
| 184 |
+
{"current_steps": 184, "total_steps": 216, "loss": 0.2134, "lr": 6.564413174092443e-07, "epoch": 2.543778801843318, "percentage": 85.19, "elapsed_time": "0:12:50", "remaining_time": "0:02:13"}
|
| 185 |
+
{"current_steps": 185, "total_steps": 216, "loss": 0.2115, "lr": 6.16907236823262e-07, "epoch": 2.557603686635945, "percentage": 85.65, "elapsed_time": "0:12:53", "remaining_time": "0:02:09"}
|
| 186 |
+
{"current_steps": 186, "total_steps": 216, "loss": 0.2161, "lr": 5.785225463498828e-07, "epoch": 2.571428571428571, "percentage": 86.11, "elapsed_time": "0:12:57", "remaining_time": "0:02:05"}
|
| 187 |
+
{"current_steps": 187, "total_steps": 216, "loss": 0.249, "lr": 5.412973117089288e-07, "epoch": 2.5852534562211984, "percentage": 86.57, "elapsed_time": "0:13:00", "remaining_time": "0:02:01"}
|
| 188 |
+
{"current_steps": 188, "total_steps": 216, "loss": 0.1794, "lr": 5.05241294573024e-07, "epoch": 2.5990783410138247, "percentage": 87.04, "elapsed_time": "0:13:03", "remaining_time": "0:01:56"}
|