Training in progress, epoch 0
Browse files- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +71 -31
- training_args.bin +1 -1
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4877660776
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb3140d8c8a0a0b7ba66165ae2b2e9e84e56da05ec80bda2f7ca5910a53e437e
|
| 3 |
size 4877660776
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4932751008
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b4962144a9d20e16f299d981e28b0df1dfe1591270d5b298bf8c55424fd4cd3d
|
| 3 |
size 4932751008
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4330865200
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2612ec448f3edf6453778bf4652b6f5f50cb187d2d3a85bd44ff26995a1f456
|
| 3 |
size 4330865200
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1089994880
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e2c6b9ac7acec7f941982958e4efec6914995b6b691fb0e146e1a4b6f383d22
|
| 3 |
size 1089994880
|
trainer_log.jsonl
CHANGED
|
@@ -1,31 +1,71 @@
|
|
| 1 |
-
{"current_steps": 1, "total_steps":
|
| 2 |
-
{"current_steps": 2, "total_steps":
|
| 3 |
-
{"current_steps": 3, "total_steps":
|
| 4 |
-
{"current_steps": 4, "total_steps":
|
| 5 |
-
{"current_steps": 5, "total_steps":
|
| 6 |
-
{"current_steps": 6, "total_steps":
|
| 7 |
-
{"current_steps": 7, "total_steps":
|
| 8 |
-
{"current_steps": 8, "total_steps":
|
| 9 |
-
{"current_steps": 9, "total_steps":
|
| 10 |
-
{"current_steps": 10, "total_steps":
|
| 11 |
-
{"current_steps": 11, "total_steps":
|
| 12 |
-
{"current_steps": 12, "total_steps":
|
| 13 |
-
{"current_steps": 13, "total_steps":
|
| 14 |
-
{"current_steps": 14, "total_steps":
|
| 15 |
-
{"current_steps": 15, "total_steps":
|
| 16 |
-
{"current_steps": 16, "total_steps":
|
| 17 |
-
{"current_steps": 17, "total_steps":
|
| 18 |
-
{"current_steps": 18, "total_steps":
|
| 19 |
-
{"current_steps": 19, "total_steps":
|
| 20 |
-
{"current_steps": 20, "total_steps":
|
| 21 |
-
{"current_steps": 21, "total_steps":
|
| 22 |
-
{"current_steps": 22, "total_steps":
|
| 23 |
-
{"current_steps": 23, "total_steps":
|
| 24 |
-
{"current_steps": 24, "total_steps":
|
| 25 |
-
{"current_steps": 25, "total_steps":
|
| 26 |
-
{"current_steps": 26, "total_steps":
|
| 27 |
-
{"current_steps": 27, "total_steps":
|
| 28 |
-
{"current_steps": 28, "total_steps":
|
| 29 |
-
{"current_steps": 29, "total_steps":
|
| 30 |
-
{"current_steps": 30, "total_steps":
|
| 31 |
-
{"current_steps":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"current_steps": 1, "total_steps": 156, "loss": 1.3429, "lr": 6.25e-07, "epoch": 0.01910828025477707, "percentage": 0.64, "elapsed_time": "0:00:12", "remaining_time": "0:32:17"}
|
| 2 |
+
{"current_steps": 2, "total_steps": 156, "loss": 1.3616, "lr": 1.25e-06, "epoch": 0.03821656050955414, "percentage": 1.28, "elapsed_time": "0:00:20", "remaining_time": "0:25:56"}
|
| 3 |
+
{"current_steps": 3, "total_steps": 156, "loss": 1.296, "lr": 1.8750000000000003e-06, "epoch": 0.05732484076433121, "percentage": 1.92, "elapsed_time": "0:00:32", "remaining_time": "0:27:23"}
|
| 4 |
+
{"current_steps": 4, "total_steps": 156, "loss": 1.3065, "lr": 2.5e-06, "epoch": 0.07643312101910828, "percentage": 2.56, "elapsed_time": "0:00:44", "remaining_time": "0:28:13"}
|
| 5 |
+
{"current_steps": 5, "total_steps": 156, "loss": 1.3084, "lr": 3.125e-06, "epoch": 0.09554140127388536, "percentage": 3.21, "elapsed_time": "0:00:55", "remaining_time": "0:27:44"}
|
| 6 |
+
{"current_steps": 6, "total_steps": 156, "loss": 1.2615, "lr": 3.7500000000000005e-06, "epoch": 0.11464968152866242, "percentage": 3.85, "elapsed_time": "0:01:02", "remaining_time": "0:26:04"}
|
| 7 |
+
{"current_steps": 7, "total_steps": 156, "loss": 1.1771, "lr": 4.3750000000000005e-06, "epoch": 0.1337579617834395, "percentage": 4.49, "elapsed_time": "0:01:12", "remaining_time": "0:25:43"}
|
| 8 |
+
{"current_steps": 8, "total_steps": 156, "loss": 1.2099, "lr": 5e-06, "epoch": 0.15286624203821655, "percentage": 5.13, "elapsed_time": "0:01:20", "remaining_time": "0:24:43"}
|
| 9 |
+
{"current_steps": 9, "total_steps": 156, "loss": 1.1225, "lr": 5.625e-06, "epoch": 0.17197452229299362, "percentage": 5.77, "elapsed_time": "0:01:32", "remaining_time": "0:25:14"}
|
| 10 |
+
{"current_steps": 10, "total_steps": 156, "loss": 1.0774, "lr": 6.25e-06, "epoch": 0.1910828025477707, "percentage": 6.41, "elapsed_time": "0:01:41", "remaining_time": "0:24:46"}
|
| 11 |
+
{"current_steps": 11, "total_steps": 156, "loss": 1.0759, "lr": 6.875e-06, "epoch": 0.21019108280254778, "percentage": 7.05, "elapsed_time": "0:01:50", "remaining_time": "0:24:18"}
|
| 12 |
+
{"current_steps": 12, "total_steps": 156, "loss": 1.0191, "lr": 7.500000000000001e-06, "epoch": 0.22929936305732485, "percentage": 7.69, "elapsed_time": "0:02:01", "remaining_time": "0:24:22"}
|
| 13 |
+
{"current_steps": 13, "total_steps": 156, "loss": 1.0475, "lr": 8.125000000000001e-06, "epoch": 0.2484076433121019, "percentage": 8.33, "elapsed_time": "0:02:12", "remaining_time": "0:24:18"}
|
| 14 |
+
{"current_steps": 14, "total_steps": 156, "loss": 1.0031, "lr": 8.750000000000001e-06, "epoch": 0.267515923566879, "percentage": 8.97, "elapsed_time": "0:02:24", "remaining_time": "0:24:28"}
|
| 15 |
+
{"current_steps": 15, "total_steps": 156, "loss": 0.9644, "lr": 9.375000000000001e-06, "epoch": 0.28662420382165604, "percentage": 9.62, "elapsed_time": "0:02:36", "remaining_time": "0:24:29"}
|
| 16 |
+
{"current_steps": 16, "total_steps": 156, "loss": 0.958, "lr": 1e-05, "epoch": 0.3057324840764331, "percentage": 10.26, "elapsed_time": "0:02:49", "remaining_time": "0:24:41"}
|
| 17 |
+
{"current_steps": 17, "total_steps": 156, "loss": 0.9513, "lr": 9.998741174712534e-06, "epoch": 0.3248407643312102, "percentage": 10.9, "elapsed_time": "0:02:58", "remaining_time": "0:24:20"}
|
| 18 |
+
{"current_steps": 18, "total_steps": 156, "loss": 0.9529, "lr": 9.994965332706574e-06, "epoch": 0.34394904458598724, "percentage": 11.54, "elapsed_time": "0:03:06", "remaining_time": "0:23:46"}
|
| 19 |
+
{"current_steps": 19, "total_steps": 156, "loss": 0.9513, "lr": 9.98867437523228e-06, "epoch": 0.3630573248407643, "percentage": 12.18, "elapsed_time": "0:03:20", "remaining_time": "0:24:02"}
|
| 20 |
+
{"current_steps": 20, "total_steps": 156, "loss": 0.9268, "lr": 9.979871469976197e-06, "epoch": 0.3821656050955414, "percentage": 12.82, "elapsed_time": "0:03:28", "remaining_time": "0:23:37"}
|
| 21 |
+
{"current_steps": 21, "total_steps": 156, "loss": 0.9387, "lr": 9.968561049466214e-06, "epoch": 0.4012738853503185, "percentage": 13.46, "elapsed_time": "0:03:40", "remaining_time": "0:23:37"}
|
| 22 |
+
{"current_steps": 22, "total_steps": 156, "loss": 0.8954, "lr": 9.954748808839675e-06, "epoch": 0.42038216560509556, "percentage": 14.1, "elapsed_time": "0:03:53", "remaining_time": "0:23:44"}
|
| 23 |
+
{"current_steps": 23, "total_steps": 156, "loss": 0.8937, "lr": 9.938441702975689e-06, "epoch": 0.4394904458598726, "percentage": 14.74, "elapsed_time": "0:04:05", "remaining_time": "0:23:38"}
|
| 24 |
+
{"current_steps": 24, "total_steps": 156, "loss": 0.9142, "lr": 9.91964794299315e-06, "epoch": 0.4585987261146497, "percentage": 15.38, "elapsed_time": "0:04:17", "remaining_time": "0:23:34"}
|
| 25 |
+
{"current_steps": 25, "total_steps": 156, "loss": 0.8878, "lr": 9.898376992116179e-06, "epoch": 0.47770700636942676, "percentage": 16.03, "elapsed_time": "0:04:32", "remaining_time": "0:23:47"}
|
| 26 |
+
{"current_steps": 26, "total_steps": 156, "loss": 0.8914, "lr": 9.874639560909118e-06, "epoch": 0.4968152866242038, "percentage": 16.67, "elapsed_time": "0:04:42", "remaining_time": "0:23:32"}
|
| 27 |
+
{"current_steps": 27, "total_steps": 156, "loss": 0.918, "lr": 9.848447601883436e-06, "epoch": 0.5159235668789809, "percentage": 17.31, "elapsed_time": "0:04:55", "remaining_time": "0:23:33"}
|
| 28 |
+
{"current_steps": 28, "total_steps": 156, "loss": 0.9225, "lr": 9.819814303479268e-06, "epoch": 0.535031847133758, "percentage": 17.95, "elapsed_time": "0:05:05", "remaining_time": "0:23:14"}
|
| 29 |
+
{"current_steps": 29, "total_steps": 156, "loss": 0.8652, "lr": 9.788754083424654e-06, "epoch": 0.554140127388535, "percentage": 18.59, "elapsed_time": "0:05:22", "remaining_time": "0:23:33"}
|
| 30 |
+
{"current_steps": 30, "total_steps": 156, "loss": 0.8733, "lr": 9.755282581475769e-06, "epoch": 0.5732484076433121, "percentage": 19.23, "elapsed_time": "0:05:34", "remaining_time": "0:23:26"}
|
| 31 |
+
{"current_steps": 31, "total_steps": 156, "loss": 0.8964, "lr": 9.719416651541839e-06, "epoch": 0.5923566878980892, "percentage": 19.87, "elapsed_time": "0:05:46", "remaining_time": "0:23:16"}
|
| 32 |
+
{"current_steps": 32, "total_steps": 156, "loss": 0.896, "lr": 9.681174353198687e-06, "epoch": 0.6114649681528662, "percentage": 20.51, "elapsed_time": "0:05:59", "remaining_time": "0:23:14"}
|
| 33 |
+
{"current_steps": 33, "total_steps": 156, "loss": 0.8807, "lr": 9.640574942595195e-06, "epoch": 0.6305732484076433, "percentage": 21.15, "elapsed_time": "0:06:10", "remaining_time": "0:22:59"}
|
| 34 |
+
{"current_steps": 34, "total_steps": 156, "loss": 0.8396, "lr": 9.597638862757255e-06, "epoch": 0.6496815286624203, "percentage": 21.79, "elapsed_time": "0:06:21", "remaining_time": "0:22:49"}
|
| 35 |
+
{"current_steps": 35, "total_steps": 156, "loss": 0.8644, "lr": 9.552387733294081e-06, "epoch": 0.6687898089171974, "percentage": 22.44, "elapsed_time": "0:06:30", "remaining_time": "0:22:30"}
|
| 36 |
+
{"current_steps": 36, "total_steps": 156, "loss": 0.8791, "lr": 9.504844339512096e-06, "epoch": 0.6878980891719745, "percentage": 23.08, "elapsed_time": "0:06:41", "remaining_time": "0:22:18"}
|
| 37 |
+
{"current_steps": 37, "total_steps": 156, "loss": 0.8776, "lr": 9.45503262094184e-06, "epoch": 0.7070063694267515, "percentage": 23.72, "elapsed_time": "0:06:51", "remaining_time": "0:22:04"}
|
| 38 |
+
{"current_steps": 38, "total_steps": 156, "loss": 0.8911, "lr": 9.40297765928369e-06, "epoch": 0.7261146496815286, "percentage": 24.36, "elapsed_time": "0:07:00", "remaining_time": "0:21:45"}
|
| 39 |
+
{"current_steps": 39, "total_steps": 156, "loss": 0.8545, "lr": 9.348705665778479e-06, "epoch": 0.7452229299363057, "percentage": 25.0, "elapsed_time": "0:07:13", "remaining_time": "0:21:40"}
|
| 40 |
+
{"current_steps": 40, "total_steps": 156, "loss": 0.8032, "lr": 9.292243968009332e-06, "epoch": 0.7643312101910829, "percentage": 25.64, "elapsed_time": "0:07:22", "remaining_time": "0:21:22"}
|
| 41 |
+
{"current_steps": 41, "total_steps": 156, "loss": 0.8763, "lr": 9.233620996141421e-06, "epoch": 0.7834394904458599, "percentage": 26.28, "elapsed_time": "0:07:30", "remaining_time": "0:21:03"}
|
| 42 |
+
{"current_steps": 42, "total_steps": 156, "loss": 0.8522, "lr": 9.172866268606514e-06, "epoch": 0.802547770700637, "percentage": 26.92, "elapsed_time": "0:07:40", "remaining_time": "0:20:48"}
|
| 43 |
+
{"current_steps": 43, "total_steps": 156, "loss": 0.8872, "lr": 9.110010377239552e-06, "epoch": 0.821656050955414, "percentage": 27.56, "elapsed_time": "0:07:47", "remaining_time": "0:20:27"}
|
| 44 |
+
{"current_steps": 44, "total_steps": 156, "loss": 0.8448, "lr": 9.045084971874738e-06, "epoch": 0.8407643312101911, "percentage": 28.21, "elapsed_time": "0:08:00", "remaining_time": "0:20:23"}
|
| 45 |
+
{"current_steps": 45, "total_steps": 156, "loss": 0.8585, "lr": 8.978122744408905e-06, "epoch": 0.8598726114649682, "percentage": 28.85, "elapsed_time": "0:08:09", "remaining_time": "0:20:07"}
|
| 46 |
+
{"current_steps": 46, "total_steps": 156, "loss": 0.8294, "lr": 8.90915741234015e-06, "epoch": 0.8789808917197452, "percentage": 29.49, "elapsed_time": "0:08:20", "remaining_time": "0:19:55"}
|
| 47 |
+
{"current_steps": 47, "total_steps": 156, "loss": 0.8258, "lr": 8.838223701790057e-06, "epoch": 0.8980891719745223, "percentage": 30.13, "elapsed_time": "0:08:32", "remaining_time": "0:19:48"}
|
| 48 |
+
{"current_steps": 48, "total_steps": 156, "loss": 0.823, "lr": 8.765357330018056e-06, "epoch": 0.9171974522292994, "percentage": 30.77, "elapsed_time": "0:08:48", "remaining_time": "0:19:48"}
|
| 49 |
+
{"current_steps": 49, "total_steps": 156, "loss": 0.8533, "lr": 8.690594987436705e-06, "epoch": 0.9363057324840764, "percentage": 31.41, "elapsed_time": "0:09:01", "remaining_time": "0:19:41"}
|
| 50 |
+
{"current_steps": 50, "total_steps": 156, "loss": 0.8479, "lr": 8.613974319136959e-06, "epoch": 0.9554140127388535, "percentage": 32.05, "elapsed_time": "0:09:10", "remaining_time": "0:19:26"}
|
| 51 |
+
{"current_steps": 51, "total_steps": 156, "loss": 0.871, "lr": 8.535533905932739e-06, "epoch": 0.9745222929936306, "percentage": 32.69, "elapsed_time": "0:09:27", "remaining_time": "0:19:28"}
|
| 52 |
+
{"current_steps": 52, "total_steps": 156, "loss": 0.8503, "lr": 8.455313244934324e-06, "epoch": 0.9936305732484076, "percentage": 33.33, "elapsed_time": "0:09:38", "remaining_time": "0:19:16"}
|
| 53 |
+
{"current_steps": 53, "total_steps": 156, "loss": 1.2777, "lr": 8.373352729660373e-06, "epoch": 1.0127388535031847, "percentage": 33.97, "elapsed_time": "0:10:33", "remaining_time": "0:20:30"}
|
| 54 |
+
{"current_steps": 54, "total_steps": 156, "loss": 0.7338, "lr": 8.289693629698564e-06, "epoch": 1.0318471337579618, "percentage": 34.62, "elapsed_time": "0:10:43", "remaining_time": "0:20:14"}
|
| 55 |
+
{"current_steps": 55, "total_steps": 156, "loss": 0.7477, "lr": 8.204378069925121e-06, "epoch": 1.0509554140127388, "percentage": 35.26, "elapsed_time": "0:10:53", "remaining_time": "0:19:59"}
|
| 56 |
+
{"current_steps": 56, "total_steps": 156, "loss": 0.7373, "lr": 8.117449009293668e-06, "epoch": 1.070063694267516, "percentage": 35.9, "elapsed_time": "0:11:07", "remaining_time": "0:19:51"}
|
| 57 |
+
{"current_steps": 57, "total_steps": 156, "loss": 0.6939, "lr": 8.0289502192041e-06, "epoch": 1.089171974522293, "percentage": 36.54, "elapsed_time": "0:11:16", "remaining_time": "0:19:34"}
|
| 58 |
+
{"current_steps": 58, "total_steps": 156, "loss": 0.7188, "lr": 7.938926261462366e-06, "epoch": 1.10828025477707, "percentage": 37.18, "elapsed_time": "0:11:36", "remaining_time": "0:19:36"}
|
| 59 |
+
{"current_steps": 59, "total_steps": 156, "loss": 0.7351, "lr": 7.84742246584226e-06, "epoch": 1.127388535031847, "percentage": 37.82, "elapsed_time": "0:11:50", "remaining_time": "0:19:28"}
|
| 60 |
+
{"current_steps": 60, "total_steps": 156, "loss": 0.6817, "lr": 7.754484907260513e-06, "epoch": 1.1464968152866242, "percentage": 38.46, "elapsed_time": "0:11:58", "remaining_time": "0:19:09"}
|
| 61 |
+
{"current_steps": 61, "total_steps": 156, "loss": 0.7569, "lr": 7.660160382576683e-06, "epoch": 1.1656050955414012, "percentage": 39.1, "elapsed_time": "0:12:06", "remaining_time": "0:18:51"}
|
| 62 |
+
{"current_steps": 62, "total_steps": 156, "loss": 0.6861, "lr": 7.564496387029532e-06, "epoch": 1.1847133757961783, "percentage": 39.74, "elapsed_time": "0:12:18", "remaining_time": "0:18:40"}
|
| 63 |
+
{"current_steps": 63, "total_steps": 156, "loss": 0.7372, "lr": 7.467541090321735e-06, "epoch": 1.2038216560509554, "percentage": 40.38, "elapsed_time": "0:12:29", "remaining_time": "0:18:26"}
|
| 64 |
+
{"current_steps": 64, "total_steps": 156, "loss": 0.6683, "lr": 7.369343312364994e-06, "epoch": 1.2229299363057324, "percentage": 41.03, "elapsed_time": "0:12:43", "remaining_time": "0:18:17"}
|
| 65 |
+
{"current_steps": 65, "total_steps": 156, "loss": 0.7442, "lr": 7.269952498697734e-06, "epoch": 1.2420382165605095, "percentage": 41.67, "elapsed_time": "0:12:50", "remaining_time": "0:17:59"}
|
| 66 |
+
{"current_steps": 66, "total_steps": 156, "loss": 0.721, "lr": 7.169418695587791e-06, "epoch": 1.2611464968152866, "percentage": 42.31, "elapsed_time": "0:12:57", "remaining_time": "0:17:40"}
|
| 67 |
+
{"current_steps": 67, "total_steps": 156, "loss": 0.7401, "lr": 7.067792524832604e-06, "epoch": 1.2802547770700636, "percentage": 42.95, "elapsed_time": "0:13:09", "remaining_time": "0:17:29"}
|
| 68 |
+
{"current_steps": 68, "total_steps": 156, "loss": 0.689, "lr": 6.965125158269619e-06, "epoch": 1.2993630573248407, "percentage": 43.59, "elapsed_time": "0:13:18", "remaining_time": "0:17:13"}
|
| 69 |
+
{"current_steps": 69, "total_steps": 156, "loss": 0.7508, "lr": 6.8614682920097265e-06, "epoch": 1.3184713375796178, "percentage": 44.23, "elapsed_time": "0:13:29", "remaining_time": "0:17:01"}
|
| 70 |
+
{"current_steps": 70, "total_steps": 156, "loss": 0.6699, "lr": 6.7568741204067145e-06, "epoch": 1.3375796178343948, "percentage": 44.87, "elapsed_time": "0:13:50", "remaining_time": "0:17:00"}
|
| 71 |
+
{"current_steps": 71, "total_steps": 156, "loss": 0.7125, "lr": 6.651395309775837e-06, "epoch": 1.356687898089172, "percentage": 45.51, "elapsed_time": "0:14:01", "remaining_time": "0:16:47"}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7160
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6181b61510cd6766ddf1e6be445bd8be1eb93944bc0728470ea62e975bcdab79
|
| 3 |
size 7160
|