Training in progress, step 455
Browse files- model-00001-of-00004.safetensors +1 -1
- model-00002-of-00004.safetensors +1 -1
- model-00003-of-00004.safetensors +1 -1
- model-00004-of-00004.safetensors +1 -1
- trainer_log.jsonl +55 -55
- training_args.bin +1 -1
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4976698672
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb9f73b7073a1c472f96b7fdd6e255357b74ecaf09c3ec3e4aac189951246175
|
| 3 |
size 4976698672
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999802720
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1eb7c6a6b71e1d0762efdb2b9e55f1864e9edd6b63bb4166744424e8e8c7b331
|
| 3 |
size 4999802720
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4915916176
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:835c622f683ab3acb2d477bfb990200d8f1a40bd06ee52d9e0f124c961a24778
|
| 3 |
size 4915916176
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1168138808
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e77f0b127c89ca1c307564ff33a282bb4e802439a24e1a39cfc734f706097018
|
| 3 |
size 1168138808
|
trainer_log.jsonl
CHANGED
|
@@ -1,55 +1,55 @@
|
|
| 1 |
-
{"current_steps": 10, "total_steps": 455, "loss": 0.
|
| 2 |
-
{"current_steps": 20, "total_steps": 455, "loss": 0.
|
| 3 |
-
{"current_steps": 30, "total_steps": 455, "loss": 0.
|
| 4 |
-
{"current_steps": 40, "total_steps": 455, "loss": 0.
|
| 5 |
-
{"current_steps": 50, "total_steps": 455, "loss": 0.
|
| 6 |
-
{"current_steps": 50, "total_steps": 455, "eval_loss": 0.
|
| 7 |
-
{"current_steps": 60, "total_steps": 455, "loss": 0.
|
| 8 |
-
{"current_steps": 70, "total_steps": 455, "loss": 0.
|
| 9 |
-
{"current_steps": 80, "total_steps": 455, "loss": 0.
|
| 10 |
-
{"current_steps": 90, "total_steps": 455, "loss": 0.
|
| 11 |
-
{"current_steps": 100, "total_steps": 455, "loss": 0.
|
| 12 |
-
{"current_steps": 100, "total_steps": 455, "eval_loss": 0.
|
| 13 |
-
{"current_steps": 110, "total_steps": 455, "loss": 0.
|
| 14 |
-
{"current_steps": 120, "total_steps": 455, "loss": 0.
|
| 15 |
-
{"current_steps": 130, "total_steps": 455, "loss": 0.
|
| 16 |
-
{"current_steps": 140, "total_steps": 455, "loss": 0.
|
| 17 |
-
{"current_steps": 150, "total_steps": 455, "loss": 0.
|
| 18 |
-
{"current_steps": 150, "total_steps": 455, "eval_loss": 0.
|
| 19 |
-
{"current_steps": 160, "total_steps": 455, "loss": 0.
|
| 20 |
-
{"current_steps": 170, "total_steps": 455, "loss": 0.
|
| 21 |
-
{"current_steps": 180, "total_steps": 455, "loss": 0.
|
| 22 |
-
{"current_steps": 190, "total_steps": 455, "loss": 0.
|
| 23 |
-
{"current_steps": 200, "total_steps": 455, "loss": 0.
|
| 24 |
-
{"current_steps": 200, "total_steps": 455, "eval_loss": 0.
|
| 25 |
-
{"current_steps": 210, "total_steps": 455, "loss": 0.
|
| 26 |
-
{"current_steps": 220, "total_steps": 455, "loss": 0.
|
| 27 |
-
{"current_steps": 230, "total_steps": 455, "loss": 0.
|
| 28 |
-
{"current_steps": 240, "total_steps": 455, "loss": 0.
|
| 29 |
-
{"current_steps": 250, "total_steps": 455, "loss": 0.
|
| 30 |
-
{"current_steps": 250, "total_steps": 455, "eval_loss": 0.
|
| 31 |
-
{"current_steps": 260, "total_steps": 455, "loss": 0.
|
| 32 |
-
{"current_steps": 270, "total_steps": 455, "loss": 0.
|
| 33 |
-
{"current_steps": 280, "total_steps": 455, "loss": 0.
|
| 34 |
-
{"current_steps": 290, "total_steps": 455, "loss": 0.2901, "lr": 3.5061398705569544e-06, "epoch": 0.6362265185794598, "percentage": 63.74, "elapsed_time": "
|
| 35 |
-
{"current_steps": 300, "total_steps": 455, "loss": 0.
|
| 36 |
-
{"current_steps": 300, "total_steps": 455, "eval_loss": 0.
|
| 37 |
-
{"current_steps": 310, "total_steps": 455, "loss": 0.
|
| 38 |
-
{"current_steps": 320, "total_steps": 455, "loss": 0.
|
| 39 |
-
{"current_steps": 330, "total_steps": 455, "loss": 0.2961, "lr": 2.1329917771761806e-06, "epoch": 0.7239819004524887, "percentage": 72.53, "elapsed_time": "
|
| 40 |
-
{"current_steps": 340, "total_steps": 455, "loss": 0.
|
| 41 |
-
{"current_steps": 350, "total_steps": 455, "loss": 0.
|
| 42 |
-
{"current_steps": 350, "total_steps": 455, "eval_loss": 0.
|
| 43 |
-
{"current_steps": 360, "total_steps": 455, "loss": 0.
|
| 44 |
-
{"current_steps": 370, "total_steps": 455, "loss": 0.
|
| 45 |
-
{"current_steps": 380, "total_steps": 455, "loss": 0.
|
| 46 |
-
{"current_steps": 390, "total_steps": 455, "loss": 0.
|
| 47 |
-
{"current_steps": 400, "total_steps": 455, "loss": 0.
|
| 48 |
-
{"current_steps": 400, "total_steps": 455, "eval_loss": 0.
|
| 49 |
-
{"current_steps": 410, "total_steps": 455, "loss": 0.
|
| 50 |
-
{"current_steps": 420, "total_steps": 455, "loss": 0.
|
| 51 |
-
{"current_steps": 430, "total_steps": 455, "loss": 0.
|
| 52 |
-
{"current_steps": 440, "total_steps": 455, "loss": 0.
|
| 53 |
-
{"current_steps": 450, "total_steps": 455, "loss": 0.
|
| 54 |
-
{"current_steps": 450, "total_steps": 455, "eval_loss": 0.
|
| 55 |
-
{"current_steps": 455, "total_steps": 455, "epoch": 0.9982174688057041, "percentage": 100.0, "elapsed_time": "
|
|
|
|
| 1 |
+
{"current_steps": 10, "total_steps": 455, "loss": 0.6003, "lr": 2.173913043478261e-06, "epoch": 0.021938845468257234, "percentage": 2.2, "elapsed_time": "0:02:09", "remaining_time": "1:36:15"}
|
| 2 |
+
{"current_steps": 20, "total_steps": 455, "loss": 0.4488, "lr": 4.347826086956522e-06, "epoch": 0.04387769093651447, "percentage": 4.4, "elapsed_time": "0:04:21", "remaining_time": "1:34:38"}
|
| 3 |
+
{"current_steps": 30, "total_steps": 455, "loss": 0.4128, "lr": 6.521739130434783e-06, "epoch": 0.0658165364047717, "percentage": 6.59, "elapsed_time": "0:06:30", "remaining_time": "1:32:06"}
|
| 4 |
+
{"current_steps": 40, "total_steps": 455, "loss": 0.3861, "lr": 8.695652173913044e-06, "epoch": 0.08775538187302893, "percentage": 8.79, "elapsed_time": "0:08:38", "remaining_time": "1:29:44"}
|
| 5 |
+
{"current_steps": 50, "total_steps": 455, "loss": 0.3971, "lr": 9.997640179574575e-06, "epoch": 0.10969422734128617, "percentage": 10.99, "elapsed_time": "0:10:46", "remaining_time": "1:27:18"}
|
| 6 |
+
{"current_steps": 50, "total_steps": 455, "eval_loss": 0.40791964530944824, "epoch": 0.10969422734128617, "percentage": 10.99, "elapsed_time": "0:11:17", "remaining_time": "1:31:24"}
|
| 7 |
+
{"current_steps": 60, "total_steps": 455, "loss": 0.3866, "lr": 9.971117774604978e-06, "epoch": 0.1316330728095434, "percentage": 13.19, "elapsed_time": "0:13:24", "remaining_time": "1:28:14"}
|
| 8 |
+
{"current_steps": 70, "total_steps": 455, "loss": 0.3991, "lr": 9.915280116903003e-06, "epoch": 0.15357191827780062, "percentage": 15.38, "elapsed_time": "0:15:34", "remaining_time": "1:25:37"}
|
| 9 |
+
{"current_steps": 80, "total_steps": 455, "loss": 0.3761, "lr": 9.83045648755225e-06, "epoch": 0.17551076374605787, "percentage": 17.58, "elapsed_time": "0:17:44", "remaining_time": "1:23:09"}
|
| 10 |
+
{"current_steps": 90, "total_steps": 455, "loss": 0.3702, "lr": 9.717147101241817e-06, "epoch": 0.1974496092143151, "percentage": 19.78, "elapsed_time": "0:19:54", "remaining_time": "1:20:46"}
|
| 11 |
+
{"current_steps": 100, "total_steps": 455, "loss": 0.3678, "lr": 9.576020156442802e-06, "epoch": 0.21938845468257234, "percentage": 21.98, "elapsed_time": "0:22:03", "remaining_time": "1:18:19"}
|
| 12 |
+
{"current_steps": 100, "total_steps": 455, "eval_loss": 0.36698606610298157, "epoch": 0.21938845468257234, "percentage": 21.98, "elapsed_time": "0:22:34", "remaining_time": "1:20:07"}
|
| 13 |
+
{"current_steps": 110, "total_steps": 455, "loss": 0.3638, "lr": 9.407907894965138e-06, "epoch": 0.24132730015082957, "percentage": 24.18, "elapsed_time": "0:24:43", "remaining_time": "1:17:33"}
|
| 14 |
+
{"current_steps": 120, "total_steps": 455, "loss": 0.3657, "lr": 9.213801694132014e-06, "epoch": 0.2632661456190868, "percentage": 26.37, "elapsed_time": "0:26:53", "remaining_time": "1:15:03"}
|
| 15 |
+
{"current_steps": 130, "total_steps": 455, "loss": 0.3588, "lr": 8.994846220513872e-06, "epoch": 0.28520499108734404, "percentage": 28.57, "elapsed_time": "0:29:01", "remaining_time": "1:12:33"}
|
| 16 |
+
{"current_steps": 140, "total_steps": 455, "loss": 0.3497, "lr": 8.752332679698128e-06, "epoch": 0.30714383655560124, "percentage": 30.77, "elapsed_time": "0:31:10", "remaining_time": "1:10:08"}
|
| 17 |
+
{"current_steps": 150, "total_steps": 455, "loss": 0.3599, "lr": 8.48769120190144e-06, "epoch": 0.3290826820238585, "percentage": 32.97, "elapsed_time": "0:33:15", "remaining_time": "1:07:37"}
|
| 18 |
+
{"current_steps": 150, "total_steps": 455, "eval_loss": 0.33442166447639465, "epoch": 0.3290826820238585, "percentage": 32.97, "elapsed_time": "0:33:45", "remaining_time": "1:08:39"}
|
| 19 |
+
{"current_steps": 160, "total_steps": 455, "loss": 0.3526, "lr": 8.202482408327496e-06, "epoch": 0.35102152749211574, "percentage": 35.16, "elapsed_time": "0:35:53", "remaining_time": "1:06:11"}
|
| 20 |
+
{"current_steps": 170, "total_steps": 455, "loss": 0.3357, "lr": 7.898388208004449e-06, "epoch": 0.372960372960373, "percentage": 37.36, "elapsed_time": "0:38:03", "remaining_time": "1:03:47"}
|
| 21 |
+
{"current_steps": 180, "total_steps": 455, "loss": 0.3415, "lr": 7.577201879374114e-06, "epoch": 0.3948992184286302, "percentage": 39.56, "elapsed_time": "0:40:12", "remaining_time": "1:01:25"}
|
| 22 |
+
{"current_steps": 190, "total_steps": 455, "loss": 0.3376, "lr": 7.240817495122936e-06, "epoch": 0.41683806389688743, "percentage": 41.76, "elapsed_time": "0:42:20", "remaining_time": "0:59:03"}
|
| 23 |
+
{"current_steps": 200, "total_steps": 455, "loss": 0.3186, "lr": 6.891218752617715e-06, "epoch": 0.4387769093651447, "percentage": 43.96, "elapsed_time": "0:44:28", "remaining_time": "0:56:41"}
|
| 24 |
+
{"current_steps": 200, "total_steps": 455, "eval_loss": 0.32387444376945496, "epoch": 0.4387769093651447, "percentage": 43.96, "elapsed_time": "0:44:58", "remaining_time": "0:57:20"}
|
| 25 |
+
{"current_steps": 210, "total_steps": 455, "loss": 0.3317, "lr": 6.5304672758143014e-06, "epoch": 0.4607157548334019, "percentage": 46.15, "elapsed_time": "0:47:06", "remaining_time": "0:54:57"}
|
| 26 |
+
{"current_steps": 220, "total_steps": 455, "loss": 0.327, "lr": 6.160690457624223e-06, "epoch": 0.48265460030165913, "percentage": 48.35, "elapsed_time": "0:49:15", "remaining_time": "0:52:36"}
|
| 27 |
+
{"current_steps": 230, "total_steps": 455, "loss": 0.3146, "lr": 5.784068914434239e-06, "epoch": 0.5045934457699164, "percentage": 50.55, "elapsed_time": "0:51:25", "remaining_time": "0:50:18"}
|
| 28 |
+
{"current_steps": 240, "total_steps": 455, "loss": 0.3171, "lr": 5.40282362676094e-06, "epoch": 0.5265322912381736, "percentage": 52.75, "elapsed_time": "0:53:33", "remaining_time": "0:47:58"}
|
| 29 |
+
{"current_steps": 250, "total_steps": 455, "loss": 0.2979, "lr": 5.019202841873434e-06, "epoch": 0.5484711367064308, "percentage": 54.95, "elapsed_time": "0:55:39", "remaining_time": "0:45:38"}
|
| 30 |
+
{"current_steps": 250, "total_steps": 455, "eval_loss": 0.29967406392097473, "epoch": 0.5484711367064308, "percentage": 54.95, "elapsed_time": "0:56:10", "remaining_time": "0:46:03"}
|
| 31 |
+
{"current_steps": 260, "total_steps": 455, "loss": 0.3121, "lr": 4.635468815620862e-06, "epoch": 0.5704099821746881, "percentage": 57.14, "elapsed_time": "0:58:18", "remaining_time": "0:43:43"}
|
| 32 |
+
{"current_steps": 270, "total_steps": 455, "loss": 0.3036, "lr": 4.2538844716497075e-06, "epoch": 0.5923488276429453, "percentage": 59.34, "elapsed_time": "1:00:29", "remaining_time": "0:41:27"}
|
| 33 |
+
{"current_steps": 280, "total_steps": 455, "loss": 0.2978, "lr": 3.876700056683026e-06, "epoch": 0.6142876731112025, "percentage": 61.54, "elapsed_time": "1:02:41", "remaining_time": "0:39:10"}
|
| 34 |
+
{"current_steps": 290, "total_steps": 455, "loss": 0.2901, "lr": 3.5061398705569544e-06, "epoch": 0.6362265185794598, "percentage": 63.74, "elapsed_time": "1:04:49", "remaining_time": "0:36:52"}
|
| 35 |
+
{"current_steps": 300, "total_steps": 455, "loss": 0.2996, "lr": 3.144389149268983e-06, "epoch": 0.658165364047717, "percentage": 65.93, "elapsed_time": "1:06:58", "remaining_time": "0:34:35"}
|
| 36 |
+
{"current_steps": 300, "total_steps": 455, "eval_loss": 0.2765507102012634, "epoch": 0.658165364047717, "percentage": 65.93, "elapsed_time": "1:07:28", "remaining_time": "0:34:51"}
|
| 37 |
+
{"current_steps": 310, "total_steps": 455, "loss": 0.2885, "lr": 2.7935811783901878e-06, "epoch": 0.6801042095159742, "percentage": 68.13, "elapsed_time": "1:09:39", "remaining_time": "0:32:34"}
|
| 38 |
+
{"current_steps": 320, "total_steps": 455, "loss": 0.2898, "lr": 2.455784712835084e-06, "epoch": 0.7020430549842315, "percentage": 70.33, "elapsed_time": "1:11:47", "remaining_time": "0:30:17"}
|
| 39 |
+
{"current_steps": 330, "total_steps": 455, "loss": 0.2961, "lr": 2.1329917771761806e-06, "epoch": 0.7239819004524887, "percentage": 72.53, "elapsed_time": "1:14:00", "remaining_time": "0:28:01"}
|
| 40 |
+
{"current_steps": 340, "total_steps": 455, "loss": 0.2856, "lr": 1.8271059184461781e-06, "epoch": 0.745920745920746, "percentage": 74.73, "elapsed_time": "1:16:08", "remaining_time": "0:25:45"}
|
| 41 |
+
{"current_steps": 350, "total_steps": 455, "loss": 0.2887, "lr": 1.5399309807023942e-06, "epoch": 0.7678595913890032, "percentage": 76.92, "elapsed_time": "1:18:17", "remaining_time": "0:23:29"}
|
| 42 |
+
{"current_steps": 350, "total_steps": 455, "eval_loss": 0.2653253376483917, "epoch": 0.7678595913890032, "percentage": 76.92, "elapsed_time": "1:18:47", "remaining_time": "0:23:38"}
|
| 43 |
+
{"current_steps": 360, "total_steps": 455, "loss": 0.2752, "lr": 1.2731604675510729e-06, "epoch": 0.7897984368572604, "percentage": 79.12, "elapsed_time": "1:20:56", "remaining_time": "0:21:21"}
|
| 44 |
+
{"current_steps": 370, "total_steps": 455, "loss": 0.2744, "lr": 1.0283675553620281e-06, "epoch": 0.8117372823255177, "percentage": 81.32, "elapsed_time": "1:23:05", "remaining_time": "0:19:05"}
|
| 45 |
+
{"current_steps": 380, "total_steps": 455, "loss": 0.2813, "lr": 8.069958160668256e-07, "epoch": 0.8336761277937749, "percentage": 83.52, "elapsed_time": "1:25:14", "remaining_time": "0:16:49"}
|
| 46 |
+
{"current_steps": 390, "total_steps": 455, "loss": 0.2624, "lr": 6.10350704249219e-07, "epoch": 0.8556149732620321, "percentage": 85.71, "elapsed_time": "1:27:20", "remaining_time": "0:14:33"}
|
| 47 |
+
{"current_steps": 400, "total_steps": 455, "loss": 0.2696, "lr": 4.3959185872947007e-07, "epoch": 0.8775538187302894, "percentage": 87.91, "elapsed_time": "1:29:32", "remaining_time": "0:12:18"}
|
| 48 |
+
{"current_steps": 400, "total_steps": 455, "eval_loss": 0.25855177640914917, "epoch": 0.8775538187302894, "percentage": 87.91, "elapsed_time": "1:30:02", "remaining_time": "0:12:22"}
|
| 49 |
+
{"current_steps": 410, "total_steps": 455, "loss": 0.2795, "lr": 2.9572626404096915e-07, "epoch": 0.8994926641985466, "percentage": 90.11, "elapsed_time": "1:32:08", "remaining_time": "0:10:06"}
|
| 50 |
+
{"current_steps": 420, "total_steps": 455, "loss": 0.2661, "lr": 1.7960231212674095e-07, "epoch": 0.9214315096668038, "percentage": 92.31, "elapsed_time": "1:34:15", "remaining_time": "0:07:51"}
|
| 51 |
+
{"current_steps": 430, "total_steps": 455, "loss": 0.2787, "lr": 9.190479927466023e-08, "epoch": 0.9433703551350611, "percentage": 94.51, "elapsed_time": "1:36:25", "remaining_time": "0:05:36"}
|
| 52 |
+
{"current_steps": 440, "total_steps": 455, "loss": 0.2752, "lr": 3.315088779506259e-08, "epoch": 0.9653092006033183, "percentage": 96.7, "elapsed_time": "1:38:33", "remaining_time": "0:03:21"}
|
| 53 |
+
{"current_steps": 450, "total_steps": 455, "loss": 0.2784, "lr": 3.6870562551699627e-09, "epoch": 0.9872480460715755, "percentage": 98.9, "elapsed_time": "1:40:44", "remaining_time": "0:01:07"}
|
| 54 |
+
{"current_steps": 450, "total_steps": 455, "eval_loss": 0.2559176981449127, "epoch": 0.9872480460715755, "percentage": 98.9, "elapsed_time": "1:41:14", "remaining_time": "0:01:07"}
|
| 55 |
+
{"current_steps": 455, "total_steps": 455, "epoch": 0.9982174688057041, "percentage": 100.0, "elapsed_time": "1:43:16", "remaining_time": "0:00:00"}
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 7608
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:da49d7bc6153266740c60b542e01d6ad6cb9a1d807163f1f309c8911fe0a18e7
|
| 3 |
size 7608
|