Model save
Browse files
- README.md +7 -108
- adapter_config.json +3 -3
- adapter_model.safetensors +1 -1
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -16,8 +16,13 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 16 |
|
| 17 |
This model is a fine-tuned version of [HuggingFaceTB/SmolLM-135M](https://huggingface.co/HuggingFaceTB/SmolLM-135M) on an unknown dataset.
|
| 18 |
It achieves the following results on the evaluation set:
|
| 19 |
-
- Loss: 0.0481
|
| 20 |
-
- Perplexity: 1.0493
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
## Model description
|
| 23 |
|
|
@@ -48,112 +53,6 @@ The following hyperparameters were used during training:
|
|
| 48 |
- num_epochs: 2
|
| 49 |
- mixed_precision_training: Native AMP
|
| 50 |
|
| 51 |
-
### Training results
|
| 52 |
-
|
| 53 |
-
| Training Loss | Epoch | Step | Validation Loss | Perplexity |
|
| 54 |
-
|:-------------:|:------:|:----:|:---------------:|:----------:|
|
| 55 |
-
| 6.9452 | 0.02 | 10 | 6.9564 | 1049.8097 |
|
| 56 |
-
| 6.821 | 0.04 | 20 | 6.7827 | 882.4031 |
|
| 57 |
-
| 6.6345 | 0.06 | 30 | 6.3818 | 590.9616 |
|
| 58 |
-
| 6.0263 | 0.08 | 40 | 5.6650 | 288.5819 |
|
| 59 |
-
| 5.361 | 0.1 | 50 | 4.9706 | 144.1120 |
|
| 60 |
-
| 4.6135 | 0.12 | 60 | 4.2190 | 67.9680 |
|
| 61 |
-
| 3.9045 | 0.14 | 70 | 3.3976 | 29.8913 |
|
| 62 |
-
| 3.0717 | 0.16 | 80 | 2.6529 | 14.1943 |
|
| 63 |
-
| 2.3341 | 0.18 | 90 | 1.9881 | 7.3015 |
|
| 64 |
-
| 1.6176 | 0.2 | 100 | 1.3017 | 3.6755 |
|
| 65 |
-
| 1.0052 | 0.22 | 110 | 0.7482 | 2.1132 |
|
| 66 |
-
| 0.6496 | 0.24 | 120 | 0.5192 | 1.6807 |
|
| 67 |
-
| 0.4372 | 0.26 | 130 | 0.3742 | 1.4538 |
|
| 68 |
-
| 0.34 | 0.28 | 140 | 0.3129 | 1.3674 |
|
| 69 |
-
| 0.2917 | 0.3 | 150 | 0.2748 | 1.3163 |
|
| 70 |
-
| 0.2604 | 0.32 | 160 | 0.2507 | 1.2850 |
|
| 71 |
-
| 0.2392 | 0.34 | 170 | 0.2325 | 1.2618 |
|
| 72 |
-
| 0.2253 | 0.36 | 180 | 0.2174 | 1.2428 |
|
| 73 |
-
| 0.2129 | 0.38 | 190 | 0.2074 | 1.2304 |
|
| 74 |
-
| 0.2032 | 0.4 | 200 | 0.1976 | 1.2185 |
|
| 75 |
-
| 0.1924 | 0.42 | 210 | 0.1874 | 1.2062 |
|
| 76 |
-
| 0.1837 | 0.44 | 220 | 0.1790 | 1.1961 |
|
| 77 |
-
| 0.1756 | 0.46 | 230 | 0.1705 | 1.1858 |
|
| 78 |
-
| 0.1676 | 0.48 | 240 | 0.1640 | 1.1782 |
|
| 79 |
-
| 0.1607 | 0.5 | 250 | 0.1573 | 1.1703 |
|
| 80 |
-
| 0.1556 | 0.52 | 260 | 0.1516 | 1.1637 |
|
| 81 |
-
| 0.1483 | 0.54 | 270 | 0.1465 | 1.1578 |
|
| 82 |
-
| 0.1427 | 0.56 | 280 | 0.1408 | 1.1512 |
|
| 83 |
-
| 0.1371 | 0.58 | 290 | 0.1355 | 1.1451 |
|
| 84 |
-
| 0.133 | 0.6 | 300 | 0.1306 | 1.1395 |
|
| 85 |
-
| 0.1278 | 0.62 | 310 | 0.1256 | 1.1338 |
|
| 86 |
-
| 0.1233 | 0.64 | 320 | 0.1222 | 1.1300 |
|
| 87 |
-
| 0.12 | 0.66 | 330 | 0.1172 | 1.1243 |
|
| 88 |
-
| 0.1153 | 0.68 | 340 | 0.1126 | 1.1192 |
|
| 89 |
-
| 0.1115 | 0.7 | 350 | 0.1091 | 1.1153 |
|
| 90 |
-
| 0.1076 | 0.72 | 360 | 0.1058 | 1.1116 |
|
| 91 |
-
| 0.105 | 0.74 | 370 | 0.1024 | 1.1078 |
|
| 92 |
-
| 0.1013 | 0.76 | 380 | 0.0999 | 1.1051 |
|
| 93 |
-
| 0.098 | 0.78 | 390 | 0.0965 | 1.1013 |
|
| 94 |
-
| 0.0954 | 0.8 | 400 | 0.0936 | 1.0981 |
|
| 95 |
-
| 0.0926 | 0.82 | 410 | 0.0917 | 1.0960 |
|
| 96 |
-
| 0.0903 | 0.84 | 420 | 0.0889 | 1.0929 |
|
| 97 |
-
| 0.0874 | 0.86 | 430 | 0.0866 | 1.0904 |
|
| 98 |
-
| 0.0856 | 0.88 | 440 | 0.0838 | 1.0874 |
|
| 99 |
-
| 0.0832 | 0.9 | 450 | 0.0815 | 1.0849 |
|
| 100 |
-
| 0.0811 | 0.92 | 460 | 0.0800 | 1.0833 |
|
| 101 |
-
| 0.0792 | 0.94 | 470 | 0.0778 | 1.0809 |
|
| 102 |
-
| 0.0771 | 0.96 | 480 | 0.0761 | 1.0791 |
|
| 103 |
-
| 0.0753 | 0.98 | 490 | 0.0744 | 1.0772 |
|
| 104 |
-
| 0.0739 | 1.0 | 500 | 0.0727 | 1.0754 |
|
| 105 |
-
| 0.0721 | 1.02 | 510 | 0.0710 | 1.0736 |
|
| 106 |
-
| 0.0706 | 1.04 | 520 | 0.0699 | 1.0724 |
|
| 107 |
-
| 0.0693 | 1.06 | 530 | 0.0685 | 1.0709 |
|
| 108 |
-
| 0.068 | 1.08 | 540 | 0.0673 | 1.0696 |
|
| 109 |
-
| 0.0667 | 1.1 | 550 | 0.0662 | 1.0685 |
|
| 110 |
-
| 0.0656 | 1.12 | 560 | 0.0646 | 1.0667 |
|
| 111 |
-
| 0.0644 | 1.1400 | 570 | 0.0640 | 1.0661 |
|
| 112 |
-
| 0.0633 | 1.16 | 580 | 0.0627 | 1.0647 |
|
| 113 |
-
| 0.0626 | 1.18 | 590 | 0.0625 | 1.0645 |
|
| 114 |
-
| 0.0616 | 1.2 | 600 | 0.0613 | 1.0632 |
|
| 115 |
-
| 0.0606 | 1.22 | 610 | 0.0605 | 1.0623 |
|
| 116 |
-
| 0.0599 | 1.24 | 620 | 0.0592 | 1.0610 |
|
| 117 |
-
| 0.059 | 1.26 | 630 | 0.0585 | 1.0602 |
|
| 118 |
-
| 0.0587 | 1.28 | 640 | 0.0580 | 1.0597 |
|
| 119 |
-
| 0.0578 | 1.3 | 650 | 0.0575 | 1.0592 |
|
| 120 |
-
| 0.0569 | 1.32 | 660 | 0.0567 | 1.0583 |
|
| 121 |
-
| 0.0566 | 1.34 | 670 | 0.0558 | 1.0574 |
|
| 122 |
-
| 0.056 | 1.3600 | 680 | 0.0555 | 1.0571 |
|
| 123 |
-
| 0.0555 | 1.38 | 690 | 0.0549 | 1.0565 |
|
| 124 |
-
| 0.0548 | 1.4 | 700 | 0.0545 | 1.0560 |
|
| 125 |
-
| 0.0542 | 1.42 | 710 | 0.0540 | 1.0555 |
|
| 126 |
-
| 0.0539 | 1.44 | 720 | 0.0536 | 1.0550 |
|
| 127 |
-
| 0.0534 | 1.46 | 730 | 0.0531 | 1.0545 |
|
| 128 |
-
| 0.0529 | 1.48 | 740 | 0.0530 | 1.0544 |
|
| 129 |
-
| 0.0524 | 1.5 | 750 | 0.0523 | 1.0537 |
|
| 130 |
-
| 0.0523 | 1.52 | 760 | 0.0518 | 1.0532 |
|
| 131 |
-
| 0.0519 | 1.54 | 770 | 0.0518 | 1.0532 |
|
| 132 |
-
| 0.0515 | 1.56 | 780 | 0.0517 | 1.0531 |
|
| 133 |
-
| 0.0512 | 1.58 | 790 | 0.0509 | 1.0522 |
|
| 134 |
-
| 0.051 | 1.6 | 800 | 0.0508 | 1.0521 |
|
| 135 |
-
| 0.0505 | 1.62 | 810 | 0.0503 | 1.0516 |
|
| 136 |
-
| 0.0502 | 1.6400 | 820 | 0.0501 | 1.0513 |
|
| 137 |
-
| 0.05 | 1.6600 | 830 | 0.0500 | 1.0513 |
|
| 138 |
-
| 0.0498 | 1.6800 | 840 | 0.0497 | 1.0509 |
|
| 139 |
-
| 0.0496 | 1.7 | 850 | 0.0496 | 1.0508 |
|
| 140 |
-
| 0.0492 | 1.72 | 860 | 0.0494 | 1.0506 |
|
| 141 |
-
| 0.0493 | 1.74 | 870 | 0.0491 | 1.0504 |
|
| 142 |
-
| 0.0488 | 1.76 | 880 | 0.0488 | 1.0500 |
|
| 143 |
-
| 0.0488 | 1.78 | 890 | 0.0488 | 1.0500 |
|
| 144 |
-
| 0.0489 | 1.8 | 900 | 0.0487 | 1.0499 |
|
| 145 |
-
| 0.0484 | 1.8200 | 910 | 0.0484 | 1.0496 |
|
| 146 |
-
| 0.0483 | 1.8400 | 920 | 0.0487 | 1.0499 |
|
| 147 |
-
| 0.0484 | 1.8600 | 930 | 0.0485 | 1.0497 |
|
| 148 |
-
| 0.0483 | 1.88 | 940 | 0.0483 | 1.0494 |
|
| 149 |
-
| 0.0481 | 1.9 | 950 | 0.0483 | 1.0495 |
|
| 150 |
-
| 0.0481 | 1.92 | 960 | 0.0482 | 1.0494 |
|
| 151 |
-
| 0.048 | 1.94 | 970 | 0.0483 | 1.0495 |
|
| 152 |
-
| 0.0482 | 1.96 | 980 | 0.0482 | 1.0493 |
|
| 153 |
-
| 0.048 | 1.98 | 990 | 0.0481 | 1.0493 |
|
| 154 |
-
| 0.0478 | 2.0 | 1000 | 0.0481 | 1.0493 |
|
| 155 |
-
|
| 156 |
-
|
| 157 |
### Framework versions
|
| 158 |
|
| 159 |
- PEFT 0.14.0
|
|
|
|
| 16 |
|
| 17 |
This model is a fine-tuned version of [HuggingFaceTB/SmolLM-135M](https://huggingface.co/HuggingFaceTB/SmolLM-135M) on an unknown dataset.
|
| 18 |
It achieves the following results on the evaluation set:
|
| 19 |
+
- eval_loss: 7.0024
|
| 20 |
+
- eval_perplexity: 1099.2529
|
| 21 |
+
- eval_runtime: 3.107
|
| 22 |
+
- eval_samples_per_second: 3.219
|
| 23 |
+
- eval_steps_per_second: 3.219
|
| 24 |
+
- epoch: 0.02
|
| 25 |
+
- step: 10
|
| 26 |
|
| 27 |
## Model description
|
| 28 |
|
|
|
|
| 53 |
- num_epochs: 2
|
| 54 |
- mixed_precision_training: Native AMP
|
| 55 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
### Framework versions
|
| 57 |
|
| 58 |
- PEFT 0.14.0
|
adapter_config.json
CHANGED
|
@@ -23,10 +23,10 @@
|
|
| 23 |
"rank_pattern": {},
|
| 24 |
"revision": null,
|
| 25 |
"target_modules": [
|
| 26 |
-
"q_proj",
|
| 27 |
"v_proj",
|
| 28 |
-
"k_proj",
|
| 29 |
-
"o_proj"
|
|
|
|
| 30 |
],
|
| 31 |
"task_type": "CAUSAL_LM",
|
| 32 |
"use_dora": false,
|
|
|
|
| 23 |
"rank_pattern": {},
|
| 24 |
"revision": null,
|
| 25 |
"target_modules": [
|
|
|
|
| 26 |
"v_proj",
|
| 27 |
+
"q_proj",
|
| 28 |
+
"o_proj",
|
| 29 |
+
"k_proj"
|
| 30 |
],
|
| 31 |
"task_type": "CAUSAL_LM",
|
| 32 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1874312
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d17cc7d6ccf55e7ab8d4a9a695ce2adc004e153df6cb49d4ea4326de9af78ae6
|
| 3 |
size 1874312
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5304
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:41586a34a8ad3f57556aa375a8ae56c0caa738cfea33b847a58d832cd057e27b
|
| 3 |
size 5304
|