sujithatz/finbot-transofrmer-based-phi3.5_adapter
Browse files
- README.md +94 -46
- adapter_config.json +4 -4
- adapter_model.safetensors +1 -1
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
|
|
| 18 |
|
| 19 |
This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
|
| 20 |
It achieves the following results on the evaluation set:
|
| 21 |
-
- Loss: 0.
|
| 22 |
|
| 23 |
## Model description
|
| 24 |
|
|
@@ -50,51 +50,99 @@ The following hyperparameters were used during training:
|
|
| 50 |
|
| 51 |
| Training Loss | Epoch | Step | Validation Loss |
|
| 52 |
|:-------------:|:------:|:----:|:---------------:|
|
| 53 |
-
| 1.
|
| 54 |
-
| 1.
|
| 55 |
-
| 1.
|
| 56 |
-
|
|
| 57 |
-
|
|
| 58 |
-
| 0.
|
| 59 |
-
| 0.
|
| 60 |
-
| 0.
|
| 61 |
-
| 0.
|
| 62 |
-
| 0.
|
| 63 |
-
| 0.
|
| 64 |
-
| 0.
|
| 65 |
-
| 0.
|
| 66 |
-
| 0.
|
| 67 |
-
| 0.
|
| 68 |
-
| 0.
|
| 69 |
-
| 0.
|
| 70 |
-
| 0.
|
| 71 |
-
| 0.
|
| 72 |
-
| 0.
|
| 73 |
-
| 0.
|
| 74 |
-
| 0.
|
| 75 |
-
| 0.
|
| 76 |
-
| 0.
|
| 77 |
-
| 0.
|
| 78 |
-
| 0.
|
| 79 |
-
| 0.
|
| 80 |
-
| 0.
|
| 81 |
-
| 0.
|
| 82 |
-
| 0.
|
| 83 |
-
| 0.
|
| 84 |
-
| 0.
|
| 85 |
-
| 0.
|
| 86 |
-
| 0.
|
| 87 |
-
| 0.
|
| 88 |
-
| 0.
|
| 89 |
-
| 0.
|
| 90 |
-
| 0.
|
| 91 |
-
| 0.
|
| 92 |
-
| 0.
|
| 93 |
-
| 0.
|
| 94 |
-
| 0.
|
| 95 |
-
| 0.
|
| 96 |
-
| 0.
|
| 97 |
-
| 0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
|
| 100 |
### Framework versions
|
|
|
|
| 18 |
|
| 19 |
This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
|
| 20 |
It achieves the following results on the evaluation set:
|
| 21 |
+
- Loss: 0.4642
|
| 22 |
|
| 23 |
## Model description
|
| 24 |
|
|
|
|
| 50 |
|
| 51 |
| Training Loss | Epoch | Step | Validation Loss |
|
| 52 |
|:-------------:|:------:|:----:|:---------------:|
|
| 53 |
+
| 1.675 | 0.0429 | 3 | 1.6620 |
|
| 54 |
+
| 1.6239 | 0.0857 | 6 | 1.3707 |
|
| 55 |
+
| 1.2962 | 0.1286 | 9 | 1.1772 |
|
| 56 |
+
| 1.0739 | 0.1714 | 12 | 1.0452 |
|
| 57 |
+
| 1.0342 | 0.2143 | 15 | 0.9612 |
|
| 58 |
+
| 0.8244 | 0.2571 | 18 | 0.8836 |
|
| 59 |
+
| 0.8398 | 0.3 | 21 | 0.8235 |
|
| 60 |
+
| 0.9522 | 0.3429 | 24 | 0.7842 |
|
| 61 |
+
| 0.7504 | 0.3857 | 27 | 0.7502 |
|
| 62 |
+
| 0.8102 | 0.4286 | 30 | 0.7306 |
|
| 63 |
+
| 0.6835 | 0.4714 | 33 | 0.7146 |
|
| 64 |
+
| 0.7594 | 0.5143 | 36 | 0.6884 |
|
| 65 |
+
| 0.73 | 0.5571 | 39 | 0.6722 |
|
| 66 |
+
| 0.6347 | 0.6 | 42 | 0.6595 |
|
| 67 |
+
| 0.7217 | 0.6429 | 45 | 0.6508 |
|
| 68 |
+
| 0.5904 | 0.6857 | 48 | 0.6390 |
|
| 69 |
+
| 0.6656 | 0.7286 | 51 | 0.6266 |
|
| 70 |
+
| 0.5857 | 0.7714 | 54 | 0.6125 |
|
| 71 |
+
| 0.6036 | 0.8143 | 57 | 0.5918 |
|
| 72 |
+
| 0.6584 | 0.8571 | 60 | 0.5849 |
|
| 73 |
+
| 0.4954 | 0.9 | 63 | 0.5746 |
|
| 74 |
+
| 0.5165 | 0.9429 | 66 | 0.5658 |
|
| 75 |
+
| 0.6184 | 0.9857 | 69 | 0.5510 |
|
| 76 |
+
| 0.4926 | 1.0286 | 72 | 0.5484 |
|
| 77 |
+
| 0.6871 | 1.0714 | 75 | 0.5458 |
|
| 78 |
+
| 0.4718 | 1.1143 | 78 | 0.5368 |
|
| 79 |
+
| 0.5862 | 1.1571 | 81 | 0.5308 |
|
| 80 |
+
| 0.5909 | 1.2 | 84 | 0.5199 |
|
| 81 |
+
| 0.4566 | 1.2429 | 87 | 0.5155 |
|
| 82 |
+
| 0.5461 | 1.2857 | 90 | 0.5048 |
|
| 83 |
+
| 0.4024 | 1.3286 | 93 | 0.5063 |
|
| 84 |
+
| 0.5349 | 1.3714 | 96 | 0.5051 |
|
| 85 |
+
| 0.5643 | 1.4143 | 99 | 0.4994 |
|
| 86 |
+
| 0.5109 | 1.4571 | 102 | 0.4937 |
|
| 87 |
+
| 0.4582 | 1.5 | 105 | 0.4988 |
|
| 88 |
+
| 0.4304 | 1.5429 | 108 | 0.4992 |
|
| 89 |
+
| 0.3849 | 1.5857 | 111 | 0.4972 |
|
| 90 |
+
| 0.5013 | 1.6286 | 114 | 0.4979 |
|
| 91 |
+
| 0.3451 | 1.6714 | 117 | 0.4944 |
|
| 92 |
+
| 0.325 | 1.7143 | 120 | 0.4910 |
|
| 93 |
+
| 0.5228 | 1.7571 | 123 | 0.4827 |
|
| 94 |
+
| 0.4379 | 1.8 | 126 | 0.4753 |
|
| 95 |
+
| 0.3186 | 1.8429 | 129 | 0.4734 |
|
| 96 |
+
| 0.6398 | 1.8857 | 132 | 0.4740 |
|
| 97 |
+
| 0.2963 | 1.9286 | 135 | 0.4777 |
|
| 98 |
+
| 0.661 | 1.9714 | 138 | 0.4770 |
|
| 99 |
+
| 0.4953 | 2.0143 | 141 | 0.4794 |
|
| 100 |
+
| 0.3564 | 2.0571 | 144 | 0.4895 |
|
| 101 |
+
| 0.25 | 2.1 | 147 | 0.4962 |
|
| 102 |
+
| 0.4717 | 2.1429 | 150 | 0.4856 |
|
| 103 |
+
| 0.3823 | 2.1857 | 153 | 0.4734 |
|
| 104 |
+
| 0.3204 | 2.2286 | 156 | 0.4689 |
|
| 105 |
+
| 0.2621 | 2.2714 | 159 | 0.4662 |
|
| 106 |
+
| 0.2568 | 2.3143 | 162 | 0.4676 |
|
| 107 |
+
| 0.3661 | 2.3571 | 165 | 0.4713 |
|
| 108 |
+
| 0.5833 | 2.4 | 168 | 0.4691 |
|
| 109 |
+
| 0.3607 | 2.4429 | 171 | 0.4656 |
|
| 110 |
+
| 0.3806 | 2.4857 | 174 | 0.4667 |
|
| 111 |
+
| 0.2769 | 2.5286 | 177 | 0.4682 |
|
| 112 |
+
| 0.3407 | 2.5714 | 180 | 0.4703 |
|
| 113 |
+
| 0.3535 | 2.6143 | 183 | 0.4710 |
|
| 114 |
+
| 0.3801 | 2.6571 | 186 | 0.4749 |
|
| 115 |
+
| 0.3686 | 2.7 | 189 | 0.4748 |
|
| 116 |
+
| 0.4201 | 2.7429 | 192 | 0.4673 |
|
| 117 |
+
| 0.4519 | 2.7857 | 195 | 0.4634 |
|
| 118 |
+
| 0.3307 | 2.8286 | 198 | 0.4596 |
|
| 119 |
+
| 0.2853 | 2.8714 | 201 | 0.4584 |
|
| 120 |
+
| 0.4844 | 2.9143 | 204 | 0.4567 |
|
| 121 |
+
| 0.3521 | 2.9571 | 207 | 0.4549 |
|
| 122 |
+
| 0.6176 | 3.0 | 210 | 0.4519 |
|
| 123 |
+
| 0.298 | 3.0429 | 213 | 0.4505 |
|
| 124 |
+
| 0.3371 | 3.0857 | 216 | 0.4505 |
|
| 125 |
+
| 0.2549 | 3.1286 | 219 | 0.4519 |
|
| 126 |
+
| 0.3271 | 3.1714 | 222 | 0.4555 |
|
| 127 |
+
| 0.3472 | 3.2143 | 225 | 0.4596 |
|
| 128 |
+
| 0.3883 | 3.2571 | 228 | 0.4607 |
|
| 129 |
+
| 0.285 | 3.3 | 231 | 0.4626 |
|
| 130 |
+
| 0.3243 | 3.3429 | 234 | 0.4642 |
|
| 131 |
+
| 0.3385 | 3.3857 | 237 | 0.4644 |
|
| 132 |
+
| 0.356 | 3.4286 | 240 | 0.4663 |
|
| 133 |
+
| 0.2939 | 3.4714 | 243 | 0.4669 |
|
| 134 |
+
| 0.2338 | 3.5143 | 246 | 0.4677 |
|
| 135 |
+
| 0.4161 | 3.5571 | 249 | 0.4660 |
|
| 136 |
+
| 0.2494 | 3.6 | 252 | 0.4669 |
|
| 137 |
+
| 0.2844 | 3.6429 | 255 | 0.4661 |
|
| 138 |
+
| 0.2141 | 3.6857 | 258 | 0.4643 |
|
| 139 |
+
| 0.2961 | 3.7286 | 261 | 0.4639 |
|
| 140 |
+
| 0.2751 | 3.7714 | 264 | 0.4640 |
|
| 141 |
+
| 0.288 | 3.8143 | 267 | 0.4636 |
|
| 142 |
+
| 0.5415 | 3.8571 | 270 | 0.4641 |
|
| 143 |
+
| 0.2016 | 3.9 | 273 | 0.4634 |
|
| 144 |
+
| 0.3921 | 3.9429 | 276 | 0.4640 |
|
| 145 |
+
| 0.4504 | 3.9857 | 279 | 0.4642 |
|
| 146 |
|
| 147 |
|
| 148 |
### Framework versions
|
adapter_config.json
CHANGED
|
@@ -20,13 +20,13 @@
|
|
| 20 |
"rank_pattern": {},
|
| 21 |
"revision": null,
|
| 22 |
"target_modules": [
|
| 23 |
-
"o_proj",
|
| 24 |
"q_proj",
|
| 25 |
-
"
|
| 26 |
"down_proj",
|
|
|
|
| 27 |
"v_proj",
|
| 28 |
-
"
|
| 29 |
-
"
|
| 30 |
],
|
| 31 |
"task_type": "CAUSAL_LM",
|
| 32 |
"use_dora": false,
|
|
|
|
| 20 |
"rank_pattern": {},
|
| 21 |
"revision": null,
|
| 22 |
"target_modules": [
|
|
|
|
| 23 |
"q_proj",
|
| 24 |
+
"k_proj",
|
| 25 |
"down_proj",
|
| 26 |
+
"gate_proj",
|
| 27 |
"v_proj",
|
| 28 |
+
"up_proj",
|
| 29 |
+
"o_proj"
|
| 30 |
],
|
| 31 |
"task_type": "CAUSAL_LM",
|
| 32 |
"use_dora": false,
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 35668592
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:296a557e8ac7fa73beecea698789e37f5d243c1734c4c8e77454c03a84b64de1
|
| 3 |
size 35668592
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5432
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3d14206dd3da4e8472af498c9b36118033550bef4ae9b4edb0f99f7664786f84
|
| 3 |
size 5432
|