sujithatz/finbot-transofrmer-based-phi3.5_adapter

Browse files

Files changed (4) hide show

README.md +94 -46
adapter_config.json +4 -4
adapter_model.safetensors +1 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.3593
 ## Model description
@@ -50,51 +50,99 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 1.5876        | 0.0882 | 3    | 1.4543          |
-| 1.4605        | 0.1765 | 6    | 1.1344          |
-| 1.0858        | 0.2647 | 9    | 0.9486          |
-| 0.8519        | 0.3529 | 12   | 0.7968          |
-| 0.7643        | 0.4412 | 15   | 0.6816          |
-| 0.6301        | 0.5294 | 18   | 0.5971          |
-| 0.5894        | 0.6176 | 21   | 0.5525          |
-| 0.5231        | 0.7059 | 24   | 0.5280          |
-| 0.4116        | 0.7941 | 27   | 0.5061          |
-| 0.5387        | 0.8824 | 30   | 0.4783          |
-| 0.4307        | 0.9706 | 33   | 0.4522          |
-| 0.3153        | 1.0588 | 36   | 0.4465          |
-| 0.3462        | 1.1471 | 39   | 0.4449          |
-| 0.3016        | 1.2353 | 42   | 0.4355          |
-| 0.3467        | 1.3235 | 45   | 0.4190          |
-| 0.3376        | 1.4118 | 48   | 0.4136          |
-| 0.3456        | 1.5    | 51   | 0.4012          |
-| 0.3881        | 1.5882 | 54   | 0.3960          |
-| 0.3482        | 1.6765 | 57   | 0.3979          |
-| 0.3376        | 1.7647 | 60   | 0.3924          |
-| 0.3401        | 1.8529 | 63   | 0.3841          |
-| 0.3353        | 1.9412 | 66   | 0.3742          |
-| 0.2029        | 2.0294 | 69   | 0.3745          |
-| 0.2615        | 2.1176 | 72   | 0.3771          |
-| 0.2548        | 2.2059 | 75   | 0.3714          |
-| 0.1868        | 2.2941 | 78   | 0.3702          |
-| 0.2085        | 2.3824 | 81   | 0.3726          |
-| 0.2683        | 2.4706 | 84   | 0.3698          |
-| 0.1547        | 2.5588 | 87   | 0.3695          |
-| 0.2186        | 2.6471 | 90   | 0.3691          |
-| 0.1796        | 2.7353 | 93   | 0.3658          |
-| 0.2604        | 2.8235 | 96   | 0.3596          |
-| 0.2752        | 2.9118 | 99   | 0.3532          |
-| 0.2562        | 3.0    | 102  | 0.3507          |
-| 0.2916        | 3.0882 | 105  | 0.3495          |
-| 0.2361        | 3.1765 | 108  | 0.3497          |
-| 0.1147        | 3.2647 | 111  | 0.3513          |
-| 0.1569        | 3.3529 | 114  | 0.3536          |
-| 0.2173        | 3.4412 | 117  | 0.3562          |
-| 0.2192        | 3.5294 | 120  | 0.3574          |
-| 0.2203        | 3.6176 | 123  | 0.3583          |
-| 0.1551        | 3.7059 | 126  | 0.3588          |
-| 0.1516        | 3.7941 | 129  | 0.3586          |
-| 0.1704        | 3.8824 | 132  | 0.3582          |
-| 0.1869        | 3.9706 | 135  | 0.3593          |
 ### Framework versions

 This model is a fine-tuned version of [microsoft/Phi-3.5-mini-instruct](https://huggingface.co/microsoft/Phi-3.5-mini-instruct) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Loss: 0.4642
 ## Model description
 | Training Loss | Epoch  | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
+| 1.675         | 0.0429 | 3    | 1.6620          |
+| 1.6239        | 0.0857 | 6    | 1.3707          |
+| 1.2962        | 0.1286 | 9    | 1.1772          |
+| 1.0739        | 0.1714 | 12   | 1.0452          |
+| 1.0342        | 0.2143 | 15   | 0.9612          |
+| 0.8244        | 0.2571 | 18   | 0.8836          |
+| 0.8398        | 0.3    | 21   | 0.8235          |
+| 0.9522        | 0.3429 | 24   | 0.7842          |
+| 0.7504        | 0.3857 | 27   | 0.7502          |
+| 0.8102        | 0.4286 | 30   | 0.7306          |
+| 0.6835        | 0.4714 | 33   | 0.7146          |
+| 0.7594        | 0.5143 | 36   | 0.6884          |
+| 0.73          | 0.5571 | 39   | 0.6722          |
+| 0.6347        | 0.6    | 42   | 0.6595          |
+| 0.7217        | 0.6429 | 45   | 0.6508          |
+| 0.5904        | 0.6857 | 48   | 0.6390          |
+| 0.6656        | 0.7286 | 51   | 0.6266          |
+| 0.5857        | 0.7714 | 54   | 0.6125          |
+| 0.6036        | 0.8143 | 57   | 0.5918          |
+| 0.6584        | 0.8571 | 60   | 0.5849          |
+| 0.4954        | 0.9    | 63   | 0.5746          |
+| 0.5165        | 0.9429 | 66   | 0.5658          |
+| 0.6184        | 0.9857 | 69   | 0.5510          |
+| 0.4926        | 1.0286 | 72   | 0.5484          |
+| 0.6871        | 1.0714 | 75   | 0.5458          |
+| 0.4718        | 1.1143 | 78   | 0.5368          |
+| 0.5862        | 1.1571 | 81   | 0.5308          |
+| 0.5909        | 1.2    | 84   | 0.5199          |
+| 0.4566        | 1.2429 | 87   | 0.5155          |
+| 0.5461        | 1.2857 | 90   | 0.5048          |
+| 0.4024        | 1.3286 | 93   | 0.5063          |
+| 0.5349        | 1.3714 | 96   | 0.5051          |
+| 0.5643        | 1.4143 | 99   | 0.4994          |
+| 0.5109        | 1.4571 | 102  | 0.4937          |
+| 0.4582        | 1.5    | 105  | 0.4988          |
+| 0.4304        | 1.5429 | 108  | 0.4992          |
+| 0.3849        | 1.5857 | 111  | 0.4972          |
+| 0.5013        | 1.6286 | 114  | 0.4979          |
+| 0.3451        | 1.6714 | 117  | 0.4944          |
+| 0.325         | 1.7143 | 120  | 0.4910          |
+| 0.5228        | 1.7571 | 123  | 0.4827          |
+| 0.4379        | 1.8    | 126  | 0.4753          |
+| 0.3186        | 1.8429 | 129  | 0.4734          |
+| 0.6398        | 1.8857 | 132  | 0.4740          |
+| 0.2963        | 1.9286 | 135  | 0.4777          |
+| 0.661         | 1.9714 | 138  | 0.4770          |
+| 0.4953        | 2.0143 | 141  | 0.4794          |
+| 0.3564        | 2.0571 | 144  | 0.4895          |
+| 0.25          | 2.1    | 147  | 0.4962          |
+| 0.4717        | 2.1429 | 150  | 0.4856          |
+| 0.3823        | 2.1857 | 153  | 0.4734          |
+| 0.3204        | 2.2286 | 156  | 0.4689          |
+| 0.2621        | 2.2714 | 159  | 0.4662          |
+| 0.2568        | 2.3143 | 162  | 0.4676          |
+| 0.3661        | 2.3571 | 165  | 0.4713          |
+| 0.5833        | 2.4    | 168  | 0.4691          |
+| 0.3607        | 2.4429 | 171  | 0.4656          |
+| 0.3806        | 2.4857 | 174  | 0.4667          |
+| 0.2769        | 2.5286 | 177  | 0.4682          |
+| 0.3407        | 2.5714 | 180  | 0.4703          |
+| 0.3535        | 2.6143 | 183  | 0.4710          |
+| 0.3801        | 2.6571 | 186  | 0.4749          |
+| 0.3686        | 2.7    | 189  | 0.4748          |
+| 0.4201        | 2.7429 | 192  | 0.4673          |
+| 0.4519        | 2.7857 | 195  | 0.4634          |
+| 0.3307        | 2.8286 | 198  | 0.4596          |
+| 0.2853        | 2.8714 | 201  | 0.4584          |
+| 0.4844        | 2.9143 | 204  | 0.4567          |
+| 0.3521        | 2.9571 | 207  | 0.4549          |
+| 0.6176        | 3.0    | 210  | 0.4519          |
+| 0.298         | 3.0429 | 213  | 0.4505          |
+| 0.3371        | 3.0857 | 216  | 0.4505          |
+| 0.2549        | 3.1286 | 219  | 0.4519          |
+| 0.3271        | 3.1714 | 222  | 0.4555          |
+| 0.3472        | 3.2143 | 225  | 0.4596          |
+| 0.3883        | 3.2571 | 228  | 0.4607          |
+| 0.285         | 3.3    | 231  | 0.4626          |
+| 0.3243        | 3.3429 | 234  | 0.4642          |
+| 0.3385        | 3.3857 | 237  | 0.4644          |
+| 0.356         | 3.4286 | 240  | 0.4663          |
+| 0.2939        | 3.4714 | 243  | 0.4669          |
+| 0.2338        | 3.5143 | 246  | 0.4677          |
+| 0.4161        | 3.5571 | 249  | 0.4660          |
+| 0.2494        | 3.6    | 252  | 0.4669          |
+| 0.2844        | 3.6429 | 255  | 0.4661          |
+| 0.2141        | 3.6857 | 258  | 0.4643          |
+| 0.2961        | 3.7286 | 261  | 0.4639          |
+| 0.2751        | 3.7714 | 264  | 0.4640          |
+| 0.288         | 3.8143 | 267  | 0.4636          |
+| 0.5415        | 3.8571 | 270  | 0.4641          |
+| 0.2016        | 3.9    | 273  | 0.4634          |
+| 0.3921        | 3.9429 | 276  | 0.4640          |
+| 0.4504        | 3.9857 | 279  | 0.4642          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "o_proj",
     "q_proj",
-    "up_proj",
     "down_proj",
     "v_proj",
-    "k_proj",
-    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "q_proj",
+    "k_proj",
     "down_proj",
+    "gate_proj",
     "v_proj",
+    "up_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:df72f74aea741b820aaf622e35ae6e7f7b2083dc5eae15fcf3cfefb4ecc448ea
 size 35668592

 version https://git-lfs.github.com/spec/v1
+oid sha256:296a557e8ac7fa73beecea698789e37f5d243c1734c4c8e77454c03a84b64de1
 size 35668592

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:66e852e7b2a2c4c3570755ff4973a9d0245d4fce04315eb3b54bd7345731285f
 size 5432

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d14206dd3da4e8472af498c9b36118033550bef4ae9b4edb0f99f7664786f84
 size 5432