rahul avaghan committed on
Commit
1948063
·
1 Parent(s): 46a1859

Revert "rahul4862/adapter-phi-3-mini"

Browse files

This reverts commit 46a18595a28a0125ee568d156556ab29f55fe9e0.

README.md CHANGED
@@ -18,7 +18,7 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  This model is a fine-tuned version of [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
- - Loss: 1.6901
22
 
23
  ## Model description
24
 
@@ -52,16 +52,43 @@ The following hyperparameters were used during training:
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:------:|:----:|:---------------:|
55
- | 1.8562 | 0.6734 | 100 | 1.7290 |
56
- | 1.7394 | 1.3468 | 200 | 1.7045 |
57
- | 1.7176 | 2.0202 | 300 | 1.6950 |
58
- | 1.7102 | 2.6936 | 400 | 1.6901 |
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
 
60
 
61
  ### Framework versions
62
 
63
  - PEFT 0.12.0
64
- - Transformers 4.43.4
65
  - Pytorch 2.3.1+cu121
66
  - Datasets 2.20.0
67
  - Tokenizers 0.19.1
 
18
 
19
  This model is a fine-tuned version of [microsoft/Phi-3-mini-4k-instruct](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct) on an unknown dataset.
20
  It achieves the following results on the evaluation set:
21
+ - Loss: 1.7121
22
 
23
  ## Model description
24
 
 
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:------:|:----:|:---------------:|
55
+ | 1.94 | 0.0953 | 100 | 1.8918 |
56
+ | 1.8113 | 0.1907 | 200 | 1.7877 |
57
+ | 1.7578 | 0.2860 | 300 | 1.7701 |
58
+ | 1.756 | 0.3813 | 400 | 1.7637 |
59
+ | 1.7632 | 0.4766 | 500 | 1.7582 |
60
+ | 1.7477 | 0.5720 | 600 | 1.7542 |
61
+ | 1.7605 | 0.6673 | 700 | 1.7508 |
62
+ | 1.7312 | 0.7626 | 800 | 1.7482 |
63
+ | 1.7315 | 0.8580 | 900 | 1.7439 |
64
+ | 1.7148 | 0.9533 | 1000 | 1.7414 |
65
+ | 1.7263 | 1.0486 | 1100 | 1.7385 |
66
+ | 1.7184 | 1.1439 | 1200 | 1.7361 |
67
+ | 1.7187 | 1.2393 | 1300 | 1.7336 |
68
+ | 1.7231 | 1.3346 | 1400 | 1.7313 |
69
+ | 1.7433 | 1.4299 | 1500 | 1.7290 |
70
+ | 1.6962 | 1.5253 | 1600 | 1.7268 |
71
+ | 1.7136 | 1.6206 | 1700 | 1.7253 |
72
+ | 1.6969 | 1.7159 | 1800 | 1.7236 |
73
+ | 1.7028 | 1.8112 | 1900 | 1.7217 |
74
+ | 1.7066 | 1.9066 | 2000 | 1.7200 |
75
+ | 1.7123 | 2.0019 | 2100 | 1.7191 |
76
+ | 1.7005 | 2.0972 | 2200 | 1.7178 |
77
+ | 1.7052 | 2.1926 | 2300 | 1.7168 |
78
+ | 1.6946 | 2.2879 | 2400 | 1.7160 |
79
+ | 1.6728 | 2.3832 | 2500 | 1.7150 |
80
+ | 1.7033 | 2.4786 | 2600 | 1.7144 |
81
+ | 1.6893 | 2.5739 | 2700 | 1.7136 |
82
+ | 1.7206 | 2.6692 | 2800 | 1.7129 |
83
+ | 1.6747 | 2.7645 | 2900 | 1.7126 |
84
+ | 1.6981 | 2.8599 | 3000 | 1.7123 |
85
+ | 1.6928 | 2.9552 | 3100 | 1.7121 |
86
 
87
 
88
  ### Framework versions
89
 
90
  - PEFT 0.12.0
91
+ - Transformers 4.43.3
92
  - Pytorch 2.3.1+cu121
93
  - Datasets 2.20.0
94
  - Tokenizers 0.19.1
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "gate_proj",
24
- "q_proj",
25
  "up_proj",
26
- "down_proj",
27
  "o_proj",
28
  "v_proj",
29
- "k_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "k_proj",
 
24
  "up_proj",
 
25
  "o_proj",
26
  "v_proj",
27
+ "q_proj",
28
+ "gate_proj",
29
+ "down_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5794232b2ee5b76e73dbc7b568f45c5e7bde407a8676e5fe8a91fb0c665a951e
3
  size 35668592
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2c9769cc3e20938c52e84856d9503dc5c0ac655b4ca5e07772aae1c58f008e9
3
  size 35668592
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c73541313d0c94189bf7c46f17749d6f3c1981d626f56c44419af43a8ac2def6
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66b1502ea703ca20bf23fe8283e9579a8a32ae8648447b71e55cb6e204ceabec
3
  size 5432