baby-dev committed on
Commit
c726df5
·
verified ·
1 Parent(s): 74ea2f4

End of training

Browse files
README.md CHANGED
@@ -88,7 +88,7 @@ s2_attention: null
88
  sample_packing: false
89
  save_steps: 150
90
  saves_per_epoch: null
91
- sequence_len: 512
92
  strict: false
93
  tf32: true
94
  tokenizer_type: AutoTokenizer
@@ -113,7 +113,7 @@ xformers_attention: null
113
 
114
  This model is a fine-tuned version of [peft-internal-testing/tiny-dummy-qwen2](https://huggingface.co/peft-internal-testing/tiny-dummy-qwen2) on the None dataset.
115
  It achieves the following results on the evaluation set:
116
- - Loss: 11.8973
117
 
118
  ## Model description
119
 
@@ -147,29 +147,22 @@ The following hyperparameters were used during training:
147
 
148
  | Training Loss | Epoch | Step | Validation Loss |
149
  |:-------------:|:-------:|:----:|:---------------:|
150
- | No log | 0.0083 | 1 | 11.9303 |
151
- | 12.0988 | 1.2474 | 150 | 11.9072 |
152
- | 11.9131 | 2.4948 | 300 | 11.9031 |
153
- | 11.9047 | 3.7422 | 450 | 11.9016 |
154
- | 11.8997 | 4.9896 | 600 | 11.9008 |
155
- | 12.0835 | 6.2370 | 750 | 11.9000 |
156
- | 11.9007 | 7.4844 | 900 | 11.8994 |
157
- | 11.8979 | 8.7318 | 1050 | 11.8993 |
158
- | 11.9027 | 9.9792 | 1200 | 11.8992 |
159
- | 12.0819 | 11.2266 | 1350 | 11.8992 |
160
- | 11.8975 | 12.4740 | 1500 | 11.8990 |
161
- | 11.895 | 13.7214 | 1650 | 11.8986 |
162
- | 11.8948 | 14.9688 | 1800 | 11.8983 |
163
- | 12.073 | 16.2162 | 1950 | 11.8983 |
164
- | 11.8973 | 17.4636 | 2100 | 11.8983 |
165
- | 11.9017 | 18.7110 | 2250 | 11.8980 |
166
- | 11.8985 | 19.9584 | 2400 | 11.8980 |
167
- | 12.0808 | 21.2058 | 2550 | 11.8978 |
168
- | 11.8989 | 22.4532 | 2700 | 11.8977 |
169
- | 11.9002 | 23.7006 | 2850 | 11.8976 |
170
- | 11.8964 | 24.9480 | 3000 | 11.8970 |
171
- | 12.076 | 26.1954 | 3150 | 11.8974 |
172
- | 11.8944 | 27.4428 | 3300 | 11.8973 |
173
 
174
 
175
  ### Framework versions
 
88
  sample_packing: false
89
  save_steps: 150
90
  saves_per_epoch: null
91
+ sequence_len: 1024
92
  strict: false
93
  tf32: true
94
  tokenizer_type: AutoTokenizer
 
113
 
114
  This model is a fine-tuned version of [peft-internal-testing/tiny-dummy-qwen2](https://huggingface.co/peft-internal-testing/tiny-dummy-qwen2) on the None dataset.
115
  It achieves the following results on the evaluation set:
116
+ - Loss: 11.8980
117
 
118
  ## Model description
119
 
 
147
 
148
  | Training Loss | Epoch | Step | Validation Loss |
149
  |:-------------:|:-------:|:----:|:---------------:|
150
+ | No log | 0.0083 | 1 | 11.9304 |
151
+ | 12.0987 | 1.2474 | 150 | 11.9076 |
152
+ | 11.9124 | 2.4948 | 300 | 11.9027 |
153
+ | 11.9043 | 3.7422 | 450 | 11.9013 |
154
+ | 11.8992 | 4.9896 | 600 | 11.9006 |
155
+ | 12.0835 | 6.2370 | 750 | 11.8999 |
156
+ | 11.9004 | 7.4844 | 900 | 11.8995 |
157
+ | 11.8977 | 8.7318 | 1050 | 11.8993 |
158
+ | 11.9026 | 9.9792 | 1200 | 11.8991 |
159
+ | 12.0817 | 11.2266 | 1350 | 11.8989 |
160
+ | 11.8973 | 12.4740 | 1500 | 11.8987 |
161
+ | 11.8949 | 13.7214 | 1650 | 11.8983 |
162
+ | 11.8948 | 14.9688 | 1800 | 11.8980 |
163
+ | 12.0731 | 16.2162 | 1950 | 11.8979 |
164
+ | 11.8973 | 17.4636 | 2100 | 11.8981 |
165
+ | 11.9018 | 18.7110 | 2250 | 11.8980 |
 
 
 
 
 
 
 
166
 
167
 
168
  ### Framework versions
adapter_config.json CHANGED
@@ -20,13 +20,13 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
 
23
  "up_proj",
24
  "v_proj",
25
- "o_proj",
26
- "q_proj",
27
- "k_proj",
28
  "gate_proj",
29
- "down_proj"
 
 
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "q_proj",
24
  "up_proj",
25
  "v_proj",
 
 
 
26
  "gate_proj",
27
+ "o_proj",
28
+ "down_proj",
29
+ "k_proj"
30
  ],
31
  "task_type": "CAUSAL_LM",
32
  "use_dora": false,
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8719e910058d7ba6d3435444bd260fd79a7bdb7b7c65084c67fc07319ab1e51a
3
  size 55170
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:165aa5a7fe839e33187b0a24780193720ab54c01ee9cd9e7439bd62b11559d96
3
  size 55170
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81e5ab98e42c7e76cc4f5d889c4fb95cc270a249936b7ec6cefbaf3b9db9259e
3
  size 48552
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e31650036ec9efb202ee8636e1531426a51e5266c17e790c73a54a89444ea43f
3
  size 48552
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a5ec26a829c75ef841327fb0f732cf98b837e8f8b5bcfa618f2b4b10d9c95ec
3
  size 6776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:725245d9276b1b31065d52f0424e7d9738d3ec36c6b7c2667247ce2d916acdc3
3
  size 6776