End of training
Browse files
- README.md +18 -25
- adapter_config.json +4 -4
- adapter_model.bin +1 -1
- adapter_model.safetensors +1 -1
- training_args.bin +1 -1
README.md
CHANGED
|
@@ -88,7 +88,7 @@ s2_attention: null
|
|
| 88 |
sample_packing: false
|
| 89 |
save_steps: 150
|
| 90 |
saves_per_epoch: null
|
| 91 |
-
sequence_len:
|
| 92 |
strict: false
|
| 93 |
tf32: true
|
| 94 |
tokenizer_type: AutoTokenizer
|
|
@@ -113,7 +113,7 @@ xformers_attention: null
|
|
| 113 |
|
| 114 |
This model is a fine-tuned version of [peft-internal-testing/tiny-dummy-qwen2](https://huggingface.co/peft-internal-testing/tiny-dummy-qwen2) on the None dataset.
|
| 115 |
It achieves the following results on the evaluation set:
|
| 116 |
-
- Loss: 11.
|
| 117 |
|
| 118 |
## Model description
|
| 119 |
|
|
@@ -147,29 +147,22 @@ The following hyperparameters were used during training:
|
|
| 147 |
|
| 148 |
| Training Loss | Epoch | Step | Validation Loss |
|
| 149 |
|:-------------:|:-------:|:----:|:---------------:|
|
| 150 |
-
| No log | 0.0083 | 1 | 11.
|
| 151 |
-
| 12.
|
| 152 |
-
| 11.
|
| 153 |
-
| 11.
|
| 154 |
-
| 11.
|
| 155 |
-
| 12.0835 | 6.2370 | 750 | 11.
|
| 156 |
-
| 11.
|
| 157 |
-
| 11.
|
| 158 |
-
| 11.
|
| 159 |
-
| 12.
|
| 160 |
-
| 11.
|
| 161 |
-
| 11.
|
| 162 |
-
| 11.8948 | 14.9688 | 1800 | 11.
|
| 163 |
-
| 12.
|
| 164 |
-
| 11.8973 | 17.4636 | 2100 | 11.
|
| 165 |
-
| 11.
|
| 166 |
-
| 11.8985 | 19.9584 | 2400 | 11.8980 |
|
| 167 |
-
| 12.0808 | 21.2058 | 2550 | 11.8978 |
|
| 168 |
-
| 11.8989 | 22.4532 | 2700 | 11.8977 |
|
| 169 |
-
| 11.9002 | 23.7006 | 2850 | 11.8976 |
|
| 170 |
-
| 11.8964 | 24.9480 | 3000 | 11.8970 |
|
| 171 |
-
| 12.076 | 26.1954 | 3150 | 11.8974 |
|
| 172 |
-
| 11.8944 | 27.4428 | 3300 | 11.8973 |
|
| 173 |
|
| 174 |
|
| 175 |
### Framework versions
|
|
|
|
| 88 |
sample_packing: false
|
| 89 |
save_steps: 150
|
| 90 |
saves_per_epoch: null
|
| 91 |
+
sequence_len: 1024
|
| 92 |
strict: false
|
| 93 |
tf32: true
|
| 94 |
tokenizer_type: AutoTokenizer
|
|
|
|
| 113 |
|
| 114 |
This model is a fine-tuned version of [peft-internal-testing/tiny-dummy-qwen2](https://huggingface.co/peft-internal-testing/tiny-dummy-qwen2) on the None dataset.
|
| 115 |
It achieves the following results on the evaluation set:
|
| 116 |
+
- Loss: 11.8980
|
| 117 |
|
| 118 |
## Model description
|
| 119 |
|
|
|
|
| 147 |
|
| 148 |
| Training Loss | Epoch | Step | Validation Loss |
|
| 149 |
|:-------------:|:-------:|:----:|:---------------:|
|
| 150 |
+
| No log | 0.0083 | 1 | 11.9304 |
|
| 151 |
+
| 12.0987 | 1.2474 | 150 | 11.9076 |
|
| 152 |
+
| 11.9124 | 2.4948 | 300 | 11.9027 |
|
| 153 |
+
| 11.9043 | 3.7422 | 450 | 11.9013 |
|
| 154 |
+
| 11.8992 | 4.9896 | 600 | 11.9006 |
|
| 155 |
+
| 12.0835 | 6.2370 | 750 | 11.8999 |
|
| 156 |
+
| 11.9004 | 7.4844 | 900 | 11.8995 |
|
| 157 |
+
| 11.8977 | 8.7318 | 1050 | 11.8993 |
|
| 158 |
+
| 11.9026 | 9.9792 | 1200 | 11.8991 |
|
| 159 |
+
| 12.0817 | 11.2266 | 1350 | 11.8989 |
|
| 160 |
+
| 11.8973 | 12.4740 | 1500 | 11.8987 |
|
| 161 |
+
| 11.8949 | 13.7214 | 1650 | 11.8983 |
|
| 162 |
+
| 11.8948 | 14.9688 | 1800 | 11.8980 |
|
| 163 |
+
| 12.0731 | 16.2162 | 1950 | 11.8979 |
|
| 164 |
+
| 11.8973 | 17.4636 | 2100 | 11.8981 |
|
| 165 |
+
| 11.9018 | 18.7110 | 2250 | 11.8980 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
|
| 167 |
|
| 168 |
### Framework versions
|
adapter_config.json
CHANGED
|
@@ -20,13 +20,13 @@
|
|
| 20 |
"rank_pattern": {},
|
| 21 |
"revision": null,
|
| 22 |
"target_modules": [
|
|
|
|
| 23 |
"up_proj",
|
| 24 |
"v_proj",
|
| 25 |
-
"o_proj",
|
| 26 |
-
"q_proj",
|
| 27 |
-
"k_proj",
|
| 28 |
"gate_proj",
|
| 29 |
-
"down_proj"
|
|
|
|
|
|
|
| 30 |
],
|
| 31 |
"task_type": "CAUSAL_LM",
|
| 32 |
"use_dora": false,
|
|
|
|
| 20 |
"rank_pattern": {},
|
| 21 |
"revision": null,
|
| 22 |
"target_modules": [
|
| 23 |
+
"q_proj",
|
| 24 |
"up_proj",
|
| 25 |
"v_proj",
|
|
|
|
|
|
|
|
|
|
| 26 |
"gate_proj",
|
| 27 |
+
"o_proj",
|
| 28 |
+
"down_proj",
|
| 29 |
+
"k_proj"
|
| 30 |
],
|
| 31 |
"task_type": "CAUSAL_LM",
|
| 32 |
"use_dora": false,
|
adapter_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 55170
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:165aa5a7fe839e33187b0a24780193720ab54c01ee9cd9e7439bd62b11559d96
|
| 3 |
size 55170
|
adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 48552
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e31650036ec9efb202ee8636e1531426a51e5266c17e790c73a54a89444ea43f
|
| 3 |
size 48552
|
training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 6776
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:725245d9276b1b31065d52f0424e7d9738d3ec36c6b7c2667247ce2d916acdc3
|
| 3 |
size 6776
|