End of training

Browse files

Files changed (5) hide show

README.md +18 -25
adapter_config.json +4 -4
adapter_model.bin +1 -1
adapter_model.safetensors +1 -1
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -88,7 +88,7 @@ s2_attention: null
 sample_packing: false
 save_steps: 150
 saves_per_epoch: null
-sequence_len: 512
 strict: false
 tf32: true
 tokenizer_type: AutoTokenizer
@@ -113,7 +113,7 @@ xformers_attention: null
 This model is a fine-tuned version of [peft-internal-testing/tiny-dummy-qwen2](https://huggingface.co/peft-internal-testing/tiny-dummy-qwen2) on an unspecified dataset (no dataset name was recorded in the training metadata).
 It achieves the following results on the evaluation set:
-- Loss: 11.8973
 ## Model description
@@ -147,29 +147,22 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch   | Step | Validation Loss |
 |:-------------:|:-------:|:----:|:---------------:|
-| No log        | 0.0083  | 1    | 11.9303         |
-| 12.0988       | 1.2474  | 150  | 11.9072         |
-| 11.9131       | 2.4948  | 300  | 11.9031         |
-| 11.9047       | 3.7422  | 450  | 11.9016         |
-| 11.8997       | 4.9896  | 600  | 11.9008         |
-| 12.0835       | 6.2370  | 750  | 11.9000         |
-| 11.9007       | 7.4844  | 900  | 11.8994         |
-| 11.8979       | 8.7318  | 1050 | 11.8993         |
-| 11.9027       | 9.9792  | 1200 | 11.8992         |
-| 12.0819       | 11.2266 | 1350 | 11.8992         |
-| 11.8975       | 12.4740 | 1500 | 11.8990         |
-| 11.895        | 13.7214 | 1650 | 11.8986         |
-| 11.8948       | 14.9688 | 1800 | 11.8983         |
-| 12.073        | 16.2162 | 1950 | 11.8983         |
-| 11.8973       | 17.4636 | 2100 | 11.8983         |
-| 11.9017       | 18.7110 | 2250 | 11.8980         |
-| 11.8985       | 19.9584 | 2400 | 11.8980         |
-| 12.0808       | 21.2058 | 2550 | 11.8978         |
-| 11.8989       | 22.4532 | 2700 | 11.8977         |
-| 11.9002       | 23.7006 | 2850 | 11.8976         |
-| 11.8964       | 24.9480 | 3000 | 11.8970         |
-| 12.076        | 26.1954 | 3150 | 11.8974         |
-| 11.8944       | 27.4428 | 3300 | 11.8973         |
 ### Framework versions

 sample_packing: false
 save_steps: 150
 saves_per_epoch: null
+sequence_len: 1024
 strict: false
 tf32: true
 tokenizer_type: AutoTokenizer
 This model is a fine-tuned version of [peft-internal-testing/tiny-dummy-qwen2](https://huggingface.co/peft-internal-testing/tiny-dummy-qwen2) on an unspecified dataset (no dataset name was recorded in the training metadata).
 It achieves the following results on the evaluation set:
+- Loss: 11.8980
 ## Model description
 | Training Loss | Epoch   | Step | Validation Loss |
 |:-------------:|:-------:|:----:|:---------------:|
+| No log        | 0.0083  | 1    | 11.9304         |
+| 12.0987       | 1.2474  | 150  | 11.9076         |
+| 11.9124       | 2.4948  | 300  | 11.9027         |
+| 11.9043       | 3.7422  | 450  | 11.9013         |
+| 11.8992       | 4.9896  | 600  | 11.9006         |
+| 12.0835       | 6.2370  | 750  | 11.8999         |
+| 11.9004       | 7.4844  | 900  | 11.8995         |
+| 11.8977       | 8.7318  | 1050 | 11.8993         |
+| 11.9026       | 9.9792  | 1200 | 11.8991         |
+| 12.0817       | 11.2266 | 1350 | 11.8989         |
+| 11.8973       | 12.4740 | 1500 | 11.8987         |
+| 11.8949       | 13.7214 | 1650 | 11.8983         |
+| 11.8948       | 14.9688 | 1800 | 11.8980         |
+| 12.0731       | 16.2162 | 1950 | 11.8979         |
+| 11.8973       | 17.4636 | 2100 | 11.8981         |
+| 11.9018       | 18.7110 | 2250 | 11.8980         |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -20,13 +20,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
     "up_proj",
     "v_proj",
-    "o_proj",
-    "q_proj",
-    "k_proj",
     "gate_proj",
-    "down_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "q_proj",
     "up_proj",
     "v_proj",
     "gate_proj",
+    "o_proj",
+    "down_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8719e910058d7ba6d3435444bd260fd79a7bdb7b7c65084c67fc07319ab1e51a
 size 55170

 version https://git-lfs.github.com/spec/v1
+oid sha256:165aa5a7fe839e33187b0a24780193720ab54c01ee9cd9e7439bd62b11559d96
 size 55170

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:81e5ab98e42c7e76cc4f5d889c4fb95cc270a249936b7ec6cefbaf3b9db9259e
 size 48552

 version https://git-lfs.github.com/spec/v1
+oid sha256:e31650036ec9efb202ee8636e1531426a51e5266c17e790c73a54a89444ea43f
 size 48552

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5a5ec26a829c75ef841327fb0f732cf98b837e8f8b5bcfa618f2b4b10d9c95ec
 size 6776

 version https://git-lfs.github.com/spec/v1
+oid sha256:725245d9276b1b31065d52f0424e7d9738d3ec36c6b7c2667247ce2d916acdc3
 size 6776