ai-maker-space/llama381binstruct_summarize_short

Browse files

Files changed (5) hide show

README.md +23 -23
adapter_config.json +4 -4
adapter_model.safetensors +1 -1
runs/Sep18_21-33-30_82c46e19b62f/events.out.tfevents.1726695213.82c46e19b62f.2652.0 +3 -0
training_args.bin +1 -1

README.md CHANGED Viewed

@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.6246
 ## Model description
@@ -50,28 +50,28 @@ The following hyperparameters were used during training:
 ### Training results
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-----:|:----:|:---------------:|
-| 1.7914        | 1.25  | 25   | 1.7526          |
-| 0.7424        | 2.5   | 50   | 1.7774          |
-| 0.394         | 3.75  | 75   | 1.9085          |
-| 0.2196        | 5.0   | 100  | 2.0526          |
-| 0.0768        | 6.25  | 125  | 2.1213          |
-| 0.0295        | 7.5   | 150  | 2.3753          |
-| 0.0261        | 8.75  | 175  | 2.2799          |
-| 0.0222        | 10.0  | 200  | 2.3409          |
-| 0.0149        | 11.25 | 225  | 2.3087          |
-| 0.0067        | 12.5  | 250  | 2.3435          |
-| 0.0061        | 13.75 | 275  | 2.4163          |
-| 0.004         | 15.0  | 300  | 2.4519          |
-| 0.0033        | 16.25 | 325  | 2.5373          |
-| 0.0027        | 17.5  | 350  | 2.5686          |
-| 0.002         | 18.75 | 375  | 2.5895          |
-| 0.0024        | 20.0  | 400  | 2.6051          |
-| 0.0023        | 21.25 | 425  | 2.6133          |
-| 0.0018        | 22.5  | 450  | 2.6202          |
-| 0.0016        | 23.75 | 475  | 2.6238          |
-| 0.0019        | 25.0  | 500  | 2.6246          |
 ### Framework versions

 This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 2.3454
 ## Model description
 ### Training results
+| Training Loss | Epoch   | Step | Validation Loss |
+|:-------------:|:-------:|:----:|:---------------:|
+| 1.9049        | 1.1905  | 25   | 1.3198          |
+| 0.8341        | 2.3810  | 50   | 1.3278          |
+| 0.4251        | 3.5714  | 75   | 1.4843          |
+| 0.1704        | 4.7619  | 100  | 1.6038          |
+| 0.0957        | 5.9524  | 125  | 1.8290          |
+| 0.0481        | 7.1429  | 150  | 2.0425          |
+| 0.0378        | 8.3333  | 175  | 1.9429          |
+| 0.0143        | 9.5238  | 200  | 2.2196          |
+| 0.012         | 10.7143 | 225  | 2.1719          |
+| 0.013         | 11.9048 | 250  | 2.1323          |
+| 0.0069        | 13.0952 | 275  | 2.1442          |
+| 0.0043        | 14.2857 | 300  | 2.1642          |
+| 0.0033        | 15.4762 | 325  | 2.2483          |
+| 0.0029        | 16.6667 | 350  | 2.2829          |
+| 0.0024        | 17.8571 | 375  | 2.2906          |
+| 0.0027        | 19.0476 | 400  | 2.3140          |
+| 0.0023        | 20.2381 | 425  | 2.3269          |
+| 0.0019        | 21.4286 | 450  | 2.3399          |
+| 0.0021        | 22.6190 | 475  | 2.3443          |
+| 0.0022        | 23.8095 | 500  | 2.3454          |
 ### Framework versions

adapter_config.json CHANGED Viewed

@@ -21,12 +21,12 @@
   "revision": null,
   "target_modules": [
     "down_proj",
-    "k_proj",
-    "v_proj",
-    "gate_proj",
     "o_proj",
     "q_proj",
-    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "revision": null,
   "target_modules": [
     "down_proj",
     "o_proj",
+    "up_proj",
     "q_proj",
+    "gate_proj",
+    "v_proj",
+    "k_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8cfb16d5c5bc2bec121a2dad49e3f1cdc3bd0ddbfbbc5429c7cf3f8ff32edc8
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:e92a608fcb382fbec54223c5fe122ca7d2dcf7fb8ad2d5b712798d446fbba5fd
 size 167832240

runs/Sep18_21-33-30_82c46e19b62f/events.out.tfevents.1726695213.82c46e19b62f.2652.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c30173100b51cde7b7eaca1cf1e718e172f6ba112c19d71b30a1c88d0294db8c
+size 22329

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:44800469de3ba4e7ca7a8882d16a3068b126e9892e0b65a439497807f2abf195
 size 5496

 version https://git-lfs.github.com/spec/v1
+oid sha256:020cb700c7723301496558ec8d635e6cb1d0944759cef52f98964e152839eb4f
 size 5496