deman539/llama381binstruct_summarize_short_merged

Browse files

Files changed (7)

.gitattributes +1 -0
README.md +26 -26
adapter_config.json +4 -4
adapter_model.safetensors +1 -1
runs/Sep29_14-19-26_5a3674a12739/events.out.tfevents.1727619607.5a3674a12739.6297.0 +3 -0
tokenizer.json +0 -0
training_args.bin +1 -1

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

README.md CHANGED Viewed

@@ -20,7 +20,7 @@ should probably proofread and complete it, then remove this comment. -->
 This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.7944
 ## Model description
@@ -52,32 +52,32 @@ The following hyperparameters were used during training:
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.782         | 1.25  | 25   | 1.8304          |
-| 0.7787        | 2.5   | 50   | 1.7098          |
-| 0.3977        | 3.75  | 75   | 1.8415          |
-| 0.1875        | 5.0   | 100  | 1.9322          |
-| 0.0907        | 6.25  | 125  | 2.1723          |
-| 0.0304        | 7.5   | 150  | 2.4278          |
-| 0.0195        | 8.75  | 175  | 2.4924          |
-| 0.02          | 10.0  | 200  | 2.4250          |
-| 0.0194        | 11.25 | 225  | 2.5124          |
-| 0.0069        | 12.5  | 250  | 2.5796          |
-| 0.0048        | 13.75 | 275  | 2.5765          |
-| 0.0055        | 15.0  | 300  | 2.6462          |
-| 0.0038        | 16.25 | 325  | 2.6535          |
-| 0.0023        | 17.5  | 350  | 2.6849          |
-| 0.0026        | 18.75 | 375  | 2.7281          |
-| 0.0021        | 20.0  | 400  | 2.7575          |
-| 0.0018        | 21.25 | 425  | 2.7753          |
-| 0.0016        | 22.5  | 450  | 2.7865          |
-| 0.0016        | 23.75 | 475  | 2.7929          |
-| 0.0017        | 25.0  | 500  | 2.7944          |
 ### Framework versions
-- PEFT 0.12.0
-- Transformers 4.44.2
-- Pytorch 2.4.0+cu121
-- Datasets 3.0.0
-- Tokenizers 0.19.1

 This model is a fine-tuned version of [NousResearch/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/NousResearch/Meta-Llama-3.1-8B-Instruct) on the generator dataset.
 It achieves the following results on the evaluation set:
+- Loss: 1.9597
 ## Model description
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
+| 1.5955        | 2.5   | 25   | 1.1871          |
+| 0.4382        | 5.0   | 50   | 1.3249          |
+| 0.1055        | 7.5   | 75   | 1.6587          |
+| 0.0314        | 10.0  | 100  | 1.7388          |
+| 0.018         | 12.5  | 125  | 1.7894          |
+| 0.0062        | 15.0  | 150  | 1.8581          |
+| 0.0167        | 17.5  | 175  | 1.8562          |
+| 0.0034        | 20.0  | 200  | 1.8495          |
+| 0.0038        | 22.5  | 225  | 1.8778          |
+| 0.0016        | 25.0  | 250  | 1.8412          |
+| 0.0012        | 27.5  | 275  | 1.8858          |
+| 0.0011        | 30.0  | 300  | 1.9118          |
+| 0.001         | 32.5  | 325  | 1.9290          |
+| 0.0009        | 35.0  | 350  | 1.9401          |
+| 0.0008        | 37.5  | 375  | 1.9467          |
+| 0.0008        | 40.0  | 400  | 1.9519          |
+| 0.0008        | 42.5  | 425  | 1.9556          |
+| 0.0007        | 45.0  | 450  | 1.9579          |
+| 0.0007        | 47.5  | 475  | 1.9597          |
+| 0.0006        | 50.0  | 500  | 1.9597          |
 ### Framework versions
+- PEFT 0.13.0
+- Transformers 4.45.1
+- Pytorch 2.4.1+cu121
+- Datasets 3.0.1
+- Tokenizers 0.20.0

adapter_config.json CHANGED Viewed

@@ -21,12 +21,12 @@
   "revision": null,
   "target_modules": [
     "down_proj",
-    "k_proj",
-    "up_proj",
-    "gate_proj",
     "o_proj",
     "q_proj",
-    "v_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

   "revision": null,
   "target_modules": [
     "down_proj",
+    "v_proj",
     "o_proj",
+    "k_proj",
     "q_proj",
+    "up_proj",
+    "gate_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_dora": false,

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:16df53eea17cad29723b7a007ff3bd5715fbcda0335fd36d95d205d62ee6d29e
 size 167832240

 version https://git-lfs.github.com/spec/v1
+oid sha256:d063aacc9c484f7333d23589be7f14ee4755f05aa69aba5ac4de6a9e0a06c31b
 size 167832240

runs/Sep29_14-19-26_5a3674a12739/events.out.tfevents.1727619607.5a3674a12739.6297.0 ADDED Viewed

@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4b9a6133399da59c770cb5ebd2a4d0417414a662f496cc100781da67f8fb122b
+size 22377

tokenizer.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:544f5ecd7307de5490611396486f54fcb31ba4f87e50adeea034b751e65da865
 size 5496

 version https://git-lfs.github.com/spec/v1
+oid sha256:7b63821b0257e5fea24fca4b9f38844a8fb3733dacb9d1ec7c179152c2aa71d1
 size 5496