AmirlyPhd committed on
Commit
a14f02b
·
verified ·
1 Parent(s): d4f3db0

AmirlyPhd/IeeeNfsuV4

Browse files
README.md CHANGED
@@ -4,7 +4,7 @@ tags:
4
  - trl
5
  - sft
6
  - generated_from_trainer
7
- base_model: NousResearch/Llama-2-7b-chat-hf
8
  model-index:
9
  - name: trained_weigths
10
  results: []
@@ -15,9 +15,9 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # trained_weigths
17
 
18
- This model is a fine-tuned version of [NousResearch/Llama-2-7b-chat-hf](https://huggingface.co/NousResearch/Llama-2-7b-chat-hf) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
- - Loss: 1.0769
21
 
22
  ## Model description
23
 
@@ -52,13 +52,13 @@ The following hyperparameters were used during training:
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-----:|:----:|:---------------:|
55
- | 1.016 | 0.14 | 30 | 1.0769 |
56
 
57
 
58
  ### Framework versions
59
 
60
  - PEFT 0.7.2.dev0
61
  - Transformers 4.36.2
62
- - Pytorch 2.1.2
63
  - Datasets 2.16.1
64
  - Tokenizers 0.15.2
 
4
  - trl
5
  - sft
6
  - generated_from_trainer
7
+ base_model: NousResearch/Llama-2-7b-hf
8
  model-index:
9
  - name: trained_weigths
10
  results: []
 
15
 
16
  # trained_weigths
17
 
18
+ This model is a fine-tuned version of [NousResearch/Llama-2-7b-hf](https://huggingface.co/NousResearch/Llama-2-7b-hf) on the None dataset.
19
  It achieves the following results on the evaluation set:
20
+ - Loss: 1.0609
21
 
22
  ## Model description
23
 
 
52
 
53
  | Training Loss | Epoch | Step | Validation Loss |
54
  |:-------------:|:-----:|:----:|:---------------:|
55
+ | 1.7687 | 0.03 | 30 | 1.0609 |
56
 
57
 
58
  ### Framework versions
59
 
60
  - PEFT 0.7.2.dev0
61
  - Transformers 4.36.2
62
+ - Pytorch 2.1.2+cu121
63
  - Datasets 2.16.1
64
  - Tokenizers 0.15.2
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
- "base_model_name_or_path": "NousResearch/Llama-2-7b-chat-hf",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
@@ -10,21 +10,21 @@
10
  "layers_to_transform": null,
11
  "loftq_config": {},
12
  "lora_alpha": 16,
13
- "lora_dropout": 0.05,
14
  "megatron_config": null,
15
  "megatron_core": "megatron.core",
16
  "modules_to_save": null,
17
  "peft_type": "LORA",
18
- "r": 16,
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
22
  "up_proj",
 
23
  "o_proj",
24
  "q_proj",
25
- "v_proj",
26
- "down_proj",
27
- "k_proj",
28
  "gate_proj"
29
  ],
30
  "task_type": "CAUSAL_LM",
 
1
  {
2
  "alpha_pattern": {},
3
  "auto_mapping": null,
4
+ "base_model_name_or_path": "NousResearch/Llama-2-7b-hf",
5
  "bias": "none",
6
  "fan_in_fan_out": false,
7
  "inference_mode": true,
 
10
  "layers_to_transform": null,
11
  "loftq_config": {},
12
  "lora_alpha": 16,
13
+ "lora_dropout": 0.1,
14
  "megatron_config": null,
15
  "megatron_core": "megatron.core",
16
  "modules_to_save": null,
17
  "peft_type": "LORA",
18
+ "r": 64,
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
+ "k_proj",
23
+ "down_proj",
24
  "up_proj",
25
+ "v_proj",
26
  "o_proj",
27
  "q_proj",
 
 
 
28
  "gate_proj"
29
  ],
30
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:158d1050804eb9731fa191c77ce90a4bc847918191471c7948504250cb8cc20a
3
- size 1208643928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77f4bb35eaa42501a6e59b55e0a44d44722004922ef35e18ae879f8786229320
3
+ size 1688334680
added_tokens.json CHANGED
@@ -1,5 +1,4 @@
1
  {
2
- "<pad>": 32000,
3
- "<|im_end|>": 32001,
4
- "<|im_start|>": 32002
5
  }
 
1
  {
2
+ "<|im_end|>": 32000,
3
+ "<|im_start|>": 32001
 
4
  }
runs/Apr07_00-37-46_7e75014457a5/events.out.tfevents.1712450278.7e75014457a5.225.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe667fee2ad0ec58972b12bf60c778874aba4c99cb85f5ae1838e91afd639a67
3
+ size 7112
runs/Apr07_01-03-35_7e75014457a5/events.out.tfevents.1712451829.7e75014457a5.225.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b6d75626ab3e09ca3b982b9216ad6864c67d08ae0346d474ecc44a5f8fc6e49
3
+ size 7726
tokenizer.json CHANGED
@@ -37,15 +37,6 @@
37
  },
38
  {
39
  "id": 32000,
40
- "content": "<pad>",
41
- "single_word": false,
42
- "lstrip": false,
43
- "rstrip": false,
44
- "normalized": true,
45
- "special": false
46
- },
47
- {
48
- "id": 32001,
49
  "content": "<|im_end|>",
50
  "single_word": false,
51
  "lstrip": false,
@@ -54,7 +45,7 @@
54
  "special": true
55
  },
56
  {
57
- "id": 32002,
58
  "content": "<|im_start|>",
59
  "single_word": false,
60
  "lstrip": false,
 
37
  },
38
  {
39
  "id": 32000,
 
 
 
 
 
 
 
 
 
40
  "content": "<|im_end|>",
41
  "single_word": false,
42
  "lstrip": false,
 
45
  "special": true
46
  },
47
  {
48
+ "id": 32001,
49
  "content": "<|im_start|>",
50
  "single_word": false,
51
  "lstrip": false,
tokenizer_config.json CHANGED
@@ -27,14 +27,6 @@
27
  "special": true
28
  },
29
  "32000": {
30
- "content": "<pad>",
31
- "lstrip": false,
32
- "normalized": true,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": false
36
- },
37
- "32001": {
38
  "content": "<|im_end|>",
39
  "lstrip": false,
40
  "normalized": false,
@@ -42,7 +34,7 @@
42
  "single_word": false,
43
  "special": true
44
  },
45
- "32002": {
46
  "content": "<|im_start|>",
47
  "lstrip": false,
48
  "normalized": false,
 
27
  "special": true
28
  },
29
  "32000": {
 
 
 
 
 
 
 
 
30
  "content": "<|im_end|>",
31
  "lstrip": false,
32
  "normalized": false,
 
34
  "single_word": false,
35
  "special": true
36
  },
37
+ "32001": {
38
  "content": "<|im_start|>",
39
  "lstrip": false,
40
  "normalized": false,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1f0ea969c0eb983369b23a6a92f57fc0c08f143822ddd216feabaa5270d5d2cc
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdc9d240b008498f5df064c69c1dc7f025073ab6e2dff33c62a2622c18d8e448
3
  size 4664