AIencoder committed on
Commit
86155a9
·
verified ·
1 Parent(s): 85f68d3

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -2,7 +2,6 @@
2
  base_model:
3
  - Qwen/Qwen2.5-Coder-7B-Instruct
4
  - Qwen/Qwen2.5-Math-7B-Instruct
5
- - Qwen/Qwen2.5-7B-Instruct
6
  library_name: transformers
7
  tags:
8
  - mergekit
@@ -16,7 +15,7 @@ This is a merge of pre-trained language models created using [mergekit](https://
16
  ## Merge Details
17
  ### Merge Method
18
 
19
- This model was merged using the [DARE TIES](https://arxiv.org/abs/2311.03099) merge method using [Qwen/Qwen2.5-7B-Instruct](https://huggingface.co/Qwen/Qwen2.5-7B-Instruct) as a base.
20
 
21
  ### Models Merged
22
 
@@ -30,21 +29,22 @@ The following YAML configuration was used to produce this model:
30
 
31
  ```yaml
32
 
33
- models:
34
- - model: Qwen/Qwen2.5-Coder-7B-Instruct
35
- parameters:
36
- weight: 0.5
37
- density: 0.7
38
- - model: Qwen/Qwen2.5-Math-7B-Instruct
39
- parameters:
40
- weight: 0.5
41
- density: 0.6
42
- merge_method: dare_ties
43
- base_model: Qwen/Qwen2.5-7B-Instruct
44
  parameters:
45
- int8_mask: true
46
- normalize: true
 
 
 
 
47
  dtype: bfloat16
48
- tokenizer_source: Qwen/Qwen2.5-7B-Instruct
49
 
50
  ```
 
2
  base_model:
3
  - Qwen/Qwen2.5-Coder-7B-Instruct
4
  - Qwen/Qwen2.5-Math-7B-Instruct
 
5
  library_name: transformers
6
  tags:
7
  - mergekit
 
15
  ## Merge Details
16
  ### Merge Method
17
 
18
+ This model was merged using the [SLERP](https://en.wikipedia.org/wiki/Slerp) merge method.
19
 
20
  ### Models Merged
21
 
 
29
 
30
  ```yaml
31
 
32
+ slices:
33
+ - sources:
34
+ - model: Qwen/Qwen2.5-Coder-7B-Instruct
35
+ layer_range: [0, 28]
36
+ - model: Qwen/Qwen2.5-Math-7B-Instruct
37
+ layer_range: [0, 28]
38
+ merge_method: slerp
39
+ base_model: Qwen/Qwen2.5-Coder-7B-Instruct
 
 
 
40
  parameters:
41
+ t:
42
+ - filter: self_attn
43
+ value: [0, 0.5, 0.3, 0.7, 1]
44
+ - filter: mlp
45
+ value: [1, 0.5, 0.7, 0.3, 0]
46
+ - value: 0.5
47
  dtype: bfloat16
48
+ tokenizer_source: Qwen/Qwen2.5-Coder-7B-Instruct
49
 
50
  ```
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "Qwen/Qwen2.5-7B-Instruct",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
@@ -22,7 +22,7 @@
22
  "sliding_window": null,
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
- "transformers_version": "4.46.0",
26
  "use_cache": true,
27
  "use_sliding_window": false,
28
  "vocab_size": 151665
 
1
  {
2
+ "_name_or_path": "Qwen/Qwen2.5-Coder-7B-Instruct",
3
  "architectures": [
4
  "Qwen2ForCausalLM"
5
  ],
 
22
  "sliding_window": null,
23
  "tie_word_embeddings": false,
24
  "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.46.3",
26
  "use_cache": true,
27
  "use_sliding_window": false,
28
  "vocab_size": 151665
mergekit_config.yml CHANGED
@@ -1,17 +1,18 @@
1
 
2
- models:
3
- - model: Qwen/Qwen2.5-Coder-7B-Instruct
4
- parameters:
5
- weight: 0.5
6
- density: 0.7
7
- - model: Qwen/Qwen2.5-Math-7B-Instruct
8
- parameters:
9
- weight: 0.5
10
- density: 0.6
11
- merge_method: dare_ties
12
- base_model: Qwen/Qwen2.5-7B-Instruct
13
  parameters:
14
- int8_mask: true
15
- normalize: true
 
 
 
 
16
  dtype: bfloat16
17
- tokenizer_source: Qwen/Qwen2.5-7B-Instruct
 
1
 
2
+ slices:
3
+ - sources:
4
+ - model: Qwen/Qwen2.5-Coder-7B-Instruct
5
+ layer_range: [0, 28]
6
+ - model: Qwen/Qwen2.5-Math-7B-Instruct
7
+ layer_range: [0, 28]
8
+ merge_method: slerp
9
+ base_model: Qwen/Qwen2.5-Coder-7B-Instruct
 
 
 
10
  parameters:
11
+ t:
12
+ - filter: self_attn
13
+ value: [0, 0.5, 0.3, 0.7, 1]
14
+ - filter: mlp
15
+ value: [1, 0.5, 0.7, 0.3, 0]
16
+ - value: 0.5
17
  dtype: bfloat16
18
+ tokenizer_source: Qwen/Qwen2.5-Coder-7B-Instruct
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8399c73c766e9f26a2ca99c810544d216aea55754c3ec33bb9044b9c6ed29e68
3
  size 4970978712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbb57e71de29f5269e1a4089f706ed9ef5f7a464d011f05f66fa22349d4efdd7
3
  size 4970978712
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:61569ef3ba6f4496866fe951e522d0509e683633ab051129004eab56147610b1
3
  size 4932751032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63b17ef77e02df2c0befca0bb94f29553d03934c090bd33cc9fad620d81dde07
3
  size 4932751032
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f8245efd3a5bca7c1573ea3ade0e53bf6b86c2712de2e51eca9e0231f1f0dcd
3
  size 4991495808
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfefb13cc23ce874a2f7c725922f0586b6ec5d84afe9bdfb8491d52012f0f048
3
  size 4991495808
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93399810496e39daf39cfe8afb84a87f7ca842deb55087cd1d3525993fdf5b51
3
  size 330326240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c852c55f2f2e2654dc910179922bc561a7e668b2b739a9adfa4eb8602f99681
3
  size 330326240
tokenizer_config.json CHANGED
@@ -199,7 +199,7 @@
199
  "clean_up_tokenization_spaces": false,
200
  "eos_token": "<|im_end|>",
201
  "errors": "replace",
202
- "model_max_length": 131072,
203
  "pad_token": "<|endoftext|>",
204
  "split_special_tokens": false,
205
  "tokenizer_class": "Qwen2Tokenizer",
 
199
  "clean_up_tokenization_spaces": false,
200
  "eos_token": "<|im_end|>",
201
  "errors": "replace",
202
+ "model_max_length": 32768,
203
  "pad_token": "<|endoftext|>",
204
  "split_special_tokens": false,
205
  "tokenizer_class": "Qwen2Tokenizer",