Upload folder using huggingface_hub
Browse files
- README.md +24 -12
- config.json +3 -3
- mergekit_config.yml +20 -6
- model-00001-of-00004.safetensors +2 -2
- model-00002-of-00004.safetensors +2 -2
- model-00003-of-00004.safetensors +2 -2
- model-00004-of-00004.safetensors +2 -2
- special_tokens_map.json +12 -1
- tokenizer.json +2 -2
- tokenizer_config.json +15 -6
README.md
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
---
|
| 2 |
base_model:
|
| 3 |
-
-
|
| 4 |
-
-
|
| 5 |
library_name: transformers
|
| 6 |
tags:
|
| 7 |
- mergekit
|
|
@@ -15,13 +15,12 @@ This is a merge of pre-trained language models created using [mergekit](https://
|
|
| 15 |
## Merge Details
|
| 16 |
### Merge Method
|
| 17 |
|
| 18 |
-
This model was merged using the
|
| 19 |
|
| 20 |
### Models Merged
|
| 21 |
|
| 22 |
The following models were included in the merge:
|
| 23 |
-
* [
|
| 24 |
-
* [ClaudioItaly/Intelligence-7](https://huggingface.co/ClaudioItaly/Intelligence-7)
|
| 25 |
|
| 26 |
### Configuration
|
| 27 |
|
|
@@ -29,12 +28,25 @@ The following YAML configuration was used to produce this model:
|
|
| 29 |
|
| 30 |
```yaml
|
| 31 |
models:
|
| 32 |
-
- model:
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
parameters:
|
| 38 |
-
|
| 39 |
-
|
|
|
|
| 40 |
```
|
|
|
|
| 1 |
---
|
| 2 |
base_model:
|
| 3 |
+
- happzy2633/qwen2.5-7b-ins-v3
|
| 4 |
+
- AIDC-AI/Marco-o1
|
| 5 |
library_name: transformers
|
| 6 |
tags:
|
| 7 |
- mergekit
|
|
|
|
| 15 |
## Merge Details
|
| 16 |
### Merge Method
|
| 17 |
|
| 18 |
+
This model was merged using the [TIES](https://arxiv.org/abs/2306.01708) merge method using [happzy2633/qwen2.5-7b-ins-v3](https://huggingface.co/happzy2633/qwen2.5-7b-ins-v3) as a base.
|
| 19 |
|
| 20 |
### Models Merged
|
| 21 |
|
| 22 |
The following models were included in the merge:
|
| 23 |
+
* [AIDC-AI/Marco-o1](https://huggingface.co/AIDC-AI/Marco-o1)
|
|
|
|
| 24 |
|
| 25 |
### Configuration
|
| 26 |
|
|
|
|
| 28 |
|
| 29 |
```yaml
|
| 30 |
models:
|
| 31 |
+
- model: AIDC-AI/Marco-o1
|
| 32 |
+
parameters:
|
| 33 |
+
density: [1, 0.7, 0.1] # density gradient
|
| 34 |
+
weight: 1.0
|
| 35 |
+
- model: happzy2633/qwen2.5-7b-ins-v3
|
| 36 |
+
parameters:
|
| 37 |
+
density: 0.5
|
| 38 |
+
weight: [0, 0.3, 0.7, 1] # weight gradient
|
| 39 |
+
- model: AIDC-AI/Marco-o1
|
| 40 |
+
parameters:
|
| 41 |
+
density: 0.33
|
| 42 |
+
weight:
|
| 43 |
+
- filter: mlp
|
| 44 |
+
value: 0.5
|
| 45 |
+
- value: 0
|
| 46 |
+
merge_method: ties
|
| 47 |
+
base_model: happzy2633/qwen2.5-7b-ins-v3
|
| 48 |
parameters:
|
| 49 |
+
normalize: true
|
| 50 |
+
int8_mask: true
|
| 51 |
+
dtype: float16
|
| 52 |
```
|
config.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"_name_or_path": "
|
| 3 |
"architectures": [
|
| 4 |
"Qwen2ForCausalLM"
|
| 5 |
],
|
|
@@ -21,9 +21,9 @@
|
|
| 21 |
"rope_theta": 1000000.0,
|
| 22 |
"sliding_window": null,
|
| 23 |
"tie_word_embeddings": false,
|
| 24 |
-
"torch_dtype": "
|
| 25 |
"transformers_version": "4.46.2",
|
| 26 |
-
"use_cache":
|
| 27 |
"use_sliding_window": false,
|
| 28 |
"vocab_size": 152064
|
| 29 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"_name_or_path": "happzy2633/qwen2.5-7b-ins-v3",
|
| 3 |
"architectures": [
|
| 4 |
"Qwen2ForCausalLM"
|
| 5 |
],
|
|
|
|
| 21 |
"rope_theta": 1000000.0,
|
| 22 |
"sliding_window": null,
|
| 23 |
"tie_word_embeddings": false,
|
| 24 |
+
"torch_dtype": "float16",
|
| 25 |
"transformers_version": "4.46.2",
|
| 26 |
+
"use_cache": false,
|
| 27 |
"use_sliding_window": false,
|
| 28 |
"vocab_size": 152064
|
| 29 |
}
|
mergekit_config.yml
CHANGED
|
@@ -1,8 +1,22 @@
|
|
| 1 |
models:
|
| 2 |
-
- model:
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
parameters:
|
| 8 |
-
|
|
|
|
|
|
|
|
|
| 1 |
models:
|
| 2 |
+
- model: AIDC-AI/Marco-o1
|
| 3 |
+
parameters:
|
| 4 |
+
density: [1, 0.7, 0.1] # density gradient
|
| 5 |
+
weight: 1.0
|
| 6 |
+
- model: happzy2633/qwen2.5-7b-ins-v3
|
| 7 |
+
parameters:
|
| 8 |
+
density: 0.5
|
| 9 |
+
weight: [0, 0.3, 0.7, 1] # weight gradient
|
| 10 |
+
- model: AIDC-AI/Marco-o1
|
| 11 |
+
parameters:
|
| 12 |
+
density: 0.33
|
| 13 |
+
weight:
|
| 14 |
+
- filter: mlp
|
| 15 |
+
value: 0.5
|
| 16 |
+
- value: 0
|
| 17 |
+
merge_method: ties
|
| 18 |
+
base_model: happzy2633/qwen2.5-7b-ins-v3
|
| 19 |
parameters:
|
| 20 |
+
normalize: true
|
| 21 |
+
int8_mask: true
|
| 22 |
+
dtype: float16
|
model-00001-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2496e72dc061417a212b6c6d83bc42006cac006efd9e38148fb7dca4fa4da3cd
|
| 3 |
+
size 4976698704
|
model-00002-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:558f8e670dafdf536fce38d7200bb31e6292cdd054ad8433aa9300791e39cee4
|
| 3 |
+
size 4932750912
|
model-00003-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:abe7b5ace24afc233903816dfa84e0303dc82e37ae6914bb8569e8e1befd9746
|
| 3 |
+
size 4991495680
|
model-00004-of-00004.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ac9e5d2c7fc9b8a870117239346888729c0342611979a3fc021ec8cf141cf8e
|
| 3 |
+
size 330326224
|
special_tokens_map.json
CHANGED
|
@@ -1,7 +1,18 @@
|
|
| 1 |
{
|
| 2 |
"additional_special_tokens": [
|
| 3 |
"<|im_start|>",
|
| 4 |
-
"<|im_end|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
],
|
| 6 |
"eos_token": {
|
| 7 |
"content": "<|im_end|>",
|
|
|
|
| 1 |
{
|
| 2 |
"additional_special_tokens": [
|
| 3 |
"<|im_start|>",
|
| 4 |
+
"<|im_end|>",
|
| 5 |
+
"<|object_ref_start|>",
|
| 6 |
+
"<|object_ref_end|>",
|
| 7 |
+
"<|box_start|>",
|
| 8 |
+
"<|box_end|>",
|
| 9 |
+
"<|quad_start|>",
|
| 10 |
+
"<|quad_end|>",
|
| 11 |
+
"<|vision_start|>",
|
| 12 |
+
"<|vision_end|>",
|
| 13 |
+
"<|vision_pad|>",
|
| 14 |
+
"<|image_pad|>",
|
| 15 |
+
"<|video_pad|>"
|
| 16 |
],
|
| 17 |
"eos_token": {
|
| 18 |
"content": "<|im_end|>",
|
tokenizer.json
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
|
| 3 |
+
size 11421896
|
tokenizer_config.json
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
{
|
|
|
|
| 2 |
"add_prefix_space": false,
|
| 3 |
"added_tokens_decoder": {
|
| 4 |
"151643": {
|
|
@@ -180,20 +181,28 @@
|
|
| 180 |
},
|
| 181 |
"additional_special_tokens": [
|
| 182 |
"<|im_start|>",
|
| 183 |
-
"<|im_end|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
],
|
| 185 |
"bos_token": null,
|
| 186 |
-
"chat_template": "{%
|
| 187 |
"clean_up_tokenization_spaces": false,
|
| 188 |
"eos_token": "<|im_end|>",
|
| 189 |
"errors": "replace",
|
| 190 |
-
"max_length": 4096,
|
| 191 |
"model_max_length": 131072,
|
| 192 |
"pad_token": "<|endoftext|>",
|
|
|
|
| 193 |
"split_special_tokens": false,
|
| 194 |
-
"stride": 0,
|
| 195 |
"tokenizer_class": "Qwen2Tokenizer",
|
| 196 |
-
"truncation_side": "right",
|
| 197 |
-
"truncation_strategy": "longest_first",
|
| 198 |
"unk_token": null
|
| 199 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"add_bos_token": false,
|
| 3 |
"add_prefix_space": false,
|
| 4 |
"added_tokens_decoder": {
|
| 5 |
"151643": {
|
|
|
|
| 181 |
},
|
| 182 |
"additional_special_tokens": [
|
| 183 |
"<|im_start|>",
|
| 184 |
+
"<|im_end|>",
|
| 185 |
+
"<|object_ref_start|>",
|
| 186 |
+
"<|object_ref_end|>",
|
| 187 |
+
"<|box_start|>",
|
| 188 |
+
"<|box_end|>",
|
| 189 |
+
"<|quad_start|>",
|
| 190 |
+
"<|quad_end|>",
|
| 191 |
+
"<|vision_start|>",
|
| 192 |
+
"<|vision_end|>",
|
| 193 |
+
"<|vision_pad|>",
|
| 194 |
+
"<|image_pad|>",
|
| 195 |
+
"<|video_pad|>"
|
| 196 |
],
|
| 197 |
"bos_token": null,
|
| 198 |
+
"chat_template": "{% set system_message = 'You are a helpful assistant.' %}{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% endif %}{% if system_message is defined %}{{ '<|im_start|>system\n' + system_message + '<|im_end|>\n' }}{% endif %}{% for message in loop_messages %}{% set content = message['content'] %}{% if message['role'] == 'user' %}{{ '<|im_start|>user\n' + content + '<|im_end|>\n<|im_start|>assistant\n' }}{% elif message['role'] == 'assistant' %}{{ content + '<|im_end|>' + '\n' }}{% endif %}{% endfor %}",
|
| 199 |
"clean_up_tokenization_spaces": false,
|
| 200 |
"eos_token": "<|im_end|>",
|
| 201 |
"errors": "replace",
|
|
|
|
| 202 |
"model_max_length": 131072,
|
| 203 |
"pad_token": "<|endoftext|>",
|
| 204 |
+
"padding_side": "right",
|
| 205 |
"split_special_tokens": false,
|
|
|
|
| 206 |
"tokenizer_class": "Qwen2Tokenizer",
|
|
|
|
|
|
|
| 207 |
"unk_token": null
|
| 208 |
}
|