async0x42 committed on
Commit
308bf4a
·
verified ·
1 Parent(s): 4f82159

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. README.md +38 -0
  3. chat_template.jinja +154 -0
  4. config.json +1100 -0
  5. generation_config.json +13 -0
  6. model-00001-of-00046.safetensors +3 -0
  7. model-00003-of-00046.safetensors +3 -0
  8. model-00004-of-00046.safetensors +3 -0
  9. model-00005-of-00046.safetensors +3 -0
  10. model-00006-of-00046.safetensors +3 -0
  11. model-00008-of-00046.safetensors +3 -0
  12. model-00009-of-00046.safetensors +3 -0
  13. model-00010-of-00046.safetensors +3 -0
  14. model-00011-of-00046.safetensors +3 -0
  15. model-00012-of-00046.safetensors +3 -0
  16. model-00013-of-00046.safetensors +3 -0
  17. model-00014-of-00046.safetensors +3 -0
  18. model-00015-of-00046.safetensors +3 -0
  19. model-00016-of-00046.safetensors +3 -0
  20. model-00017-of-00046.safetensors +3 -0
  21. model-00018-of-00046.safetensors +3 -0
  22. model-00019-of-00046.safetensors +3 -0
  23. model-00020-of-00046.safetensors +3 -0
  24. model-00021-of-00046.safetensors +3 -0
  25. model-00022-of-00046.safetensors +3 -0
  26. model-00023-of-00046.safetensors +3 -0
  27. model-00024-of-00046.safetensors +3 -0
  28. model-00025-of-00046.safetensors +3 -0
  29. model-00026-of-00046.safetensors +3 -0
  30. model-00027-of-00046.safetensors +3 -0
  31. model-00028-of-00046.safetensors +3 -0
  32. model-00029-of-00046.safetensors +3 -0
  33. model-00030-of-00046.safetensors +3 -0
  34. model-00031-of-00046.safetensors +3 -0
  35. model-00032-of-00046.safetensors +3 -0
  36. model-00033-of-00046.safetensors +3 -0
  37. model-00034-of-00046.safetensors +3 -0
  38. model-00035-of-00046.safetensors +3 -0
  39. model-00036-of-00046.safetensors +3 -0
  40. model-00037-of-00046.safetensors +3 -0
  41. model-00038-of-00046.safetensors +3 -0
  42. model-00039-of-00046.safetensors +3 -0
  43. model-00040-of-00046.safetensors +3 -0
  44. model-00041-of-00046.safetensors +3 -0
  45. model-00042-of-00046.safetensors +3 -0
  46. model-00044-of-00046.safetensors +3 -0
  47. model-00045-of-00046.safetensors +3 -0
  48. model-00046-of-00046.safetensors +3 -0
  49. model.safetensors.index.json +0 -0
  50. tokenizer.json +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - unsloth
4
+ - mlx
5
+ base_model: unsloth/Qwen3.5-397B-A17B
6
+ library_name: mlx
7
+ license: apache-2.0
8
+ license_link: https://huggingface.co/Qwen/Qwen3.5-397B-A17B/blob/main/LICENSE
9
+ pipeline_tag: text-generation
10
+ ---
11
+
12
+ # async0x42/Qwen3.5-397B-A17B
13
+
14
+ This model [async0x42/Qwen3.5-397B-A17B](https://huggingface.co/async0x42/Qwen3.5-397B-A17B) was
15
+ converted to MLX format from [unsloth/Qwen3.5-397B-A17B](https://huggingface.co/unsloth/Qwen3.5-397B-A17B)
16
+ using mlx-lm version **0.30.7**.
17
+
18
+ ## Use with mlx
19
+
20
+ ```bash
21
+ pip install mlx-lm
22
+ ```
23
+
24
+ ```python
25
+ from mlx_lm import load, generate
26
+
27
+ model, tokenizer = load("async0x42/Qwen3.5-397B-A17B")
28
+
29
+ prompt = "hello"
30
+
31
+ if tokenizer.chat_template is not None:
32
+     messages = [{"role": "user", "content": prompt}]
33
+     prompt = tokenizer.apply_chat_template(
34
+         messages, add_generation_prompt=True, return_dict=False,
35
+     )
36
+
37
+ response = generate(model, tokenizer, prompt=prompt, verbose=True)
38
+ ```
chat_template.jinja ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- set image_count = namespace(value=0) %}
2
+ {%- set video_count = namespace(value=0) %}
3
+ {%- macro render_content(content, do_vision_count, is_system_content=false) %}
4
+ {%- if content is string %}
5
+ {{- content }}
6
+ {%- elif content is iterable and content is not mapping %}
7
+ {%- for item in content %}
8
+ {%- if 'image' in item or 'image_url' in item or item.type == 'image' %}
9
+ {%- if is_system_content %}
10
+ {{- raise_exception('System message cannot contain images.') }}
11
+ {%- endif %}
12
+ {%- if do_vision_count %}
13
+ {%- set image_count.value = image_count.value + 1 %}
14
+ {%- endif %}
15
+ {%- if add_vision_id %}
16
+ {{- 'Picture ' ~ image_count.value ~ ': ' }}
17
+ {%- endif %}
18
+ {{- '<|vision_start|><|image_pad|><|vision_end|>' }}
19
+ {%- elif 'video' in item or item.type == 'video' %}
20
+ {%- if is_system_content %}
21
+ {{- raise_exception('System message cannot contain videos.') }}
22
+ {%- endif %}
23
+ {%- if do_vision_count %}
24
+ {%- set video_count.value = video_count.value + 1 %}
25
+ {%- endif %}
26
+ {%- if add_vision_id %}
27
+ {{- 'Video ' ~ video_count.value ~ ': ' }}
28
+ {%- endif %}
29
+ {{- '<|vision_start|><|video_pad|><|vision_end|>' }}
30
+ {%- elif 'text' in item %}
31
+ {{- item.text }}
32
+ {%- else %}
33
+ {{- raise_exception('Unexpected item type in content.') }}
34
+ {%- endif %}
35
+ {%- endfor %}
36
+ {%- elif content is none or content is undefined %}
37
+ {{- '' }}
38
+ {%- else %}
39
+ {{- raise_exception('Unexpected content type.') }}
40
+ {%- endif %}
41
+ {%- endmacro %}
42
+ {%- if not messages %}
43
+ {{- raise_exception('No messages provided.') }}
44
+ {%- endif %}
45
+ {%- if tools and tools is iterable and tools is not mapping %}
46
+ {{- '<|im_start|>system\n' }}
47
+ {{- "# Tools\n\nYou have access to the following functions:\n\n<tools>" }}
48
+ {%- for tool in tools %}
49
+ {{- "\n" }}
50
+ {{- tool | tojson }}
51
+ {%- endfor %}
52
+ {{- "\n</tools>" }}
53
+ {{- '\n\nIf you choose to call a function ONLY reply in the following format with NO suffix:\n\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>\nvalue_1\n</parameter>\n<parameter=example_parameter_2>\nThis is the value for the second parameter\nthat can span\nmultiple lines\n</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\nReminder:\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- Required parameters MUST be specified\n- You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after\n- If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls\n</IMPORTANT>' }}
54
+ {%- if messages[0].role == 'system' %}
55
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
56
+ {%- if content %}
57
+ {{- '\n\n' + content }}
58
+ {%- endif %}
59
+ {%- endif %}
60
+ {{- '<|im_end|>\n' }}
61
+ {%- else %}
62
+ {%- if messages[0].role == 'system' %}
63
+ {%- set content = render_content(messages[0].content, false, true)|trim %}
64
+ {{- '<|im_start|>system\n' + content + '<|im_end|>\n' }}
65
+ {%- endif %}
66
+ {%- endif %}
67
+ {%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
68
+ {%- for message in messages[::-1] %}
69
+ {%- set index = (messages|length - 1) - loop.index0 %}
70
+ {%- if ns.multi_step_tool and message.role == "user" %}
71
+ {%- set content = render_content(message.content, false)|trim %}
72
+ {%- if not(content.startswith('<tool_response>') and content.endswith('</tool_response>')) %}
73
+ {%- set ns.multi_step_tool = false %}
74
+ {%- set ns.last_query_index = index %}
75
+ {%- endif %}
76
+ {%- endif %}
77
+ {%- endfor %}
78
+ {%- if ns.multi_step_tool %}
79
+ {{- raise_exception('No user query found in messages.') }}
80
+ {%- endif %}
81
+ {%- for message in messages %}
82
+ {%- set content = render_content(message.content, true)|trim %}
83
+ {%- if message.role == "system" %}
84
+ {%- if not loop.first %}
85
+ {{- raise_exception('System message must be at the beginning.') }}
86
+ {%- endif %}
87
+ {%- elif message.role == "user" %}
88
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
89
+ {%- elif message.role == "assistant" %}
90
+ {%- set reasoning_content = '' %}
91
+ {%- if message.reasoning_content is string %}
92
+ {%- set reasoning_content = message.reasoning_content %}
93
+ {%- else %}
94
+ {%- if '</think>' in content %}
95
+ {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
96
+ {%- set content = content.split('</think>')[-1].lstrip('\n') %}
97
+ {%- endif %}
98
+ {%- endif %}
99
+ {%- set reasoning_content = reasoning_content|trim %}
100
+ {%- if loop.index0 > ns.last_query_index %}
101
+ {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content + '\n</think>\n\n' + content }}
102
+ {%- else %}
103
+ {{- '<|im_start|>' + message.role + '\n' + content }}
104
+ {%- endif %}
105
+ {%- if message.tool_calls and message.tool_calls is iterable and message.tool_calls is not mapping %}
106
+ {%- for tool_call in message.tool_calls %}
107
+ {%- if tool_call.function is defined %}
108
+ {%- set tool_call = tool_call.function %}
109
+ {%- endif %}
110
+ {%- if loop.first %}
111
+ {%- if content|trim %}
112
+ {{- '\n\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
113
+ {%- else %}
114
+ {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
115
+ {%- endif %}
116
+ {%- else %}
117
+ {{- '\n<tool_call>\n<function=' + tool_call.name + '>\n' }}
118
+ {%- endif %}
119
+ {%- if tool_call.arguments is defined %}
120
+ {%- for args_name, args_value in tool_call.arguments|items %}
121
+ {{- '<parameter=' + args_name + '>\n' }}
122
+ {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
123
+ {{- args_value }}
124
+ {{- '\n</parameter>\n' }}
125
+ {%- endfor %}
126
+ {%- endif %}
127
+ {{- '</function>\n</tool_call>' }}
128
+ {%- endfor %}
129
+ {%- endif %}
130
+ {{- '<|im_end|>\n' }}
131
+ {%- elif message.role == "tool" %}
132
+ {%- if loop.previtem and loop.previtem.role != "tool" %}
133
+ {{- '<|im_start|>user' }}
134
+ {%- endif %}
135
+ {{- '\n<tool_response>\n' }}
136
+ {{- content }}
137
+ {{- '\n</tool_response>' }}
138
+ {%- if not loop.last and loop.nextitem.role != "tool" %}
139
+ {{- '<|im_end|>\n' }}
140
+ {%- elif loop.last %}
141
+ {{- '<|im_end|>\n' }}
142
+ {%- endif %}
143
+ {%- else %}
144
+ {{- raise_exception('Unexpected message role.') }}
145
+ {%- endif %}
146
+ {%- endfor %}
147
+ {%- if add_generation_prompt %}
148
+ {{- '<|im_start|>assistant\n' }}
149
+ {%- if enable_thinking is defined and enable_thinking is false %}
150
+ {{- '<think>\n\n</think>\n\n' }}
151
+ {%- else %}
152
+ {{- '<think>\n' }}
153
+ {%- endif %}
154
+ {%- endif %}
config.json ADDED
@@ -0,0 +1,1100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3_5MoeForConditionalGeneration"
4
+ ],
5
+ "eos_token_id": [
6
+ 248046,
7
+ 248044
8
+ ],
9
+ "image_token_id": 248056,
10
+ "model_type": "qwen3_5_moe",
11
+ "pad_token_id": 248055,
12
+ "quantization": {
13
+ "group_size": 64,
14
+ "bits": 4,
15
+ "mode": "affine",
16
+ "language_model.model.layers.0.mlp.gate": {
17
+ "group_size": 64,
18
+ "bits": 8
19
+ },
20
+ "language_model.model.layers.0.mlp.shared_expert_gate": {
21
+ "group_size": 64,
22
+ "bits": 8
23
+ },
24
+ "language_model.model.layers.1.mlp.gate": {
25
+ "group_size": 64,
26
+ "bits": 8
27
+ },
28
+ "language_model.model.layers.1.mlp.shared_expert_gate": {
29
+ "group_size": 64,
30
+ "bits": 8
31
+ },
32
+ "language_model.model.layers.2.mlp.gate": {
33
+ "group_size": 64,
34
+ "bits": 8
35
+ },
36
+ "language_model.model.layers.2.mlp.shared_expert_gate": {
37
+ "group_size": 64,
38
+ "bits": 8
39
+ },
40
+ "language_model.model.layers.3.mlp.gate": {
41
+ "group_size": 64,
42
+ "bits": 8
43
+ },
44
+ "language_model.model.layers.3.mlp.shared_expert_gate": {
45
+ "group_size": 64,
46
+ "bits": 8
47
+ },
48
+ "language_model.model.layers.4.mlp.gate": {
49
+ "group_size": 64,
50
+ "bits": 8
51
+ },
52
+ "language_model.model.layers.4.mlp.shared_expert_gate": {
53
+ "group_size": 64,
54
+ "bits": 8
55
+ },
56
+ "language_model.model.layers.5.mlp.gate": {
57
+ "group_size": 64,
58
+ "bits": 8
59
+ },
60
+ "language_model.model.layers.5.mlp.shared_expert_gate": {
61
+ "group_size": 64,
62
+ "bits": 8
63
+ },
64
+ "language_model.model.layers.6.mlp.gate": {
65
+ "group_size": 64,
66
+ "bits": 8
67
+ },
68
+ "language_model.model.layers.6.mlp.shared_expert_gate": {
69
+ "group_size": 64,
70
+ "bits": 8
71
+ },
72
+ "language_model.model.layers.7.mlp.gate": {
73
+ "group_size": 64,
74
+ "bits": 8
75
+ },
76
+ "language_model.model.layers.7.mlp.shared_expert_gate": {
77
+ "group_size": 64,
78
+ "bits": 8
79
+ },
80
+ "language_model.model.layers.8.mlp.gate": {
81
+ "group_size": 64,
82
+ "bits": 8
83
+ },
84
+ "language_model.model.layers.8.mlp.shared_expert_gate": {
85
+ "group_size": 64,
86
+ "bits": 8
87
+ },
88
+ "language_model.model.layers.9.mlp.gate": {
89
+ "group_size": 64,
90
+ "bits": 8
91
+ },
92
+ "language_model.model.layers.9.mlp.shared_expert_gate": {
93
+ "group_size": 64,
94
+ "bits": 8
95
+ },
96
+ "language_model.model.layers.10.mlp.gate": {
97
+ "group_size": 64,
98
+ "bits": 8
99
+ },
100
+ "language_model.model.layers.10.mlp.shared_expert_gate": {
101
+ "group_size": 64,
102
+ "bits": 8
103
+ },
104
+ "language_model.model.layers.11.mlp.gate": {
105
+ "group_size": 64,
106
+ "bits": 8
107
+ },
108
+ "language_model.model.layers.11.mlp.shared_expert_gate": {
109
+ "group_size": 64,
110
+ "bits": 8
111
+ },
112
+ "language_model.model.layers.12.mlp.gate": {
113
+ "group_size": 64,
114
+ "bits": 8
115
+ },
116
+ "language_model.model.layers.12.mlp.shared_expert_gate": {
117
+ "group_size": 64,
118
+ "bits": 8
119
+ },
120
+ "language_model.model.layers.13.mlp.gate": {
121
+ "group_size": 64,
122
+ "bits": 8
123
+ },
124
+ "language_model.model.layers.13.mlp.shared_expert_gate": {
125
+ "group_size": 64,
126
+ "bits": 8
127
+ },
128
+ "language_model.model.layers.14.mlp.gate": {
129
+ "group_size": 64,
130
+ "bits": 8
131
+ },
132
+ "language_model.model.layers.14.mlp.shared_expert_gate": {
133
+ "group_size": 64,
134
+ "bits": 8
135
+ },
136
+ "language_model.model.layers.15.mlp.gate": {
137
+ "group_size": 64,
138
+ "bits": 8
139
+ },
140
+ "language_model.model.layers.15.mlp.shared_expert_gate": {
141
+ "group_size": 64,
142
+ "bits": 8
143
+ },
144
+ "language_model.model.layers.16.mlp.gate": {
145
+ "group_size": 64,
146
+ "bits": 8
147
+ },
148
+ "language_model.model.layers.16.mlp.shared_expert_gate": {
149
+ "group_size": 64,
150
+ "bits": 8
151
+ },
152
+ "language_model.model.layers.17.mlp.gate": {
153
+ "group_size": 64,
154
+ "bits": 8
155
+ },
156
+ "language_model.model.layers.17.mlp.shared_expert_gate": {
157
+ "group_size": 64,
158
+ "bits": 8
159
+ },
160
+ "language_model.model.layers.18.mlp.gate": {
161
+ "group_size": 64,
162
+ "bits": 8
163
+ },
164
+ "language_model.model.layers.18.mlp.shared_expert_gate": {
165
+ "group_size": 64,
166
+ "bits": 8
167
+ },
168
+ "language_model.model.layers.19.mlp.gate": {
169
+ "group_size": 64,
170
+ "bits": 8
171
+ },
172
+ "language_model.model.layers.19.mlp.shared_expert_gate": {
173
+ "group_size": 64,
174
+ "bits": 8
175
+ },
176
+ "language_model.model.layers.20.mlp.gate": {
177
+ "group_size": 64,
178
+ "bits": 8
179
+ },
180
+ "language_model.model.layers.20.mlp.shared_expert_gate": {
181
+ "group_size": 64,
182
+ "bits": 8
183
+ },
184
+ "language_model.model.layers.21.mlp.gate": {
185
+ "group_size": 64,
186
+ "bits": 8
187
+ },
188
+ "language_model.model.layers.21.mlp.shared_expert_gate": {
189
+ "group_size": 64,
190
+ "bits": 8
191
+ },
192
+ "language_model.model.layers.22.mlp.gate": {
193
+ "group_size": 64,
194
+ "bits": 8
195
+ },
196
+ "language_model.model.layers.22.mlp.shared_expert_gate": {
197
+ "group_size": 64,
198
+ "bits": 8
199
+ },
200
+ "language_model.model.layers.23.mlp.gate": {
201
+ "group_size": 64,
202
+ "bits": 8
203
+ },
204
+ "language_model.model.layers.23.mlp.shared_expert_gate": {
205
+ "group_size": 64,
206
+ "bits": 8
207
+ },
208
+ "language_model.model.layers.24.mlp.gate": {
209
+ "group_size": 64,
210
+ "bits": 8
211
+ },
212
+ "language_model.model.layers.24.mlp.shared_expert_gate": {
213
+ "group_size": 64,
214
+ "bits": 8
215
+ },
216
+ "language_model.model.layers.25.mlp.gate": {
217
+ "group_size": 64,
218
+ "bits": 8
219
+ },
220
+ "language_model.model.layers.25.mlp.shared_expert_gate": {
221
+ "group_size": 64,
222
+ "bits": 8
223
+ },
224
+ "language_model.model.layers.26.mlp.gate": {
225
+ "group_size": 64,
226
+ "bits": 8
227
+ },
228
+ "language_model.model.layers.26.mlp.shared_expert_gate": {
229
+ "group_size": 64,
230
+ "bits": 8
231
+ },
232
+ "language_model.model.layers.27.mlp.gate": {
233
+ "group_size": 64,
234
+ "bits": 8
235
+ },
236
+ "language_model.model.layers.27.mlp.shared_expert_gate": {
237
+ "group_size": 64,
238
+ "bits": 8
239
+ },
240
+ "language_model.model.layers.28.mlp.gate": {
241
+ "group_size": 64,
242
+ "bits": 8
243
+ },
244
+ "language_model.model.layers.28.mlp.shared_expert_gate": {
245
+ "group_size": 64,
246
+ "bits": 8
247
+ },
248
+ "language_model.model.layers.29.mlp.gate": {
249
+ "group_size": 64,
250
+ "bits": 8
251
+ },
252
+ "language_model.model.layers.29.mlp.shared_expert_gate": {
253
+ "group_size": 64,
254
+ "bits": 8
255
+ },
256
+ "language_model.model.layers.30.mlp.gate": {
257
+ "group_size": 64,
258
+ "bits": 8
259
+ },
260
+ "language_model.model.layers.30.mlp.shared_expert_gate": {
261
+ "group_size": 64,
262
+ "bits": 8
263
+ },
264
+ "language_model.model.layers.31.mlp.gate": {
265
+ "group_size": 64,
266
+ "bits": 8
267
+ },
268
+ "language_model.model.layers.31.mlp.shared_expert_gate": {
269
+ "group_size": 64,
270
+ "bits": 8
271
+ },
272
+ "language_model.model.layers.32.mlp.gate": {
273
+ "group_size": 64,
274
+ "bits": 8
275
+ },
276
+ "language_model.model.layers.32.mlp.shared_expert_gate": {
277
+ "group_size": 64,
278
+ "bits": 8
279
+ },
280
+ "language_model.model.layers.33.mlp.gate": {
281
+ "group_size": 64,
282
+ "bits": 8
283
+ },
284
+ "language_model.model.layers.33.mlp.shared_expert_gate": {
285
+ "group_size": 64,
286
+ "bits": 8
287
+ },
288
+ "language_model.model.layers.34.mlp.gate": {
289
+ "group_size": 64,
290
+ "bits": 8
291
+ },
292
+ "language_model.model.layers.34.mlp.shared_expert_gate": {
293
+ "group_size": 64,
294
+ "bits": 8
295
+ },
296
+ "language_model.model.layers.35.mlp.gate": {
297
+ "group_size": 64,
298
+ "bits": 8
299
+ },
300
+ "language_model.model.layers.35.mlp.shared_expert_gate": {
301
+ "group_size": 64,
302
+ "bits": 8
303
+ },
304
+ "language_model.model.layers.36.mlp.gate": {
305
+ "group_size": 64,
306
+ "bits": 8
307
+ },
308
+ "language_model.model.layers.36.mlp.shared_expert_gate": {
309
+ "group_size": 64,
310
+ "bits": 8
311
+ },
312
+ "language_model.model.layers.37.mlp.gate": {
313
+ "group_size": 64,
314
+ "bits": 8
315
+ },
316
+ "language_model.model.layers.37.mlp.shared_expert_gate": {
317
+ "group_size": 64,
318
+ "bits": 8
319
+ },
320
+ "language_model.model.layers.38.mlp.gate": {
321
+ "group_size": 64,
322
+ "bits": 8
323
+ },
324
+ "language_model.model.layers.38.mlp.shared_expert_gate": {
325
+ "group_size": 64,
326
+ "bits": 8
327
+ },
328
+ "language_model.model.layers.39.mlp.gate": {
329
+ "group_size": 64,
330
+ "bits": 8
331
+ },
332
+ "language_model.model.layers.39.mlp.shared_expert_gate": {
333
+ "group_size": 64,
334
+ "bits": 8
335
+ },
336
+ "language_model.model.layers.40.mlp.gate": {
337
+ "group_size": 64,
338
+ "bits": 8
339
+ },
340
+ "language_model.model.layers.40.mlp.shared_expert_gate": {
341
+ "group_size": 64,
342
+ "bits": 8
343
+ },
344
+ "language_model.model.layers.41.mlp.gate": {
345
+ "group_size": 64,
346
+ "bits": 8
347
+ },
348
+ "language_model.model.layers.41.mlp.shared_expert_gate": {
349
+ "group_size": 64,
350
+ "bits": 8
351
+ },
352
+ "language_model.model.layers.42.mlp.gate": {
353
+ "group_size": 64,
354
+ "bits": 8
355
+ },
356
+ "language_model.model.layers.42.mlp.shared_expert_gate": {
357
+ "group_size": 64,
358
+ "bits": 8
359
+ },
360
+ "language_model.model.layers.43.mlp.gate": {
361
+ "group_size": 64,
362
+ "bits": 8
363
+ },
364
+ "language_model.model.layers.43.mlp.shared_expert_gate": {
365
+ "group_size": 64,
366
+ "bits": 8
367
+ },
368
+ "language_model.model.layers.44.mlp.gate": {
369
+ "group_size": 64,
370
+ "bits": 8
371
+ },
372
+ "language_model.model.layers.44.mlp.shared_expert_gate": {
373
+ "group_size": 64,
374
+ "bits": 8
375
+ },
376
+ "language_model.model.layers.45.mlp.gate": {
377
+ "group_size": 64,
378
+ "bits": 8
379
+ },
380
+ "language_model.model.layers.45.mlp.shared_expert_gate": {
381
+ "group_size": 64,
382
+ "bits": 8
383
+ },
384
+ "language_model.model.layers.46.mlp.gate": {
385
+ "group_size": 64,
386
+ "bits": 8
387
+ },
388
+ "language_model.model.layers.46.mlp.shared_expert_gate": {
389
+ "group_size": 64,
390
+ "bits": 8
391
+ },
392
+ "language_model.model.layers.47.mlp.gate": {
393
+ "group_size": 64,
394
+ "bits": 8
395
+ },
396
+ "language_model.model.layers.47.mlp.shared_expert_gate": {
397
+ "group_size": 64,
398
+ "bits": 8
399
+ },
400
+ "language_model.model.layers.48.mlp.gate": {
401
+ "group_size": 64,
402
+ "bits": 8
403
+ },
404
+ "language_model.model.layers.48.mlp.shared_expert_gate": {
405
+ "group_size": 64,
406
+ "bits": 8
407
+ },
408
+ "language_model.model.layers.49.mlp.gate": {
409
+ "group_size": 64,
410
+ "bits": 8
411
+ },
412
+ "language_model.model.layers.49.mlp.shared_expert_gate": {
413
+ "group_size": 64,
414
+ "bits": 8
415
+ },
416
+ "language_model.model.layers.50.mlp.gate": {
417
+ "group_size": 64,
418
+ "bits": 8
419
+ },
420
+ "language_model.model.layers.50.mlp.shared_expert_gate": {
421
+ "group_size": 64,
422
+ "bits": 8
423
+ },
424
+ "language_model.model.layers.51.mlp.gate": {
425
+ "group_size": 64,
426
+ "bits": 8
427
+ },
428
+ "language_model.model.layers.51.mlp.shared_expert_gate": {
429
+ "group_size": 64,
430
+ "bits": 8
431
+ },
432
+ "language_model.model.layers.52.mlp.gate": {
433
+ "group_size": 64,
434
+ "bits": 8
435
+ },
436
+ "language_model.model.layers.52.mlp.shared_expert_gate": {
437
+ "group_size": 64,
438
+ "bits": 8
439
+ },
440
+ "language_model.model.layers.53.mlp.gate": {
441
+ "group_size": 64,
442
+ "bits": 8
443
+ },
444
+ "language_model.model.layers.53.mlp.shared_expert_gate": {
445
+ "group_size": 64,
446
+ "bits": 8
447
+ },
448
+ "language_model.model.layers.54.mlp.gate": {
449
+ "group_size": 64,
450
+ "bits": 8
451
+ },
452
+ "language_model.model.layers.54.mlp.shared_expert_gate": {
453
+ "group_size": 64,
454
+ "bits": 8
455
+ },
456
+ "language_model.model.layers.55.mlp.gate": {
457
+ "group_size": 64,
458
+ "bits": 8
459
+ },
460
+ "language_model.model.layers.55.mlp.shared_expert_gate": {
461
+ "group_size": 64,
462
+ "bits": 8
463
+ },
464
+ "language_model.model.layers.56.mlp.gate": {
465
+ "group_size": 64,
466
+ "bits": 8
467
+ },
468
+ "language_model.model.layers.56.mlp.shared_expert_gate": {
469
+ "group_size": 64,
470
+ "bits": 8
471
+ },
472
+ "language_model.model.layers.57.mlp.gate": {
473
+ "group_size": 64,
474
+ "bits": 8
475
+ },
476
+ "language_model.model.layers.57.mlp.shared_expert_gate": {
477
+ "group_size": 64,
478
+ "bits": 8
479
+ },
480
+ "language_model.model.layers.58.mlp.gate": {
481
+ "group_size": 64,
482
+ "bits": 8
483
+ },
484
+ "language_model.model.layers.58.mlp.shared_expert_gate": {
485
+ "group_size": 64,
486
+ "bits": 8
487
+ },
488
+ "language_model.model.layers.59.mlp.gate": {
489
+ "group_size": 64,
490
+ "bits": 8
491
+ },
492
+ "language_model.model.layers.59.mlp.shared_expert_gate": {
493
+ "group_size": 64,
494
+ "bits": 8
495
+ }
496
+ },
497
+ "quantization_config": {
498
+ "group_size": 64,
499
+ "bits": 4,
500
+ "mode": "affine",
501
+ "language_model.model.layers.0.mlp.gate": {
502
+ "group_size": 64,
503
+ "bits": 8
504
+ },
505
+ "language_model.model.layers.0.mlp.shared_expert_gate": {
506
+ "group_size": 64,
507
+ "bits": 8
508
+ },
509
+ "language_model.model.layers.1.mlp.gate": {
510
+ "group_size": 64,
511
+ "bits": 8
512
+ },
513
+ "language_model.model.layers.1.mlp.shared_expert_gate": {
514
+ "group_size": 64,
515
+ "bits": 8
516
+ },
517
+ "language_model.model.layers.2.mlp.gate": {
518
+ "group_size": 64,
519
+ "bits": 8
520
+ },
521
+ "language_model.model.layers.2.mlp.shared_expert_gate": {
522
+ "group_size": 64,
523
+ "bits": 8
524
+ },
525
+ "language_model.model.layers.3.mlp.gate": {
526
+ "group_size": 64,
527
+ "bits": 8
528
+ },
529
+ "language_model.model.layers.3.mlp.shared_expert_gate": {
530
+ "group_size": 64,
531
+ "bits": 8
532
+ },
533
+ "language_model.model.layers.4.mlp.gate": {
534
+ "group_size": 64,
535
+ "bits": 8
536
+ },
537
+ "language_model.model.layers.4.mlp.shared_expert_gate": {
538
+ "group_size": 64,
539
+ "bits": 8
540
+ },
541
+ "language_model.model.layers.5.mlp.gate": {
542
+ "group_size": 64,
543
+ "bits": 8
544
+ },
545
+ "language_model.model.layers.5.mlp.shared_expert_gate": {
546
+ "group_size": 64,
547
+ "bits": 8
548
+ },
549
+ "language_model.model.layers.6.mlp.gate": {
550
+ "group_size": 64,
551
+ "bits": 8
552
+ },
553
+ "language_model.model.layers.6.mlp.shared_expert_gate": {
554
+ "group_size": 64,
555
+ "bits": 8
556
+ },
557
+ "language_model.model.layers.7.mlp.gate": {
558
+ "group_size": 64,
559
+ "bits": 8
560
+ },
561
+ "language_model.model.layers.7.mlp.shared_expert_gate": {
562
+ "group_size": 64,
563
+ "bits": 8
564
+ },
565
+ "language_model.model.layers.8.mlp.gate": {
566
+ "group_size": 64,
567
+ "bits": 8
568
+ },
569
+ "language_model.model.layers.8.mlp.shared_expert_gate": {
570
+ "group_size": 64,
571
+ "bits": 8
572
+ },
573
+ "language_model.model.layers.9.mlp.gate": {
574
+ "group_size": 64,
575
+ "bits": 8
576
+ },
577
+ "language_model.model.layers.9.mlp.shared_expert_gate": {
578
+ "group_size": 64,
579
+ "bits": 8
580
+ },
581
+ "language_model.model.layers.10.mlp.gate": {
582
+ "group_size": 64,
583
+ "bits": 8
584
+ },
585
+ "language_model.model.layers.10.mlp.shared_expert_gate": {
586
+ "group_size": 64,
587
+ "bits": 8
588
+ },
589
+ "language_model.model.layers.11.mlp.gate": {
590
+ "group_size": 64,
591
+ "bits": 8
592
+ },
593
+ "language_model.model.layers.11.mlp.shared_expert_gate": {
594
+ "group_size": 64,
595
+ "bits": 8
596
+ },
597
+ "language_model.model.layers.12.mlp.gate": {
598
+ "group_size": 64,
599
+ "bits": 8
600
+ },
601
+ "language_model.model.layers.12.mlp.shared_expert_gate": {
602
+ "group_size": 64,
603
+ "bits": 8
604
+ },
605
+ "language_model.model.layers.13.mlp.gate": {
606
+ "group_size": 64,
607
+ "bits": 8
608
+ },
609
+ "language_model.model.layers.13.mlp.shared_expert_gate": {
610
+ "group_size": 64,
611
+ "bits": 8
612
+ },
613
+ "language_model.model.layers.14.mlp.gate": {
614
+ "group_size": 64,
615
+ "bits": 8
616
+ },
617
+ "language_model.model.layers.14.mlp.shared_expert_gate": {
618
+ "group_size": 64,
619
+ "bits": 8
620
+ },
621
+ "language_model.model.layers.15.mlp.gate": {
622
+ "group_size": 64,
623
+ "bits": 8
624
+ },
625
+ "language_model.model.layers.15.mlp.shared_expert_gate": {
626
+ "group_size": 64,
627
+ "bits": 8
628
+ },
629
+ "language_model.model.layers.16.mlp.gate": {
630
+ "group_size": 64,
631
+ "bits": 8
632
+ },
633
+ "language_model.model.layers.16.mlp.shared_expert_gate": {
634
+ "group_size": 64,
635
+ "bits": 8
636
+ },
637
+ "language_model.model.layers.17.mlp.gate": {
638
+ "group_size": 64,
639
+ "bits": 8
640
+ },
641
+ "language_model.model.layers.17.mlp.shared_expert_gate": {
642
+ "group_size": 64,
643
+ "bits": 8
644
+ },
645
+ "language_model.model.layers.18.mlp.gate": {
646
+ "group_size": 64,
647
+ "bits": 8
648
+ },
649
+ "language_model.model.layers.18.mlp.shared_expert_gate": {
650
+ "group_size": 64,
651
+ "bits": 8
652
+ },
653
+ "language_model.model.layers.19.mlp.gate": {
654
+ "group_size": 64,
655
+ "bits": 8
656
+ },
657
+ "language_model.model.layers.19.mlp.shared_expert_gate": {
658
+ "group_size": 64,
659
+ "bits": 8
660
+ },
661
+ "language_model.model.layers.20.mlp.gate": {
662
+ "group_size": 64,
663
+ "bits": 8
664
+ },
665
+ "language_model.model.layers.20.mlp.shared_expert_gate": {
666
+ "group_size": 64,
667
+ "bits": 8
668
+ },
669
+ "language_model.model.layers.21.mlp.gate": {
670
+ "group_size": 64,
671
+ "bits": 8
672
+ },
673
+ "language_model.model.layers.21.mlp.shared_expert_gate": {
674
+ "group_size": 64,
675
+ "bits": 8
676
+ },
677
+ "language_model.model.layers.22.mlp.gate": {
678
+ "group_size": 64,
679
+ "bits": 8
680
+ },
681
+ "language_model.model.layers.22.mlp.shared_expert_gate": {
682
+ "group_size": 64,
683
+ "bits": 8
684
+ },
685
+ "language_model.model.layers.23.mlp.gate": {
686
+ "group_size": 64,
687
+ "bits": 8
688
+ },
689
+ "language_model.model.layers.23.mlp.shared_expert_gate": {
690
+ "group_size": 64,
691
+ "bits": 8
692
+ },
693
+ "language_model.model.layers.24.mlp.gate": {
694
+ "group_size": 64,
695
+ "bits": 8
696
+ },
697
+ "language_model.model.layers.24.mlp.shared_expert_gate": {
698
+ "group_size": 64,
699
+ "bits": 8
700
+ },
701
+ "language_model.model.layers.25.mlp.gate": {
702
+ "group_size": 64,
703
+ "bits": 8
704
+ },
705
+ "language_model.model.layers.25.mlp.shared_expert_gate": {
706
+ "group_size": 64,
707
+ "bits": 8
708
+ },
709
+ "language_model.model.layers.26.mlp.gate": {
710
+ "group_size": 64,
711
+ "bits": 8
712
+ },
713
+ "language_model.model.layers.26.mlp.shared_expert_gate": {
714
+ "group_size": 64,
715
+ "bits": 8
716
+ },
717
+ "language_model.model.layers.27.mlp.gate": {
718
+ "group_size": 64,
719
+ "bits": 8
720
+ },
721
+ "language_model.model.layers.27.mlp.shared_expert_gate": {
722
+ "group_size": 64,
723
+ "bits": 8
724
+ },
725
+ "language_model.model.layers.28.mlp.gate": {
726
+ "group_size": 64,
727
+ "bits": 8
728
+ },
729
+ "language_model.model.layers.28.mlp.shared_expert_gate": {
730
+ "group_size": 64,
731
+ "bits": 8
732
+ },
733
+ "language_model.model.layers.29.mlp.gate": {
734
+ "group_size": 64,
735
+ "bits": 8
736
+ },
737
+ "language_model.model.layers.29.mlp.shared_expert_gate": {
738
+ "group_size": 64,
739
+ "bits": 8
740
+ },
741
+ "language_model.model.layers.30.mlp.gate": {
742
+ "group_size": 64,
743
+ "bits": 8
744
+ },
745
+ "language_model.model.layers.30.mlp.shared_expert_gate": {
746
+ "group_size": 64,
747
+ "bits": 8
748
+ },
749
+ "language_model.model.layers.31.mlp.gate": {
750
+ "group_size": 64,
751
+ "bits": 8
752
+ },
753
+ "language_model.model.layers.31.mlp.shared_expert_gate": {
754
+ "group_size": 64,
755
+ "bits": 8
756
+ },
757
+ "language_model.model.layers.32.mlp.gate": {
758
+ "group_size": 64,
759
+ "bits": 8
760
+ },
761
+ "language_model.model.layers.32.mlp.shared_expert_gate": {
762
+ "group_size": 64,
763
+ "bits": 8
764
+ },
765
+ "language_model.model.layers.33.mlp.gate": {
766
+ "group_size": 64,
767
+ "bits": 8
768
+ },
769
+ "language_model.model.layers.33.mlp.shared_expert_gate": {
770
+ "group_size": 64,
771
+ "bits": 8
772
+ },
773
+ "language_model.model.layers.34.mlp.gate": {
774
+ "group_size": 64,
775
+ "bits": 8
776
+ },
777
+ "language_model.model.layers.34.mlp.shared_expert_gate": {
778
+ "group_size": 64,
779
+ "bits": 8
780
+ },
781
+ "language_model.model.layers.35.mlp.gate": {
782
+ "group_size": 64,
783
+ "bits": 8
784
+ },
785
+ "language_model.model.layers.35.mlp.shared_expert_gate": {
786
+ "group_size": 64,
787
+ "bits": 8
788
+ },
789
+ "language_model.model.layers.36.mlp.gate": {
790
+ "group_size": 64,
791
+ "bits": 8
792
+ },
793
+ "language_model.model.layers.36.mlp.shared_expert_gate": {
794
+ "group_size": 64,
795
+ "bits": 8
796
+ },
797
+ "language_model.model.layers.37.mlp.gate": {
798
+ "group_size": 64,
799
+ "bits": 8
800
+ },
801
+ "language_model.model.layers.37.mlp.shared_expert_gate": {
802
+ "group_size": 64,
803
+ "bits": 8
804
+ },
805
+ "language_model.model.layers.38.mlp.gate": {
806
+ "group_size": 64,
807
+ "bits": 8
808
+ },
809
+ "language_model.model.layers.38.mlp.shared_expert_gate": {
810
+ "group_size": 64,
811
+ "bits": 8
812
+ },
813
+ "language_model.model.layers.39.mlp.gate": {
814
+ "group_size": 64,
815
+ "bits": 8
816
+ },
817
+ "language_model.model.layers.39.mlp.shared_expert_gate": {
818
+ "group_size": 64,
819
+ "bits": 8
820
+ },
821
+ "language_model.model.layers.40.mlp.gate": {
822
+ "group_size": 64,
823
+ "bits": 8
824
+ },
825
+ "language_model.model.layers.40.mlp.shared_expert_gate": {
826
+ "group_size": 64,
827
+ "bits": 8
828
+ },
829
+ "language_model.model.layers.41.mlp.gate": {
830
+ "group_size": 64,
831
+ "bits": 8
832
+ },
833
+ "language_model.model.layers.41.mlp.shared_expert_gate": {
834
+ "group_size": 64,
835
+ "bits": 8
836
+ },
837
+ "language_model.model.layers.42.mlp.gate": {
838
+ "group_size": 64,
839
+ "bits": 8
840
+ },
841
+ "language_model.model.layers.42.mlp.shared_expert_gate": {
842
+ "group_size": 64,
843
+ "bits": 8
844
+ },
845
+ "language_model.model.layers.43.mlp.gate": {
846
+ "group_size": 64,
847
+ "bits": 8
848
+ },
849
+ "language_model.model.layers.43.mlp.shared_expert_gate": {
850
+ "group_size": 64,
851
+ "bits": 8
852
+ },
853
+ "language_model.model.layers.44.mlp.gate": {
854
+ "group_size": 64,
855
+ "bits": 8
856
+ },
857
+ "language_model.model.layers.44.mlp.shared_expert_gate": {
858
+ "group_size": 64,
859
+ "bits": 8
860
+ },
861
+ "language_model.model.layers.45.mlp.gate": {
862
+ "group_size": 64,
863
+ "bits": 8
864
+ },
865
+ "language_model.model.layers.45.mlp.shared_expert_gate": {
866
+ "group_size": 64,
867
+ "bits": 8
868
+ },
869
+ "language_model.model.layers.46.mlp.gate": {
870
+ "group_size": 64,
871
+ "bits": 8
872
+ },
873
+ "language_model.model.layers.46.mlp.shared_expert_gate": {
874
+ "group_size": 64,
875
+ "bits": 8
876
+ },
877
+ "language_model.model.layers.47.mlp.gate": {
878
+ "group_size": 64,
879
+ "bits": 8
880
+ },
881
+ "language_model.model.layers.47.mlp.shared_expert_gate": {
882
+ "group_size": 64,
883
+ "bits": 8
884
+ },
885
+ "language_model.model.layers.48.mlp.gate": {
886
+ "group_size": 64,
887
+ "bits": 8
888
+ },
889
+ "language_model.model.layers.48.mlp.shared_expert_gate": {
890
+ "group_size": 64,
891
+ "bits": 8
892
+ },
893
+ "language_model.model.layers.49.mlp.gate": {
894
+ "group_size": 64,
895
+ "bits": 8
896
+ },
897
+ "language_model.model.layers.49.mlp.shared_expert_gate": {
898
+ "group_size": 64,
899
+ "bits": 8
900
+ },
901
+ "language_model.model.layers.50.mlp.gate": {
902
+ "group_size": 64,
903
+ "bits": 8
904
+ },
905
+ "language_model.model.layers.50.mlp.shared_expert_gate": {
906
+ "group_size": 64,
907
+ "bits": 8
908
+ },
909
+ "language_model.model.layers.51.mlp.gate": {
910
+ "group_size": 64,
911
+ "bits": 8
912
+ },
913
+ "language_model.model.layers.51.mlp.shared_expert_gate": {
914
+ "group_size": 64,
915
+ "bits": 8
916
+ },
917
+ "language_model.model.layers.52.mlp.gate": {
918
+ "group_size": 64,
919
+ "bits": 8
920
+ },
921
+ "language_model.model.layers.52.mlp.shared_expert_gate": {
922
+ "group_size": 64,
923
+ "bits": 8
924
+ },
925
+ "language_model.model.layers.53.mlp.gate": {
926
+ "group_size": 64,
927
+ "bits": 8
928
+ },
929
+ "language_model.model.layers.53.mlp.shared_expert_gate": {
930
+ "group_size": 64,
931
+ "bits": 8
932
+ },
933
+ "language_model.model.layers.54.mlp.gate": {
934
+ "group_size": 64,
935
+ "bits": 8
936
+ },
937
+ "language_model.model.layers.54.mlp.shared_expert_gate": {
938
+ "group_size": 64,
939
+ "bits": 8
940
+ },
941
+ "language_model.model.layers.55.mlp.gate": {
942
+ "group_size": 64,
943
+ "bits": 8
944
+ },
945
+ "language_model.model.layers.55.mlp.shared_expert_gate": {
946
+ "group_size": 64,
947
+ "bits": 8
948
+ },
949
+ "language_model.model.layers.56.mlp.gate": {
950
+ "group_size": 64,
951
+ "bits": 8
952
+ },
953
+ "language_model.model.layers.56.mlp.shared_expert_gate": {
954
+ "group_size": 64,
955
+ "bits": 8
956
+ },
957
+ "language_model.model.layers.57.mlp.gate": {
958
+ "group_size": 64,
959
+ "bits": 8
960
+ },
961
+ "language_model.model.layers.57.mlp.shared_expert_gate": {
962
+ "group_size": 64,
963
+ "bits": 8
964
+ },
965
+ "language_model.model.layers.58.mlp.gate": {
966
+ "group_size": 64,
967
+ "bits": 8
968
+ },
969
+ "language_model.model.layers.58.mlp.shared_expert_gate": {
970
+ "group_size": 64,
971
+ "bits": 8
972
+ },
973
+ "language_model.model.layers.59.mlp.gate": {
974
+ "group_size": 64,
975
+ "bits": 8
976
+ },
977
+ "language_model.model.layers.59.mlp.shared_expert_gate": {
978
+ "group_size": 64,
979
+ "bits": 8
980
+ }
981
+ },
982
+ "text_config": {
983
+ "attention_bias": false,
984
+ "attention_dropout": 0.0,
985
+ "attn_output_gate": true,
986
+ "bos_token_id": null,
987
+ "torch_dtype": "bfloat16",
988
+ "eos_token_id": 248044,
989
+ "full_attention_interval": 4,
990
+ "head_dim": 256,
991
+ "hidden_act": "silu",
992
+ "hidden_size": 4096,
993
+ "initializer_range": 0.02,
994
+ "layer_types": [
995
+ "linear_attention",
996
+ "linear_attention",
997
+ "linear_attention",
998
+ "full_attention",
999
+ "linear_attention",
1000
+ "linear_attention",
1001
+ "linear_attention",
1002
+ "full_attention",
1003
+ "linear_attention",
1004
+ "linear_attention",
1005
+ "linear_attention",
1006
+ "full_attention",
1007
+ "linear_attention",
1008
+ "linear_attention",
1009
+ "linear_attention",
1010
+ "full_attention",
1011
+ "linear_attention",
1012
+ "linear_attention",
1013
+ "linear_attention",
1014
+ "full_attention",
1015
+ "linear_attention",
1016
+ "linear_attention",
1017
+ "linear_attention",
1018
+ "full_attention",
1019
+ "linear_attention",
1020
+ "linear_attention",
1021
+ "linear_attention",
1022
+ "full_attention",
1023
+ "linear_attention",
1024
+ "linear_attention",
1025
+ "linear_attention",
1026
+ "full_attention",
1027
+ "linear_attention",
1028
+ "linear_attention",
1029
+ "linear_attention",
1030
+ "full_attention",
1031
+ "linear_attention",
1032
+ "linear_attention",
1033
+ "linear_attention",
1034
+ "full_attention",
1035
+ "linear_attention",
1036
+ "linear_attention",
1037
+ "linear_attention",
1038
+ "full_attention",
1039
+ "linear_attention",
1040
+ "linear_attention",
1041
+ "linear_attention",
1042
+ "full_attention",
1043
+ "linear_attention",
1044
+ "linear_attention",
1045
+ "linear_attention",
1046
+ "full_attention",
1047
+ "linear_attention",
1048
+ "linear_attention",
1049
+ "linear_attention",
1050
+ "full_attention",
1051
+ "linear_attention",
1052
+ "linear_attention",
1053
+ "linear_attention",
1054
+ "full_attention"
1055
+ ],
1056
+ "linear_conv_kernel_dim": 4,
1057
+ "linear_key_head_dim": 128,
1058
+ "linear_num_key_heads": 16,
1059
+ "linear_num_value_heads": 64,
1060
+ "linear_value_head_dim": 128,
1061
+ "mamba_ssm_dtype": "float32",
1062
+ "max_position_embeddings": 262144,
1063
+ "mlp_only_layers": [],
1064
+ "model_type": "qwen3_5_moe_text",
1065
+ "moe_intermediate_size": 1024,
1066
+ "mtp_num_hidden_layers": 1,
1067
+ "mtp_use_dedicated_embeddings": false,
1068
+ "num_attention_heads": 32,
1069
+ "num_experts": 512,
1070
+ "num_experts_per_tok": 10,
1071
+ "num_hidden_layers": 60,
1072
+ "num_key_value_heads": 2,
1073
+ "output_router_logits": false,
1074
+ "pad_token_id": null,
1075
+ "partial_rotary_factor": 0.25,
1076
+ "rms_norm_eps": 1e-06,
1077
+ "rope_parameters": {
1078
+ "mrope_interleaved": true,
1079
+ "mrope_section": [
1080
+ 11,
1081
+ 11,
1082
+ 10
1083
+ ],
1084
+ "partial_rotary_factor": 0.25,
1085
+ "rope_theta": 10000000,
1086
+ "type": "default"
1087
+ },
1088
+ "router_aux_loss_coef": 0.001,
1089
+ "shared_expert_intermediate_size": 1024,
1090
+ "tie_word_embeddings": false,
1091
+ "use_cache": true,
1092
+ "vocab_size": 248320
1093
+ },
1094
+ "tie_word_embeddings": false,
1095
+ "transformers_version": "5.2.0.dev0",
1096
+ "unsloth_fixed": true,
1097
+ "video_token_id": 248057,
1098
+ "vision_end_token_id": 248054,
1099
+ "vision_start_token_id": 248053
1100
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 248044,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 248046,
6
+ 248044
7
+ ],
8
+ "pad_token_id": 248044,
9
+ "temperature": 0.6,
10
+ "top_k": 20,
11
+ "top_p": 0.95,
12
+ "transformers_version": "4.57.0.dev0"
13
+ }
model-00001-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:456f43273664ed997cfac6afd3f2bf87128a58e446638d22ebd4558dc0e0b065
3
+ size 4340497708
model-00003-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:596c6caf10767f1908374ee29d296050621e64071677381c692cf43cec988950
3
+ size 4900154294
model-00004-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34e2052a9b7d86701e0545489d59d64989bfde88fc6418bd01beb99f518dc69f
3
+ size 4983411966
model-00005-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bc5539bf604b1ed55831b20372f25ae7cd5f11c3c0fe9337579cb78d63e9d15
3
+ size 4907625938
model-00006-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3261a10f09f2d7f6940e487fa386d0a6e28a099a303725ea3ad99b9756b995e
3
+ size 4900154304
model-00008-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1466d947b41bf0adec40b83ba757d872231c5de695a113f99bf70024bb9288e
3
+ size 4907625991
model-00009-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07cf03eeb0e5330be24517bfd174b74fa390496fa7ad34f2a333ff0448078a94
3
+ size 4900154335
model-00010-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9370421c66939b687cb5b80ba13bde37ea7058a5a6ab8c5238ac8d86dff21de
3
+ size 4983412044
model-00011-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d6b965542c00d00a830e5c7ac0dc8ba67f214ae9c12909d2563af5f5bc5a6cd
3
+ size 4907626042
model-00012-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b41683da8a88716fc09a276ffbc6d5f586b51aab4b50b699d7d92a0382ee68d
3
+ size 4900154341
model-00013-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f945159c34c3950861377047d0a76fddf03d52b86f8538a952dd41f501bf1584
3
+ size 4983412006
model-00014-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78cb948d81783b7af0adaf26ac0ce3b2a3b2f0947668c7758381c5a7d88f27ca
3
+ size 4907625982
model-00015-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0835c4b9255019cf18953e5ddb53cf9ea4b859fb075e123dced68d92006d8ae7
3
+ size 4900154347
model-00016-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56a37f302b1a143e2188fa41b1dcca5195831013dde094ac90f21276276e1d23
3
+ size 4983412022
model-00017-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ebe9a433436778b03390b191992b3288aab11dfdda0215bd855cacbcbb4fabca
3
+ size 4907625978
model-00018-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c6cf934936edc29c7e8ecb9ab269e19d0b372134e20608c17bc6fd0ab90e9fe
3
+ size 4900154337
model-00019-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8f67f0e735e6bc7c7302f6de5291c626be427b33cac75b38d21e02b4d66bd87
3
+ size 4983412026
model-00020-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75dfea398d0a2ce6f7f7cc1414ab87ad055ae36367afe3854393005e9e48d446
3
+ size 4907626030
model-00021-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30b4207945236e718ecf672a027caea75a681ae18675fd0a7380fb7552a28cdb
3
+ size 4900154317
model-00022-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2426aeb3633c89d528802a2f1f50e502e393595aad160bf3c8a7e50e91988cf6
3
+ size 4983412020
model-00023-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb02b479cc6536a7a9050473900bc9ea57a4d7fe0bb68a0ffe2226ac58d053ad
3
+ size 4907626038
model-00024-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da885e4fc646e7f6f07e4d17f8adf430041bc352eb446a0b3eb15e994c4f75b6
3
+ size 4900154347
model-00025-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94e1a1a8ab8acffaa5f02cbe1cecee65786e77a08c12f33025f59dbc33029e33
3
+ size 4983412042
model-00026-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecca87d3e1c65e84850288367ec225b3f153a5571f05679a8802f1e02fa2beda
3
+ size 4907626030
model-00027-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14d5f543cae9d2632bc3047c9b0de42ae64f6e481ddf31019ab21f34cd009da6
3
+ size 4900154347
model-00028-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e832ef6e4e001da92ba51db5fc08f6c96297a713c6da712248de8ffb100e0a08
3
+ size 4983412016
model-00029-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8c1ac245616cb92a74555e059e10d14c5cf9dc007756e00ace8cc0146c65988
3
+ size 4907626034
model-00030-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd257dff03daa1c8405ab9c1d3c3496c4818a9b0926bcd8bf404d4b4f6d8c18b
3
+ size 4900154331
model-00031-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:90224fb9892156d31d97dd2b5645e6f770d0d66833e3e9c0ebb7cd72fc63babf
3
+ size 4983412016
model-00032-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25b11f7b31b86956085697c0d5ac21d4c5a6ec33357e8dd8ea7c31afafb01b2d
3
+ size 4907625970
model-00033-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75a5add9b57b9c94412c67571cda2156ffe15a505fc5943e582eb9b1d493452b
3
+ size 4900154343
model-00034-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e122e0b84463927313096721a50e2725989878bbb6c46533966541f7f4f3e27
3
+ size 4983411996
model-00035-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0b75f8378deaa6215ad0aef360d8c78309324da4a36a9ec138e5cf805ac6bdc
3
+ size 4907626016
model-00036-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b54a7d707f3f4a7d05c855afab589ac8064b1528ce5afc4eb7a1e2e54079a44c
3
+ size 4900154277
model-00037-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b79a88926ef85567e9929319aefd053aef0bf2555121b5fde8d98b5c254130c
3
+ size 4983412018
model-00038-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da1b469ede82e329ed09c8aee85d8a144281cbe2ca4f968a32d3dc22dc0ff153
3
+ size 4907625958
model-00039-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9eb67fc3e83bd0b2381872d2d98c2dad7c4d06647dbf09ef177f1f2f330a7af
3
+ size 4900154343
model-00040-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62675676f34af5108094e48de0edcaae330ec6c19076e371f335f0c11b0c92dd
3
+ size 4983412050
model-00041-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e60f3b831c327015b7f8e137b31410ef2d9c7ffd700713bb820be6a6c7ab9f8
3
+ size 4907625976
model-00042-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13e94fabe68719c6b9a0c14ef94d1dd557409adafc366abd9a6f4c62234e7505
3
+ size 4900154343
model-00044-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a42bfc9c34c4cc07756d9a2b2364629dc4cf5a2dbfd39c66cdb76063f393a2
3
+ size 4907626038
model-00045-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd995517c9460cf792adc9a96a4b5ecc95e23f72c0d60602b10110d4ef3b7a1d
3
+ size 4900154347
model-00046-of-00046.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:726cda5891e13d726197cd592f7586d11560db314e1adae6fcd270dddfaaab31
3
+ size 1787181790
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87a7830d63fcf43bf241c3c5242e96e62dd3fdc29224ca26fed8ea333db72de4
3
+ size 19989343