lmmy committed on
Commit
c9e9a9d
·
verified ·
1 Parent(s): 110e792

Add files using upload-large-folder tool

Browse files
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. README.md +27 -0
  3. added_tokens.json +56 -0
  4. chat_template.jinja +159 -0
  5. config.json +614 -0
  6. generation_config.json +7 -0
  7. merges.txt +0 -0
  8. model-00001-of-00038.safetensors +3 -0
  9. model-00002-of-00038.safetensors +3 -0
  10. model-00003-of-00038.safetensors +3 -0
  11. model-00004-of-00038.safetensors +3 -0
  12. model-00005-of-00038.safetensors +3 -0
  13. model-00006-of-00038.safetensors +3 -0
  14. model-00007-of-00038.safetensors +3 -0
  15. model-00008-of-00038.safetensors +3 -0
  16. model-00009-of-00038.safetensors +3 -0
  17. model-00010-of-00038.safetensors +3 -0
  18. model-00011-of-00038.safetensors +3 -0
  19. model-00012-of-00038.safetensors +3 -0
  20. model-00013-of-00038.safetensors +3 -0
  21. model-00014-of-00038.safetensors +3 -0
  22. model-00015-of-00038.safetensors +3 -0
  23. model-00016-of-00038.safetensors +3 -0
  24. model-00017-of-00038.safetensors +3 -0
  25. model-00018-of-00038.safetensors +3 -0
  26. model-00019-of-00038.safetensors +3 -0
  27. model-00020-of-00038.safetensors +3 -0
  28. model-00021-of-00038.safetensors +3 -0
  29. model-00022-of-00038.safetensors +3 -0
  30. model-00023-of-00038.safetensors +3 -0
  31. model-00024-of-00038.safetensors +3 -0
  32. model-00025-of-00038.safetensors +3 -0
  33. model-00026-of-00038.safetensors +3 -0
  34. model-00027-of-00038.safetensors +3 -0
  35. model-00028-of-00038.safetensors +3 -0
  36. model-00029-of-00038.safetensors +3 -0
  37. model-00030-of-00038.safetensors +3 -0
  38. model-00031-of-00038.safetensors +3 -0
  39. model-00032-of-00038.safetensors +3 -0
  40. model-00033-of-00038.safetensors +3 -0
  41. model-00034-of-00038.safetensors +3 -0
  42. model-00035-of-00038.safetensors +3 -0
  43. model-00036-of-00038.safetensors +3 -0
  44. model-00037-of-00038.safetensors +3 -0
  45. model-00038-of-00038.safetensors +3 -0
  46. model.safetensors.index.json +0 -0
  47. special_tokens_map.json +75 -0
  48. tokenizer.json +3 -0
  49. tokenizer_config.json +497 -0
  50. vocab.json +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ pipeline_tag: text-generation
3
+ license: mit
4
+ library_name: transformers
5
+ tags:
6
+ - mlx
7
+ base_model: MiniMaxAI/MiniMax-M2
8
+ ---
9
+ ## 💫 Community Model> MiniMax-M2 by MiniMaxAI
10
+
11
+ _👾 [LM Studio](https://lmstudio.ai) Community models highlights program. Highlighting new & noteworthy models by the community. Join the conversation on [Discord](https://discord.gg/aPQfnNkxGC)_.
12
+
13
+ **Model creator**: [MiniMaxAI](https://huggingface.co/MiniMaxAI)<br>
14
+ **Original model**: [MiniMax-M2](https://huggingface.co/MiniMaxAI/MiniMax-M2)<br>
15
+ **MLX quantization**: provided by [LM Studio team](https://x.com/lmstudio) using [mlx_lm](https://github.com/ml-explore/mlx-lm)<br>
16
+
17
+ ## Technical Details
18
+
19
+ 6-bit quantized version of MiniMax-M2 using MLX, optimized for Apple Silicon.
20
+
21
+ ## Special thanks
22
+
23
+ 🙏 Special thanks to the [Apple Machine Learning Research](https://github.com/ml-explore) team for creating [MLX](https://github.com/ml-explore/mlx).
24
+
25
+ ## Disclaimers
26
+
27
+ LM Studio is not the creator, originator, or owner of any Model featured in the Community Model Program. Each Community Model is created and provided by third parties. LM Studio does not endorse, support, represent or guarantee the completeness, truthfulness, accuracy, or reliability of any Community Model. You understand that Community Models can produce content that might be offensive, harmful, inaccurate or otherwise inappropriate, or deceptive. Each Community Model is the sole responsibility of the person or entity who originated such Model. LM Studio may not monitor or control the Community Models and cannot, and does not, take responsibility for any such Model. LM Studio disclaims all warranties or guarantees about the accuracy, reliability or benefits of the Community Models. LM Studio further disclaims any warranty that the Community Model will meet your requirements, be secure, uninterrupted or available at any time or location, or error-free, viruses-free, or that any errors will be corrected, or otherwise. You will be solely responsible for any damage resulting from your use of or access to the Community Models, your downloading of any Community Model, or use of any other Community Model provided by or through LM Studio.
added_tokens.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</minimax:tool_call>": 200053,
3
+ "</think>": 200051,
4
+ "<add_file>": 200036,
5
+ "<code_context>": 200043,
6
+ "<code_interpreter>": 200023,
7
+ "<commit_after>": 200018,
8
+ "<commit_before>": 200016,
9
+ "<commit_message>": 200040,
10
+ "<commit_msg>": 200017,
11
+ "<delete_file>": 200037,
12
+ "<edit_file>": 200039,
13
+ "<empty_output>": 200015,
14
+ "<empty_source_file>": 200041,
15
+ "<file_content>": 200044,
16
+ "<file_sep>": 200049,
17
+ "<filename>": 200006,
18
+ "<filepath>": 200048,
19
+ "<fim_middle>": 200002,
20
+ "<fim_pad>": 200004,
21
+ "<fim_prefix>": 200001,
22
+ "<fim_suffix>": 200003,
23
+ "<function_call>": 200022,
24
+ "<gh_stars>": 200007,
25
+ "<issue_closed>": 200010,
26
+ "<issue_comment>": 200009,
27
+ "<issue_start>": 200008,
28
+ "<jupyter_code>": 200013,
29
+ "<jupyter_error>": 200035,
30
+ "<jupyter_output>": 200014,
31
+ "<jupyter_start>": 200011,
32
+ "<jupyter_text>": 200012,
33
+ "<minimax:tool_call>": 200052,
34
+ "<pr_start>": 200046,
35
+ "<rename_file>": 200038,
36
+ "<repo_struct>": 200042,
37
+ "<reponame>": 200005,
38
+ "<review_comment>": 200047,
39
+ "<source_files>": 200045,
40
+ "<think>": 200050,
41
+ "[e~[": 200020,
42
+ "]!d~[": 200021,
43
+ "]!p~[": 200000,
44
+ "]<]end of image[>[": 200030,
45
+ "]<]end of speech[>[": 200028,
46
+ "]<]end of video[>[": 200032,
47
+ "]<]image[>[": 200025,
48
+ "]<]speech[>[": 200024,
49
+ "]<]start of image[>[": 200029,
50
+ "]<]start of speech[>[": 200027,
51
+ "]<]start of video[>[": 200031,
52
+ "]<]video[>[": 200026,
53
+ "]<]vision pad[>[": 200033,
54
+ "]~!b[": 200034,
55
+ "]~b]": 200019
56
+ }
chat_template.jinja ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# ---------- special token variables ---------- #}
2
+ {%- set toolcall_begin_token = '<minimax:tool_call>' -%}
3
+ {%- set toolcall_end_token = '</minimax:tool_call>' -%}
4
+ {#- Tool Rendering Functions ============================================== -#}
5
+ {%- macro render_tool_namespace(namespace_name, tool_list) -%}
6
+ {%- for tool in tool_list -%}
7
+ <tool>{{ tool.function | tojson(ensure_ascii=False) }}</tool>
8
+ {% endfor -%}
9
+ {%- endmacro -%}
10
+ {%- macro visible_text(content) -%}
11
+ {%- if content is string -%}
12
+ {{ content }}
13
+ {%- elif content is iterable and content is not mapping -%}
14
+ {%- for item in content -%}
15
+ {%- if item is mapping and item.type == 'text' -%}
16
+ {{- item.text }}
17
+ {%- elif item is string -%}
18
+ {{- item }}
19
+ {%- endif -%}
20
+ {%- endfor -%}
21
+ {%- else -%}
22
+ {{- content }}
23
+ {%- endif -%}
24
+ {%- endmacro -%}
25
+ {#- System Message Construction ============================================ -#}
26
+ {%- macro build_system_message(system_message) -%}
27
+ {%- if system_message and system_message.content -%}
28
+ {{- visible_text(system_message.content) }}
29
+ {%- else -%}
30
+ {%- if model_identity is not defined -%}
31
+ {%- set model_identity = "You are a helpful assistant." -%}
32
+ {%- endif -%}
33
+ {{- model_identity }}
34
+ {%- endif -%}
35
+
36
+ {#- Handle current_date -#}
37
+ {%- if system_message and system_message.current_date -%}
38
+ {{- '\n' ~ 'Current date: ' + system_message.current_date }}
39
+ {%- endif -%}
40
+ {#- Handle current_location -#}
41
+ {%- if system_message and system_message.current_location -%}
42
+ {{- '\n' ~ 'Current location: ' + system_message.current_location }}
43
+ {%- endif -%}
44
+ {%- endmacro -%}
45
+ {#- Main Template Logic ================================================= -#}
46
+ {#- Extract system message (only first message if it's system) -#}
47
+ {%- set system_message = none -%}
48
+ {%- set conversation_messages = messages -%}
49
+ {%- if messages and messages[0].role == "system" -%}
50
+ {%- set system_message = messages[0] -%}
51
+ {%- set conversation_messages = messages[1:] -%}
52
+ {%- endif -%}
53
+ {#- Get the last user message turn, for interleaved thinking -#}
54
+ {%- set ns = namespace(last_user_index=-1) %}
55
+ {% for m in conversation_messages %}
56
+ {%- if m.role == 'user' %}
57
+ {% set ns.last_user_index = loop.index0 -%}
58
+ {%- endif %}
59
+ {%- endfor %}
60
+ {#- Render system message -#}
61
+ {{- ']~!b[' ~ ']~b]system' ~ '\n' }}
62
+ {{- build_system_message(system_message) }}
63
+ {#- Render tools if available -#}
64
+ {%- if tools -%}
65
+ {{- '\n\n' ~ '# Tools' ~ '\n' ~ 'You may call one or more tools to assist with the user query.\nHere are the tools available in JSONSchema format:' ~ '\n' }}
66
+ {{- '\n' ~ '<tools>' ~ '\n' }}
67
+ {{- render_tool_namespace("functions", tools) }}
68
+ {{- '</tools>' ~ '\n\n' }}
69
+ {{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\n' }}
70
+ {{- '\n' ~ toolcall_begin_token }}
71
+ <invoke name="tool-name-1">
72
+ <parameter name="param-key-1">param-value-1</parameter>
73
+ <parameter name="param-key-2">param-value-2</parameter>
74
+ ...
75
+ </invoke>
76
+ {{- '\n' ~ toolcall_end_token }}
77
+ {%- endif -%}
78
+ {{- '[e~[\n' }}
79
+
80
+ {#- Render messages -#}
81
+ {%- set last_tool_call = namespace(name=none) -%}
82
+ {%- for message in conversation_messages -%}
83
+ {%- if message.role == 'assistant' -%}
84
+ {#- Only render reasoning_content if no user message follows -#}
85
+ {{- ']~b]ai' ~ '\n' }}
86
+
87
+ {%- set reasoning_content = '' %}
88
+ {%- set content = visible_text(message.content) %}
89
+ {%- if message.reasoning_content is string %}
90
+ {%- set reasoning_content = message.reasoning_content %}
91
+ {%- else %}
92
+ {%- if '</think>' in content %}
93
+ {%- set reasoning_content = content.split('</think>')[0].strip('\n').split('<think>')[-1].strip('\n') %}
94
+ {%- set content = content.split('</think>')[-1].strip('\n') %}
95
+ {%- endif %}
96
+ {%- endif %}
97
+ {%- if reasoning_content and loop.index0 > ns.last_user_index -%}
98
+ {{- '<think>' ~ '\n' ~ reasoning_content ~ '\n' ~ '</think>' ~ '\n\n' }}
99
+ {%- endif -%}
100
+ {%- if content -%}
101
+ {{- content }}
102
+ {%- endif -%}
103
+ {%- if message.tool_calls -%}
104
+ {{- '\n' ~ toolcall_begin_token ~ '\n' }}
105
+
106
+ {%- for tool_call in message.tool_calls -%}
107
+ {%- if tool_call.function %}
108
+ {%- set tool_call = tool_call.function %}
109
+ {%- endif %}
110
+ {{- '<invoke name="' + tool_call.name + '">' }}
111
+ {% set _args = tool_call.arguments %}
112
+ {%- for k, v in _args.items() %}
113
+ {{- '<parameter name="' + k + '">' }}
114
+ {{- v | tojson(ensure_ascii=False) if v is not string else v }}
115
+ {{- '</parameter>' }}
116
+ {% endfor %}
117
+ {{- '</invoke>' ~ '\n' }}
118
+ {%- endfor -%}
119
+
120
+ {{- toolcall_end_token}}
121
+ {%- set last_tool_call.name = message.tool_calls[-1].name -%}
122
+ {%- else -%}
123
+ {%- set last_tool_call.name = none -%}
124
+ {%- endif -%}
125
+ {{- '[e~[' ~ '\n' }}
126
+
127
+ {%- elif message.role == 'tool' -%}
128
+ {%- if last_tool_call.name is none -%}
129
+ {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
130
+ {%- endif -%}
131
+ {%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%}
132
+ {{- ']~b]tool' }}
133
+ {%- endif -%}
134
+ {%- if message.content is string -%}
135
+ {{- '\n<response>' }}
136
+ {{- message.content }}
137
+ {{- '</response>' }}
138
+ {%- else -%}
139
+ {%- for tr in message.content -%}
140
+ {{- '\n<response>' }}
141
+ {{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }}
142
+ {{- '\n</response>' }}
143
+ {%- endfor -%}
144
+ {%- endif -%}
145
+ {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}
146
+ {{- '[e~[\n' -}}
147
+ {%- endif -%}
148
+
149
+ {%- elif message.role == 'user' -%}
150
+ {{- ']~b]user' ~ '\n' }}
151
+ {{- visible_text(message.content) }}
152
+ {{- '[e~[' ~ '\n' }}
153
+ {%- endif -%}
154
+ {%- endfor -%}
155
+
156
+ {#- Generation prompt -#}
157
+ {%- if add_generation_prompt -%}
158
+ {{- ']~b]ai' ~ '\n' ~ '<think>' ~ '\n' }}
159
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,614 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MiniMaxM2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "attn_type_list": [
7
+ 1,
8
+ 1,
9
+ 1,
10
+ 1,
11
+ 1,
12
+ 1,
13
+ 1,
14
+ 1,
15
+ 1,
16
+ 1,
17
+ 1,
18
+ 1,
19
+ 1,
20
+ 1,
21
+ 1,
22
+ 1,
23
+ 1,
24
+ 1,
25
+ 1,
26
+ 1,
27
+ 1,
28
+ 1,
29
+ 1,
30
+ 1,
31
+ 1,
32
+ 1,
33
+ 1,
34
+ 1,
35
+ 1,
36
+ 1,
37
+ 1,
38
+ 1,
39
+ 1,
40
+ 1,
41
+ 1,
42
+ 1,
43
+ 1,
44
+ 1,
45
+ 1,
46
+ 1,
47
+ 1,
48
+ 1,
49
+ 1,
50
+ 1,
51
+ 1,
52
+ 1,
53
+ 1,
54
+ 1,
55
+ 1,
56
+ 1,
57
+ 1,
58
+ 1,
59
+ 1,
60
+ 1,
61
+ 1,
62
+ 1,
63
+ 1,
64
+ 1,
65
+ 1,
66
+ 1,
67
+ 1,
68
+ 1
69
+ ],
70
+ "bos_token_id": null,
71
+ "eos_token_id": null,
72
+ "head_dim": 128,
73
+ "hidden_act": "silu",
74
+ "hidden_size": 3072,
75
+ "initializer_range": 0.02,
76
+ "intermediate_size": 1536,
77
+ "layernorm_full_attention_beta": 1.0,
78
+ "layernorm_linear_attention_beta": 1.0,
79
+ "layernorm_mlp_beta": 1.0,
80
+ "max_position_embeddings": 196608,
81
+ "mlp_intermediate_size": 8192,
82
+ "model_type": "minimax",
83
+ "mtp_transformer_layers": 1,
84
+ "num_attention_heads": 48,
85
+ "num_experts_per_tok": 8,
86
+ "num_hidden_layers": 62,
87
+ "num_key_value_heads": 8,
88
+ "num_local_experts": 256,
89
+ "num_mtp_modules": 3,
90
+ "output_router_logits": false,
91
+ "qk_norm_type": "per_layer",
92
+ "quantization": {
93
+ "group_size": 64,
94
+ "bits": 6,
95
+ "mode": "affine",
96
+ "model.layers.0.block_sparse_moe.gate": {
97
+ "group_size": 64,
98
+ "bits": 8
99
+ },
100
+ "model.layers.1.block_sparse_moe.gate": {
101
+ "group_size": 64,
102
+ "bits": 8
103
+ },
104
+ "model.layers.2.block_sparse_moe.gate": {
105
+ "group_size": 64,
106
+ "bits": 8
107
+ },
108
+ "model.layers.3.block_sparse_moe.gate": {
109
+ "group_size": 64,
110
+ "bits": 8
111
+ },
112
+ "model.layers.4.block_sparse_moe.gate": {
113
+ "group_size": 64,
114
+ "bits": 8
115
+ },
116
+ "model.layers.5.block_sparse_moe.gate": {
117
+ "group_size": 64,
118
+ "bits": 8
119
+ },
120
+ "model.layers.6.block_sparse_moe.gate": {
121
+ "group_size": 64,
122
+ "bits": 8
123
+ },
124
+ "model.layers.7.block_sparse_moe.gate": {
125
+ "group_size": 64,
126
+ "bits": 8
127
+ },
128
+ "model.layers.8.block_sparse_moe.gate": {
129
+ "group_size": 64,
130
+ "bits": 8
131
+ },
132
+ "model.layers.9.block_sparse_moe.gate": {
133
+ "group_size": 64,
134
+ "bits": 8
135
+ },
136
+ "model.layers.10.block_sparse_moe.gate": {
137
+ "group_size": 64,
138
+ "bits": 8
139
+ },
140
+ "model.layers.11.block_sparse_moe.gate": {
141
+ "group_size": 64,
142
+ "bits": 8
143
+ },
144
+ "model.layers.12.block_sparse_moe.gate": {
145
+ "group_size": 64,
146
+ "bits": 8
147
+ },
148
+ "model.layers.13.block_sparse_moe.gate": {
149
+ "group_size": 64,
150
+ "bits": 8
151
+ },
152
+ "model.layers.14.block_sparse_moe.gate": {
153
+ "group_size": 64,
154
+ "bits": 8
155
+ },
156
+ "model.layers.15.block_sparse_moe.gate": {
157
+ "group_size": 64,
158
+ "bits": 8
159
+ },
160
+ "model.layers.16.block_sparse_moe.gate": {
161
+ "group_size": 64,
162
+ "bits": 8
163
+ },
164
+ "model.layers.17.block_sparse_moe.gate": {
165
+ "group_size": 64,
166
+ "bits": 8
167
+ },
168
+ "model.layers.18.block_sparse_moe.gate": {
169
+ "group_size": 64,
170
+ "bits": 8
171
+ },
172
+ "model.layers.19.block_sparse_moe.gate": {
173
+ "group_size": 64,
174
+ "bits": 8
175
+ },
176
+ "model.layers.20.block_sparse_moe.gate": {
177
+ "group_size": 64,
178
+ "bits": 8
179
+ },
180
+ "model.layers.21.block_sparse_moe.gate": {
181
+ "group_size": 64,
182
+ "bits": 8
183
+ },
184
+ "model.layers.22.block_sparse_moe.gate": {
185
+ "group_size": 64,
186
+ "bits": 8
187
+ },
188
+ "model.layers.23.block_sparse_moe.gate": {
189
+ "group_size": 64,
190
+ "bits": 8
191
+ },
192
+ "model.layers.24.block_sparse_moe.gate": {
193
+ "group_size": 64,
194
+ "bits": 8
195
+ },
196
+ "model.layers.25.block_sparse_moe.gate": {
197
+ "group_size": 64,
198
+ "bits": 8
199
+ },
200
+ "model.layers.26.block_sparse_moe.gate": {
201
+ "group_size": 64,
202
+ "bits": 8
203
+ },
204
+ "model.layers.27.block_sparse_moe.gate": {
205
+ "group_size": 64,
206
+ "bits": 8
207
+ },
208
+ "model.layers.28.block_sparse_moe.gate": {
209
+ "group_size": 64,
210
+ "bits": 8
211
+ },
212
+ "model.layers.29.block_sparse_moe.gate": {
213
+ "group_size": 64,
214
+ "bits": 8
215
+ },
216
+ "model.layers.30.block_sparse_moe.gate": {
217
+ "group_size": 64,
218
+ "bits": 8
219
+ },
220
+ "model.layers.31.block_sparse_moe.gate": {
221
+ "group_size": 64,
222
+ "bits": 8
223
+ },
224
+ "model.layers.32.block_sparse_moe.gate": {
225
+ "group_size": 64,
226
+ "bits": 8
227
+ },
228
+ "model.layers.33.block_sparse_moe.gate": {
229
+ "group_size": 64,
230
+ "bits": 8
231
+ },
232
+ "model.layers.34.block_sparse_moe.gate": {
233
+ "group_size": 64,
234
+ "bits": 8
235
+ },
236
+ "model.layers.35.block_sparse_moe.gate": {
237
+ "group_size": 64,
238
+ "bits": 8
239
+ },
240
+ "model.layers.36.block_sparse_moe.gate": {
241
+ "group_size": 64,
242
+ "bits": 8
243
+ },
244
+ "model.layers.37.block_sparse_moe.gate": {
245
+ "group_size": 64,
246
+ "bits": 8
247
+ },
248
+ "model.layers.38.block_sparse_moe.gate": {
249
+ "group_size": 64,
250
+ "bits": 8
251
+ },
252
+ "model.layers.39.block_sparse_moe.gate": {
253
+ "group_size": 64,
254
+ "bits": 8
255
+ },
256
+ "model.layers.40.block_sparse_moe.gate": {
257
+ "group_size": 64,
258
+ "bits": 8
259
+ },
260
+ "model.layers.41.block_sparse_moe.gate": {
261
+ "group_size": 64,
262
+ "bits": 8
263
+ },
264
+ "model.layers.42.block_sparse_moe.gate": {
265
+ "group_size": 64,
266
+ "bits": 8
267
+ },
268
+ "model.layers.43.block_sparse_moe.gate": {
269
+ "group_size": 64,
270
+ "bits": 8
271
+ },
272
+ "model.layers.44.block_sparse_moe.gate": {
273
+ "group_size": 64,
274
+ "bits": 8
275
+ },
276
+ "model.layers.45.block_sparse_moe.gate": {
277
+ "group_size": 64,
278
+ "bits": 8
279
+ },
280
+ "model.layers.46.block_sparse_moe.gate": {
281
+ "group_size": 64,
282
+ "bits": 8
283
+ },
284
+ "model.layers.47.block_sparse_moe.gate": {
285
+ "group_size": 64,
286
+ "bits": 8
287
+ },
288
+ "model.layers.48.block_sparse_moe.gate": {
289
+ "group_size": 64,
290
+ "bits": 8
291
+ },
292
+ "model.layers.49.block_sparse_moe.gate": {
293
+ "group_size": 64,
294
+ "bits": 8
295
+ },
296
+ "model.layers.50.block_sparse_moe.gate": {
297
+ "group_size": 64,
298
+ "bits": 8
299
+ },
300
+ "model.layers.51.block_sparse_moe.gate": {
301
+ "group_size": 64,
302
+ "bits": 8
303
+ },
304
+ "model.layers.52.block_sparse_moe.gate": {
305
+ "group_size": 64,
306
+ "bits": 8
307
+ },
308
+ "model.layers.53.block_sparse_moe.gate": {
309
+ "group_size": 64,
310
+ "bits": 8
311
+ },
312
+ "model.layers.54.block_sparse_moe.gate": {
313
+ "group_size": 64,
314
+ "bits": 8
315
+ },
316
+ "model.layers.55.block_sparse_moe.gate": {
317
+ "group_size": 64,
318
+ "bits": 8
319
+ },
320
+ "model.layers.56.block_sparse_moe.gate": {
321
+ "group_size": 64,
322
+ "bits": 8
323
+ },
324
+ "model.layers.57.block_sparse_moe.gate": {
325
+ "group_size": 64,
326
+ "bits": 8
327
+ },
328
+ "model.layers.58.block_sparse_moe.gate": {
329
+ "group_size": 64,
330
+ "bits": 8
331
+ },
332
+ "model.layers.59.block_sparse_moe.gate": {
333
+ "group_size": 64,
334
+ "bits": 8
335
+ },
336
+ "model.layers.60.block_sparse_moe.gate": {
337
+ "group_size": 64,
338
+ "bits": 8
339
+ },
340
+ "model.layers.61.block_sparse_moe.gate": {
341
+ "group_size": 64,
342
+ "bits": 8
343
+ }
344
+ },
345
+ "quantization_config": {
346
+ "group_size": 64,
347
+ "bits": 6,
348
+ "mode": "affine",
349
+ "model.layers.0.block_sparse_moe.gate": {
350
+ "group_size": 64,
351
+ "bits": 8
352
+ },
353
+ "model.layers.1.block_sparse_moe.gate": {
354
+ "group_size": 64,
355
+ "bits": 8
356
+ },
357
+ "model.layers.2.block_sparse_moe.gate": {
358
+ "group_size": 64,
359
+ "bits": 8
360
+ },
361
+ "model.layers.3.block_sparse_moe.gate": {
362
+ "group_size": 64,
363
+ "bits": 8
364
+ },
365
+ "model.layers.4.block_sparse_moe.gate": {
366
+ "group_size": 64,
367
+ "bits": 8
368
+ },
369
+ "model.layers.5.block_sparse_moe.gate": {
370
+ "group_size": 64,
371
+ "bits": 8
372
+ },
373
+ "model.layers.6.block_sparse_moe.gate": {
374
+ "group_size": 64,
375
+ "bits": 8
376
+ },
377
+ "model.layers.7.block_sparse_moe.gate": {
378
+ "group_size": 64,
379
+ "bits": 8
380
+ },
381
+ "model.layers.8.block_sparse_moe.gate": {
382
+ "group_size": 64,
383
+ "bits": 8
384
+ },
385
+ "model.layers.9.block_sparse_moe.gate": {
386
+ "group_size": 64,
387
+ "bits": 8
388
+ },
389
+ "model.layers.10.block_sparse_moe.gate": {
390
+ "group_size": 64,
391
+ "bits": 8
392
+ },
393
+ "model.layers.11.block_sparse_moe.gate": {
394
+ "group_size": 64,
395
+ "bits": 8
396
+ },
397
+ "model.layers.12.block_sparse_moe.gate": {
398
+ "group_size": 64,
399
+ "bits": 8
400
+ },
401
+ "model.layers.13.block_sparse_moe.gate": {
402
+ "group_size": 64,
403
+ "bits": 8
404
+ },
405
+ "model.layers.14.block_sparse_moe.gate": {
406
+ "group_size": 64,
407
+ "bits": 8
408
+ },
409
+ "model.layers.15.block_sparse_moe.gate": {
410
+ "group_size": 64,
411
+ "bits": 8
412
+ },
413
+ "model.layers.16.block_sparse_moe.gate": {
414
+ "group_size": 64,
415
+ "bits": 8
416
+ },
417
+ "model.layers.17.block_sparse_moe.gate": {
418
+ "group_size": 64,
419
+ "bits": 8
420
+ },
421
+ "model.layers.18.block_sparse_moe.gate": {
422
+ "group_size": 64,
423
+ "bits": 8
424
+ },
425
+ "model.layers.19.block_sparse_moe.gate": {
426
+ "group_size": 64,
427
+ "bits": 8
428
+ },
429
+ "model.layers.20.block_sparse_moe.gate": {
430
+ "group_size": 64,
431
+ "bits": 8
432
+ },
433
+ "model.layers.21.block_sparse_moe.gate": {
434
+ "group_size": 64,
435
+ "bits": 8
436
+ },
437
+ "model.layers.22.block_sparse_moe.gate": {
438
+ "group_size": 64,
439
+ "bits": 8
440
+ },
441
+ "model.layers.23.block_sparse_moe.gate": {
442
+ "group_size": 64,
443
+ "bits": 8
444
+ },
445
+ "model.layers.24.block_sparse_moe.gate": {
446
+ "group_size": 64,
447
+ "bits": 8
448
+ },
449
+ "model.layers.25.block_sparse_moe.gate": {
450
+ "group_size": 64,
451
+ "bits": 8
452
+ },
453
+ "model.layers.26.block_sparse_moe.gate": {
454
+ "group_size": 64,
455
+ "bits": 8
456
+ },
457
+ "model.layers.27.block_sparse_moe.gate": {
458
+ "group_size": 64,
459
+ "bits": 8
460
+ },
461
+ "model.layers.28.block_sparse_moe.gate": {
462
+ "group_size": 64,
463
+ "bits": 8
464
+ },
465
+ "model.layers.29.block_sparse_moe.gate": {
466
+ "group_size": 64,
467
+ "bits": 8
468
+ },
469
+ "model.layers.30.block_sparse_moe.gate": {
470
+ "group_size": 64,
471
+ "bits": 8
472
+ },
473
+ "model.layers.31.block_sparse_moe.gate": {
474
+ "group_size": 64,
475
+ "bits": 8
476
+ },
477
+ "model.layers.32.block_sparse_moe.gate": {
478
+ "group_size": 64,
479
+ "bits": 8
480
+ },
481
+ "model.layers.33.block_sparse_moe.gate": {
482
+ "group_size": 64,
483
+ "bits": 8
484
+ },
485
+ "model.layers.34.block_sparse_moe.gate": {
486
+ "group_size": 64,
487
+ "bits": 8
488
+ },
489
+ "model.layers.35.block_sparse_moe.gate": {
490
+ "group_size": 64,
491
+ "bits": 8
492
+ },
493
+ "model.layers.36.block_sparse_moe.gate": {
494
+ "group_size": 64,
495
+ "bits": 8
496
+ },
497
+ "model.layers.37.block_sparse_moe.gate": {
498
+ "group_size": 64,
499
+ "bits": 8
500
+ },
501
+ "model.layers.38.block_sparse_moe.gate": {
502
+ "group_size": 64,
503
+ "bits": 8
504
+ },
505
+ "model.layers.39.block_sparse_moe.gate": {
506
+ "group_size": 64,
507
+ "bits": 8
508
+ },
509
+ "model.layers.40.block_sparse_moe.gate": {
510
+ "group_size": 64,
511
+ "bits": 8
512
+ },
513
+ "model.layers.41.block_sparse_moe.gate": {
514
+ "group_size": 64,
515
+ "bits": 8
516
+ },
517
+ "model.layers.42.block_sparse_moe.gate": {
518
+ "group_size": 64,
519
+ "bits": 8
520
+ },
521
+ "model.layers.43.block_sparse_moe.gate": {
522
+ "group_size": 64,
523
+ "bits": 8
524
+ },
525
+ "model.layers.44.block_sparse_moe.gate": {
526
+ "group_size": 64,
527
+ "bits": 8
528
+ },
529
+ "model.layers.45.block_sparse_moe.gate": {
530
+ "group_size": 64,
531
+ "bits": 8
532
+ },
533
+ "model.layers.46.block_sparse_moe.gate": {
534
+ "group_size": 64,
535
+ "bits": 8
536
+ },
537
+ "model.layers.47.block_sparse_moe.gate": {
538
+ "group_size": 64,
539
+ "bits": 8
540
+ },
541
+ "model.layers.48.block_sparse_moe.gate": {
542
+ "group_size": 64,
543
+ "bits": 8
544
+ },
545
+ "model.layers.49.block_sparse_moe.gate": {
546
+ "group_size": 64,
547
+ "bits": 8
548
+ },
549
+ "model.layers.50.block_sparse_moe.gate": {
550
+ "group_size": 64,
551
+ "bits": 8
552
+ },
553
+ "model.layers.51.block_sparse_moe.gate": {
554
+ "group_size": 64,
555
+ "bits": 8
556
+ },
557
+ "model.layers.52.block_sparse_moe.gate": {
558
+ "group_size": 64,
559
+ "bits": 8
560
+ },
561
+ "model.layers.53.block_sparse_moe.gate": {
562
+ "group_size": 64,
563
+ "bits": 8
564
+ },
565
+ "model.layers.54.block_sparse_moe.gate": {
566
+ "group_size": 64,
567
+ "bits": 8
568
+ },
569
+ "model.layers.55.block_sparse_moe.gate": {
570
+ "group_size": 64,
571
+ "bits": 8
572
+ },
573
+ "model.layers.56.block_sparse_moe.gate": {
574
+ "group_size": 64,
575
+ "bits": 8
576
+ },
577
+ "model.layers.57.block_sparse_moe.gate": {
578
+ "group_size": 64,
579
+ "bits": 8
580
+ },
581
+ "model.layers.58.block_sparse_moe.gate": {
582
+ "group_size": 64,
583
+ "bits": 8
584
+ },
585
+ "model.layers.59.block_sparse_moe.gate": {
586
+ "group_size": 64,
587
+ "bits": 8
588
+ },
589
+ "model.layers.60.block_sparse_moe.gate": {
590
+ "group_size": 64,
591
+ "bits": 8
592
+ },
593
+ "model.layers.61.block_sparse_moe.gate": {
594
+ "group_size": 64,
595
+ "bits": 8
596
+ }
597
+ },
598
+ "rms_norm_eps": 1e-06,
599
+ "rope_theta": 5000000,
600
+ "rotary_dim": 64,
601
+ "router_aux_loss_coef": 0.001,
602
+ "router_jitter_noise": 0.0,
603
+ "scoring_func": "sigmoid",
604
+ "shared_intermediate_size": 0,
605
+ "shared_moe_mode": "sigmoid",
606
+ "sliding_window": null,
607
+ "tie_word_embeddings": false,
608
+ "transformers_version": "4.46.1",
609
+ "use_cache": true,
610
+ "use_mtp": true,
611
+ "use_qk_norm": true,
612
+ "use_routing_bias": true,
613
+ "vocab_size": 200064
614
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_sample": true,
3
+ "temperature": 1.0,
4
+ "top_p": 0.95,
5
+ "top_k": 40,
6
+ "transformers_version": "4.46.1"
7
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d132f3f45b669b687df185c1f8664a255eb3217fb4de74686d4746e040f6099b
3
+ size 4498611286
model-00002-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:585ec5cc3024d5425b87488e732005fb15a8a3f120a8b8eb872ae57c08493f68
3
+ size 4980732462
model-00003-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9e7528ea5afdd712ce860d7554c0da539924c02c7d74e4aab507bb3966b04d
3
+ size 4944035148
model-00004-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecf4dc0cc50a5a71a29fdb329337c060cb72ccaeb6325e7120f9df0fc54e6285
3
+ size 4980732494
model-00005-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14e13e2a9fb42ee57d0b8286a7d82b4366bcc066051a9d76cc06b898e7733c52
3
+ size 4980732440
model-00006-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cb379ce0497a3a9482b4934d5a52fe66915d8a813ee0685f49a233d033619fe
3
+ size 4944035148
model-00007-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0acc3bc7748ca4d912addf995084328920d9337201e246a4cd7548099bfab619
3
+ size 4980732543
model-00008-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76b15447a277ced959c9fce219e66e4dc66f3756ecac458d80f88f6548ca0386
3
+ size 4980732511
model-00009-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:492fbe4e41d20d0ec22edff0b89257606cab8f16b7b42ab42484565906cfc661
3
+ size 4944035183
model-00010-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d5580b98c2866f3b9891e564c103a0ef6618cd12d07d481400b2f8836b05f058
3
+ size 4980732533
model-00011-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e86ccd87baf3791c93a79098ea6d263a543ca900d66f300ade171a824c91b2c
3
+ size 4980732501
model-00012-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:392f5dc656c7e882d72f926b13f721f2228ed99c945461de1d45f053933918c8
3
+ size 4944035153
model-00013-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65065e81ac815d0ec69b70f3073b18d84423db0dc8b651e0790d73cc90e64c3c
3
+ size 4980732517
model-00014-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5af2c5ae5c040a4608de192cf524d728f9019c81bbe206cb53bfbb93431fc24
3
+ size 4980732513
model-00015-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3e42e69ab3c4890b5a4aabc61460f2611d3a8836b6b71a0f687df2a786ba956
3
+ size 4944035183
model-00016-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b392c6a3390007463cb5b62a324eca66431e62ed6b05ffe57ba50c559091bb9
3
+ size 4980732545
model-00017-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84f5f9e80a5ed182849269fef314defe45340924fdcf355fd86da819150fda68
3
+ size 4980732511
model-00018-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92922013ea374800ecb6760e29b14967ebeb662a94e966418bb0f2b774442f34
3
+ size 4944035183
model-00019-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3979a295f28ccd2295cf95103f342331d46e7f8f453b5165411506f0e2c7a14
3
+ size 4980732515
model-00020-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a9d4959fe9f5c262df41cee3d05f32e3bb87f63612ebd63bd8536774d63739c
3
+ size 4980732513
model-00021-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b16143b39042d3ec0d1370c2f0d16620305a792d879b11e706ac16f591d6b939
3
+ size 4944035175
model-00022-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0b3cc63114f085e4da179006306287ba559265958029072eb7da1288e3ca1fb
3
+ size 4980732545
model-00023-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51fae3d83a6955b6906e2474229f5c524ba0cf45bfa43190d5deaf6365347a5f
3
+ size 4980732493
model-00024-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ea57b899408f51f6965885cc64d03a4377c84da84a90d8717a38decec37cac9
3
+ size 4944035173
model-00025-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:922e77ac5ec7eb71eb31c0608af1550d247cdaa264c991ea179068f2eda8b92c
3
+ size 4980732553
model-00026-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0995a21a08bc677c1c7faf753f42d5dd073636acec247efa69111a5eae1b500
3
+ size 4980732497
model-00027-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ffaa3c59793a5413f8be7b0301b067765d8ef2baada49142662925dc4f8bbdb
3
+ size 4944035181
model-00028-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c60f4cecc0b39e7e059d9bafecb739d3210182b305f72ad804bf6388c6aae539
3
+ size 4980732561
model-00029-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b83da9e839a3835fa638ae1ce14d0c19fb837b26d7ca94f597665ea06b5c5ac
3
+ size 4980732505
model-00030-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca8930769abfcbba15c03e6bd1c07dda302722cbef9ea9b2a5ae228a9512bac1
3
+ size 4944035169
model-00031-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9767c1c00f0ac795614d3eb243e1043ef8d98df070b6f4deec793974169a09d3
3
+ size 4980732535
model-00032-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9287bd5f61a6e042f38d08c6193248779b6733d36b2194e1491c6d211d08419c
3
+ size 4980732489
model-00033-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e201fa92ae674c15c9b8a4e73d9ad54b992b8fe30d2e5484b9b9d33b8cf4bb3
3
+ size 4944035187
model-00034-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69143bfc34eb6ab0d26cd3310af1b19ef8c6cf8549d755776984ad317d193b11
3
+ size 4980732551
model-00035-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1aa34c4777e93e6f833604b8055de4ce3ba2667f8bae7588de30befbf4b318d1
3
+ size 4980732489
model-00036-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e9373ed9e2e4593ef0d7b4deb1291fa0f6757e76639b7f42b13b0d447696da4
3
+ size 4944035185
model-00037-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8fb67536eafcf5dc2b7575fe0254fdea23f6e949b811cc47603ce0a938e56e9
3
+ size 4980732521
model-00038-of-00038.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7480f64d9d77a0ca39f43f571dbeccb2e1a70be0868d082fc93724f5a6c9e0c
3
+ size 2462315047
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<code_interpreter>",
4
+ "<commit_after>",
5
+ "<commit_before>",
6
+ "<commit_msg>",
7
+ "<empty_output>",
8
+ "<filename>",
9
+ "<fim_middle>",
10
+ "<fim_pad>",
11
+ "<fim_prefix>",
12
+ "<fim_suffix>",
13
+ "<function_call>",
14
+ "<gh_stars>",
15
+ "]<]speech[>[",
16
+ "]<]image[>[",
17
+ "]<]video[>[",
18
+ "]<]start of speech[>[",
19
+ "]<]end of speech[>[",
20
+ "]<]start of image[>[",
21
+ "]<]end of image[>[",
22
+ "]<]start of video[>[",
23
+ "]<]end of video[>[",
24
+ "]<]vision pad[>[",
25
+ "]~!b[",
26
+ "<issue_closed>",
27
+ "<issue_comment>",
28
+ "<issue_start>",
29
+ "<jupyter_code>",
30
+ "<jupyter_output>",
31
+ "<jupyter_start>",
32
+ "<jupyter_text>",
33
+ "<reponame>",
34
+ "[e~[",
35
+ "]!d~[",
36
+ "]!p~[",
37
+ "]~b]",
38
+ "<jupyter_error>",
39
+ "<add_file>",
40
+ "<delete_file>",
41
+ "<rename_file>",
42
+ "<edit_file>",
43
+ "<commit_message>",
44
+ "<empty_source_file>",
45
+ "<repo_struct>",
46
+ "<code_context>",
47
+ "<file_content>",
48
+ "<source_files>",
49
+ "<pr_start>",
50
+ "<review_comment>",
51
+ "<filepath>",
52
+ "<file_sep>"
53
+ ],
54
+ "bos_token": {
55
+ "content": "]~!b[",
56
+ "lstrip": false,
57
+ "normalized": false,
58
+ "rstrip": false,
59
+ "single_word": false
60
+ },
61
+ "eos_token": {
62
+ "content": "[e~[",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false
67
+ },
68
+ "unk_token": {
69
+ "content": "]!d~[",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false
74
+ }
75
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7b90ed7f55d905175bc26771d6d7d33b40b46742f073675bc816fedaf482ea1
3
+ size 15522763
tokenizer_config.json ADDED
@@ -0,0 +1,497 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "200000": {
5
+ "content": "]!p~[",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "200001": {
13
+ "content": "<fim_prefix>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "200002": {
21
+ "content": "<fim_middle>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "200003": {
29
+ "content": "<fim_suffix>",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "200004": {
37
+ "content": "<fim_pad>",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "200005": {
45
+ "content": "<reponame>",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "200006": {
53
+ "content": "<filename>",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "200007": {
61
+ "content": "<gh_stars>",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "200008": {
69
+ "content": "<issue_start>",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "200009": {
77
+ "content": "<issue_comment>",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "200010": {
85
+ "content": "<issue_closed>",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "200011": {
93
+ "content": "<jupyter_start>",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "200012": {
101
+ "content": "<jupyter_text>",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "200013": {
109
+ "content": "<jupyter_code>",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "200014": {
117
+ "content": "<jupyter_output>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "200015": {
125
+ "content": "<empty_output>",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "200016": {
133
+ "content": "<commit_before>",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "200017": {
141
+ "content": "<commit_msg>",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "200018": {
149
+ "content": "<commit_after>",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "200019": {
157
+ "content": "]~b]",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "200020": {
165
+ "content": "[e~[",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "200021": {
173
+ "content": "]!d~[",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "200022": {
181
+ "content": "<function_call>",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "200023": {
189
+ "content": "<code_interpreter>",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "200024": {
197
+ "content": "]<]speech[>[",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "200025": {
205
+ "content": "]<]image[>[",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "200026": {
213
+ "content": "]<]video[>[",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "200027": {
221
+ "content": "]<]start of speech[>[",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "200028": {
229
+ "content": "]<]end of speech[>[",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "200029": {
237
+ "content": "]<]start of image[>[",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "200030": {
245
+ "content": "]<]end of image[>[",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "200031": {
253
+ "content": "]<]start of video[>[",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "200032": {
261
+ "content": "]<]end of video[>[",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "200033": {
269
+ "content": "]<]vision pad[>[",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "200034": {
277
+ "content": "]~!b[",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "200035": {
285
+ "content": "<jupyter_error>",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "200036": {
293
+ "content": "<add_file>",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "200037": {
301
+ "content": "<delete_file>",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "200038": {
309
+ "content": "<rename_file>",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "200039": {
317
+ "content": "<edit_file>",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "200040": {
325
+ "content": "<commit_message>",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "200041": {
333
+ "content": "<empty_source_file>",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "200042": {
341
+ "content": "<repo_struct>",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "200043": {
349
+ "content": "<code_context>",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "200044": {
357
+ "content": "<file_content>",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "200045": {
365
+ "content": "<source_files>",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "200046": {
373
+ "content": "<pr_start>",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "200047": {
381
+ "content": "<review_comment>",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "200048": {
389
+ "content": "<filepath>",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "200049": {
397
+ "content": "<file_sep>",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "200050": {
405
+ "content": "<think>",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": false
411
+ },
412
+ "200051": {
413
+ "content": "</think>",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": false
419
+ },
420
+ "200052": {
421
+ "content": "<minimax:tool_call>",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": false
427
+ },
428
+ "200053": {
429
+ "content": "</minimax:tool_call>",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": false
435
+ }
436
+ },
437
+ "additional_special_tokens": [
438
+ "<code_interpreter>",
439
+ "<commit_after>",
440
+ "<commit_before>",
441
+ "<commit_msg>",
442
+ "<empty_output>",
443
+ "<filename>",
444
+ "<fim_middle>",
445
+ "<fim_pad>",
446
+ "<fim_prefix>",
447
+ "<fim_suffix>",
448
+ "<function_call>",
449
+ "<gh_stars>",
450
+ "]<]speech[>[",
451
+ "]<]image[>[",
452
+ "]<]video[>[",
453
+ "]<]start of speech[>[",
454
+ "]<]end of speech[>[",
455
+ "]<]start of image[>[",
456
+ "]<]end of image[>[",
457
+ "]<]start of video[>[",
458
+ "]<]end of video[>[",
459
+ "]<]vision pad[>[",
460
+ "]~!b[",
461
+ "<issue_closed>",
462
+ "<issue_comment>",
463
+ "<issue_start>",
464
+ "<jupyter_code>",
465
+ "<jupyter_output>",
466
+ "<jupyter_start>",
467
+ "<jupyter_text>",
468
+ "<reponame>",
469
+ "[e~[",
470
+ "]!d~[",
471
+ "]!p~[",
472
+ "]~b]",
473
+ "<jupyter_error>",
474
+ "<add_file>",
475
+ "<delete_file>",
476
+ "<rename_file>",
477
+ "<edit_file>",
478
+ "<commit_message>",
479
+ "<empty_source_file>",
480
+ "<repo_struct>",
481
+ "<code_context>",
482
+ "<file_content>",
483
+ "<source_files>",
484
+ "<pr_start>",
485
+ "<review_comment>",
486
+ "<filepath>",
487
+ "<file_sep>"
488
+ ],
489
+ "bos_token": "]~!b[",
490
+ "clean_up_tokenization_spaces": false,
491
+ "eos_token": "[e~[",
492
+ "extra_special_tokens": {},
493
+ "model_max_length": 40960000,
494
+ "tokenizer_class": "GPT2Tokenizer",
495
+ "unk_token": "]!d~[",
496
+ "chat_template": "{# ----------\u2011\u2011\u2011 special token variables \u2011\u2011\u2011---------- #}\n{%- set toolcall_begin_token = '<minimax:tool_call>' -%}\n{%- set toolcall_end_token = '</minimax:tool_call>' -%}\n{#- Tool Rendering Functions ============================================== -#}\n{%- macro render_tool_namespace(namespace_name, tool_list) -%}\n{%- for tool in tool_list -%}\n<tool>{{ tool.function | tojson(ensure_ascii=False) }}</tool>\n{% endfor -%}\n{%- endmacro -%}\n{%- macro visible_text(content) -%}\n {%- if content is string -%}\n {{ content }}\n {%- elif content is iterable and content is not mapping -%}\n {%- for item in content -%}\n {%- if item is mapping and item.type == 'text' -%}\n {{- item.text }}\n {%- elif item is string -%}\n {{- item }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{- content }}\n {%- endif -%}\n{%- endmacro -%}\n{#- System Message Construction ============================================ -#}\n{%- macro build_system_message(system_message) -%}\n {%- if system_message and system_message.content -%}\n {{- visible_text(system_message.content) }}\n {%- else -%}\n {%- if model_identity is not defined -%}\n {%- set model_identity = \"You are a helpful assistant.\" -%}\n {%- endif -%}\n {{- model_identity }}\n {%- endif -%}\n \n {#- Handle current_date -#}\n {%- if system_message and system_message.current_date -%}\n {{- '\\n' ~ 'Current date: ' + system_message.current_date }}\n {%- endif -%}\n {#- Handle current_location -#}\n {%- if system_message and system_message.current_location -%}\n {{- '\\n' ~ 'Current location: ' + system_message.current_location }}\n {%- endif -%}\n{%- endmacro -%}\n{#- Main Template Logic ================================================= -#}\n{#- Extract system message (only first message if it's system) -#}\n{%- set system_message = none -%}\n{%- set conversation_messages = messages -%}\n{%- if messages and messages[0].role == \"system\" -%}\n {%- set system_message = messages[0] 
-%}\n {%- set conversation_messages = messages[1:] -%}\n{%- endif -%}\n{#- Get the last user message turn, for interleved thinking -#}\n{%- set ns = namespace(last_user_index=-1) %}\n{% for m in conversation_messages %}\n {%- if m.role == 'user' %}\n {% set ns.last_user_index = loop.index0 -%}\n {%- endif %}\n{%- endfor %}\n{#- Render system message -#}\n{{- ']~!b[' ~ ']~b]system' ~ '\\n' }}\n{{- build_system_message(system_message) }}\n{#- Render tools if available -#}\n{%- if tools -%}\n {{- '\\n\\n' ~ '# Tools' ~ '\\n' ~ 'You may call one or more tools to assist with the user query.\\nHere are the tools available in JSONSchema format:' ~ '\\n' }}\n {{- '\\n' ~ '<tools>' ~ '\\n' }}\n {{- render_tool_namespace(\"functions\", tools) }}\n {{- '</tools>' ~ '\\n\\n' }}\n{{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\\n' }}\n{{- '\\n' ~ toolcall_begin_token }}\n<invoke name=\"tool-name-1\">\n<parameter name=\"param-key-1\">param-value-1</parameter>\n<parameter name=\"param-key-2\">param-value-2</parameter>\n...\n</invoke>\n{{- '\\n' ~ toolcall_end_token }}\n{%- endif -%}\n{{- '[e~[\\n' }}\n\n{#- Render messages -#}\n{%- set last_tool_call = namespace(name=none) -%}\n{%- for message in conversation_messages -%}\n {%- if message.role == 'assistant' -%}\n {#- Only render reasoning_content if no user message follows -#}\n {{- ']~b]ai' ~ '\\n' }}\n\n {%- set reasoning_content = '' %}\n {%- set content = visible_text(message.content) %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].strip('\\n').split('<think>')[-1].strip('\\n') %}\n {%- set content = content.split('</think>')[-1].strip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if reasoning_content and loop.index0 > ns.last_user_index -%}\n {{- '<think>' ~ '\\n' ~ reasoning_content ~ '\\n' ~ '</think>' ~ '\\n\\n' }}\n 
{%- endif -%}\n {%- if content -%}\n {{- content }}\n {%- endif -%}\n {%- if message.tool_calls -%}\n {{- '\\n' ~ toolcall_begin_token ~ '\\n' }}\n\n {%- for tool_call in message.tool_calls -%}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<invoke name=\"' + tool_call.name + '\">' }}\n {% set _args = tool_call.arguments %}\n {%- for k, v in _args.items() %}\n {{- '<parameter name=\"' + k + '\">' }}\n {{- v | tojson(ensure_ascii=False) if v is not string else v }}\n {{- '</parameter>' }}\n {% endfor %}\n {{- '</invoke>' ~ '\\n' }}\n {%- endfor -%}\n \n {{- toolcall_end_token}}\n {%- set last_tool_call.name = message.tool_calls[-1].name -%}\n {%- else -%}\n {%- set last_tool_call.name = none -%}\n {%- endif -%}\n {{- '[e~[' ~ '\\n' }}\n \n {%- elif message.role == 'tool' -%}\n {%- if last_tool_call.name is none -%}\n {{- raise_exception(\"Message has tool role, but there was no previous assistant message with a tool call!\") }}\n {%- endif -%}\n {%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%}\n {{- ']~b]tool' }}\n {%- endif -%}\n {%- if message.content is string -%}\n {{- '\\n<response>' }}\n {{- message.content }}\n {{- '</response>' }}\n {%- else -%}\n {%- for tr in message.content -%}\n {{- '\\n<response>' }}\n {{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }}\n {{- '\\n</response>' }}\n {%- endfor -%}\n {%- endif -%}\n {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}\n {{- '[e~[\\n' -}}\n {%- endif -%}\n \n {%- elif message.role == 'user' -%}\n {{- ']~b]user' ~ '\\n' }}\n {{- visible_text(message.content) }}\n {{- '[e~[' ~ '\\n' }}\n {%- endif -%}\n{%- endfor -%}\n\n{#- Generation prompt -#}\n{%- if add_generation_prompt -%}\n{{- ']~b]ai' ~ '\\n' ~ '<think>' ~ '\\n' }}\n{%- endif -%}\n"
497
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff