Add files using upload-large-folder tool
Browse files
- .gitattributes +1 -0
- README.md +27 -0
- added_tokens.json +56 -0
- chat_template.jinja +159 -0
- config.json +614 -0
- generation_config.json +7 -0
- merges.txt +0 -0
- model-00001-of-00038.safetensors +3 -0
- model-00002-of-00038.safetensors +3 -0
- model-00003-of-00038.safetensors +3 -0
- model-00004-of-00038.safetensors +3 -0
- model-00005-of-00038.safetensors +3 -0
- model-00006-of-00038.safetensors +3 -0
- model-00007-of-00038.safetensors +3 -0
- model-00008-of-00038.safetensors +3 -0
- model-00009-of-00038.safetensors +3 -0
- model-00010-of-00038.safetensors +3 -0
- model-00011-of-00038.safetensors +3 -0
- model-00012-of-00038.safetensors +3 -0
- model-00013-of-00038.safetensors +3 -0
- model-00014-of-00038.safetensors +3 -0
- model-00015-of-00038.safetensors +3 -0
- model-00016-of-00038.safetensors +3 -0
- model-00017-of-00038.safetensors +3 -0
- model-00018-of-00038.safetensors +3 -0
- model-00019-of-00038.safetensors +3 -0
- model-00020-of-00038.safetensors +3 -0
- model-00021-of-00038.safetensors +3 -0
- model-00022-of-00038.safetensors +3 -0
- model-00023-of-00038.safetensors +3 -0
- model-00024-of-00038.safetensors +3 -0
- model-00025-of-00038.safetensors +3 -0
- model-00026-of-00038.safetensors +3 -0
- model-00027-of-00038.safetensors +3 -0
- model-00028-of-00038.safetensors +3 -0
- model-00029-of-00038.safetensors +3 -0
- model-00030-of-00038.safetensors +3 -0
- model-00031-of-00038.safetensors +3 -0
- model-00032-of-00038.safetensors +3 -0
- model-00033-of-00038.safetensors +3 -0
- model-00034-of-00038.safetensors +3 -0
- model-00035-of-00038.safetensors +3 -0
- model-00036-of-00038.safetensors +3 -0
- model-00037-of-00038.safetensors +3 -0
- model-00038-of-00038.safetensors +3 -0
- model.safetensors.index.json +0 -0
- special_tokens_map.json +75 -0
- tokenizer.json +3 -0
- tokenizer_config.json +497 -0
- vocab.json +0 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
pipeline_tag: text-generation
|
| 3 |
+
license: mit
|
| 4 |
+
library_name: transformers
|
| 5 |
+
tags:
|
| 6 |
+
- mlx
|
| 7 |
+
base_model: MiniMaxAI/MiniMax-M2
|
| 8 |
+
---
|
| 9 |
+
## 💫 Community Model> MiniMax-M2 by MiniMaxAI
|
| 10 |
+
|
| 11 |
+
_👾 [LM Studio](https://lmstudio.ai) Community models highlights program. Highlighting new & noteworthy models by the community. Join the conversation on [Discord](https://discord.gg/aPQfnNkxGC)_.
|
| 12 |
+
|
| 13 |
+
**Model creator**: [MiniMaxAI](https://huggingface.co/MiniMaxAI)<br>
|
| 14 |
+
**Original model**: [MiniMax-M2](https://huggingface.co/MiniMaxAI/MiniMax-M2)<br>
|
| 15 |
+
**MLX quantization**: provided by [LM Studio team](https://x.com/lmstudio) using [mlx_lm](https://github.com/ml-explore/mlx-lm)<br>
|
| 16 |
+
|
| 17 |
+
## Technical Details
|
| 18 |
+
|
| 19 |
+
6-bit quantized version of MiniMax-M2 using MLX, optimized for Apple Silicon.
|
| 20 |
+
|
| 21 |
+
## Special thanks
|
| 22 |
+
|
| 23 |
+
🙏 Special thanks to the [Apple Machine Learning Research](https://github.com/ml-explore) team for creating [MLX](https://github.com/ml-explore/mlx).
|
| 24 |
+
|
| 25 |
+
## Disclaimers
|
| 26 |
+
|
| 27 |
+
LM Studio is not the creator, originator, or owner of any Model featured in the Community Model Program. Each Community Model is created and provided by third parties. LM Studio does not endorse, support, represent or guarantee the completeness, truthfulness, accuracy, or reliability of any Community Model. You understand that Community Models can produce content that might be offensive, harmful, inaccurate or otherwise inappropriate, or deceptive. Each Community Model is the sole responsibility of the person or entity who originated such Model. LM Studio may not monitor or control the Community Models and cannot, and does not, take responsibility for any such Model. LM Studio disclaims all warranties or guarantees about the accuracy, reliability or benefits of the Community Models. LM Studio further disclaims any warranty that the Community Model will meet your requirements, be secure, uninterrupted or available at any time or location, or error-free, viruses-free, or that any errors will be corrected, or otherwise. You will be solely responsible for any damage resulting from your use of or access to the Community Models, your downloading of any Community Model, or use of any other Community Model provided by or through LM Studio.
|
added_tokens.json
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"</minimax:tool_call>": 200053,
|
| 3 |
+
"</think>": 200051,
|
| 4 |
+
"<add_file>": 200036,
|
| 5 |
+
"<code_context>": 200043,
|
| 6 |
+
"<code_interpreter>": 200023,
|
| 7 |
+
"<commit_after>": 200018,
|
| 8 |
+
"<commit_before>": 200016,
|
| 9 |
+
"<commit_message>": 200040,
|
| 10 |
+
"<commit_msg>": 200017,
|
| 11 |
+
"<delete_file>": 200037,
|
| 12 |
+
"<edit_file>": 200039,
|
| 13 |
+
"<empty_output>": 200015,
|
| 14 |
+
"<empty_source_file>": 200041,
|
| 15 |
+
"<file_content>": 200044,
|
| 16 |
+
"<file_sep>": 200049,
|
| 17 |
+
"<filename>": 200006,
|
| 18 |
+
"<filepath>": 200048,
|
| 19 |
+
"<fim_middle>": 200002,
|
| 20 |
+
"<fim_pad>": 200004,
|
| 21 |
+
"<fim_prefix>": 200001,
|
| 22 |
+
"<fim_suffix>": 200003,
|
| 23 |
+
"<function_call>": 200022,
|
| 24 |
+
"<gh_stars>": 200007,
|
| 25 |
+
"<issue_closed>": 200010,
|
| 26 |
+
"<issue_comment>": 200009,
|
| 27 |
+
"<issue_start>": 200008,
|
| 28 |
+
"<jupyter_code>": 200013,
|
| 29 |
+
"<jupyter_error>": 200035,
|
| 30 |
+
"<jupyter_output>": 200014,
|
| 31 |
+
"<jupyter_start>": 200011,
|
| 32 |
+
"<jupyter_text>": 200012,
|
| 33 |
+
"<minimax:tool_call>": 200052,
|
| 34 |
+
"<pr_start>": 200046,
|
| 35 |
+
"<rename_file>": 200038,
|
| 36 |
+
"<repo_struct>": 200042,
|
| 37 |
+
"<reponame>": 200005,
|
| 38 |
+
"<review_comment>": 200047,
|
| 39 |
+
"<source_files>": 200045,
|
| 40 |
+
"<think>": 200050,
|
| 41 |
+
"[e~[": 200020,
|
| 42 |
+
"]!d~[": 200021,
|
| 43 |
+
"]!p~[": 200000,
|
| 44 |
+
"]<]end of image[>[": 200030,
|
| 45 |
+
"]<]end of speech[>[": 200028,
|
| 46 |
+
"]<]end of video[>[": 200032,
|
| 47 |
+
"]<]image[>[": 200025,
|
| 48 |
+
"]<]speech[>[": 200024,
|
| 49 |
+
"]<]start of image[>[": 200029,
|
| 50 |
+
"]<]start of speech[>[": 200027,
|
| 51 |
+
"]<]start of video[>[": 200031,
|
| 52 |
+
"]<]video[>[": 200026,
|
| 53 |
+
"]<]vision pad[>[": 200033,
|
| 54 |
+
"]~!b[": 200034,
|
| 55 |
+
"]~b]": 200019
|
| 56 |
+
}
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{# ------------- special token variables ------------- #}
|
| 2 |
+
{%- set toolcall_begin_token = '<minimax:tool_call>' -%}
|
| 3 |
+
{%- set toolcall_end_token = '</minimax:tool_call>' -%}
|
| 4 |
+
{#- Tool Rendering Functions ============================================== -#}
|
| 5 |
+
{%- macro render_tool_namespace(namespace_name, tool_list) -%}
|
| 6 |
+
{%- for tool in tool_list -%}
|
| 7 |
+
<tool>{{ tool.function | tojson(ensure_ascii=False) }}</tool>
|
| 8 |
+
{% endfor -%}
|
| 9 |
+
{%- endmacro -%}
|
| 10 |
+
{%- macro visible_text(content) -%}
|
| 11 |
+
{%- if content is string -%}
|
| 12 |
+
{{ content }}
|
| 13 |
+
{%- elif content is iterable and content is not mapping -%}
|
| 14 |
+
{%- for item in content -%}
|
| 15 |
+
{%- if item is mapping and item.type == 'text' -%}
|
| 16 |
+
{{- item.text }}
|
| 17 |
+
{%- elif item is string -%}
|
| 18 |
+
{{- item }}
|
| 19 |
+
{%- endif -%}
|
| 20 |
+
{%- endfor -%}
|
| 21 |
+
{%- else -%}
|
| 22 |
+
{{- content }}
|
| 23 |
+
{%- endif -%}
|
| 24 |
+
{%- endmacro -%}
|
| 25 |
+
{#- System Message Construction ============================================ -#}
|
| 26 |
+
{%- macro build_system_message(system_message) -%}
|
| 27 |
+
{%- if system_message and system_message.content -%}
|
| 28 |
+
{{- visible_text(system_message.content) }}
|
| 29 |
+
{%- else -%}
|
| 30 |
+
{%- if model_identity is not defined -%}
|
| 31 |
+
{%- set model_identity = "You are a helpful assistant." -%}
|
| 32 |
+
{%- endif -%}
|
| 33 |
+
{{- model_identity }}
|
| 34 |
+
{%- endif -%}
|
| 35 |
+
|
| 36 |
+
{#- Handle current_date -#}
|
| 37 |
+
{%- if system_message and system_message.current_date -%}
|
| 38 |
+
{{- '\n' ~ 'Current date: ' + system_message.current_date }}
|
| 39 |
+
{%- endif -%}
|
| 40 |
+
{#- Handle current_location -#}
|
| 41 |
+
{%- if system_message and system_message.current_location -%}
|
| 42 |
+
{{- '\n' ~ 'Current location: ' + system_message.current_location }}
|
| 43 |
+
{%- endif -%}
|
| 44 |
+
{%- endmacro -%}
|
| 45 |
+
{#- Main Template Logic ================================================= -#}
|
| 46 |
+
{#- Extract system message (only first message if it's system) -#}
|
| 47 |
+
{%- set system_message = none -%}
|
| 48 |
+
{%- set conversation_messages = messages -%}
|
| 49 |
+
{%- if messages and messages[0].role == "system" -%}
|
| 50 |
+
{%- set system_message = messages[0] -%}
|
| 51 |
+
{%- set conversation_messages = messages[1:] -%}
|
| 52 |
+
{%- endif -%}
|
| 53 |
+
{#- Get the last user message turn, for interleaved thinking -#}
|
| 54 |
+
{%- set ns = namespace(last_user_index=-1) %}
|
| 55 |
+
{% for m in conversation_messages %}
|
| 56 |
+
{%- if m.role == 'user' %}
|
| 57 |
+
{% set ns.last_user_index = loop.index0 -%}
|
| 58 |
+
{%- endif %}
|
| 59 |
+
{%- endfor %}
|
| 60 |
+
{#- Render system message -#}
|
| 61 |
+
{{- ']~!b[' ~ ']~b]system' ~ '\n' }}
|
| 62 |
+
{{- build_system_message(system_message) }}
|
| 63 |
+
{#- Render tools if available -#}
|
| 64 |
+
{%- if tools -%}
|
| 65 |
+
{{- '\n\n' ~ '# Tools' ~ '\n' ~ 'You may call one or more tools to assist with the user query.\nHere are the tools available in JSONSchema format:' ~ '\n' }}
|
| 66 |
+
{{- '\n' ~ '<tools>' ~ '\n' }}
|
| 67 |
+
{{- render_tool_namespace("functions", tools) }}
|
| 68 |
+
{{- '</tools>' ~ '\n\n' }}
|
| 69 |
+
{{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\n' }}
|
| 70 |
+
{{- '\n' ~ toolcall_begin_token }}
|
| 71 |
+
<invoke name="tool-name-1">
|
| 72 |
+
<parameter name="param-key-1">param-value-1</parameter>
|
| 73 |
+
<parameter name="param-key-2">param-value-2</parameter>
|
| 74 |
+
...
|
| 75 |
+
</invoke>
|
| 76 |
+
{{- '\n' ~ toolcall_end_token }}
|
| 77 |
+
{%- endif -%}
|
| 78 |
+
{{- '[e~[\n' }}
|
| 79 |
+
|
| 80 |
+
{#- Render messages -#}
|
| 81 |
+
{%- set last_tool_call = namespace(name=none) -%}
|
| 82 |
+
{%- for message in conversation_messages -%}
|
| 83 |
+
{%- if message.role == 'assistant' -%}
|
| 84 |
+
{#- Only render reasoning_content if no user message follows -#}
|
| 85 |
+
{{- ']~b]ai' ~ '\n' }}
|
| 86 |
+
|
| 87 |
+
{%- set reasoning_content = '' %}
|
| 88 |
+
{%- set content = visible_text(message.content) %}
|
| 89 |
+
{%- if message.reasoning_content is string %}
|
| 90 |
+
{%- set reasoning_content = message.reasoning_content %}
|
| 91 |
+
{%- else %}
|
| 92 |
+
{%- if '</think>' in content %}
|
| 93 |
+
{%- set reasoning_content = content.split('</think>')[0].strip('\n').split('<think>')[-1].strip('\n') %}
|
| 94 |
+
{%- set content = content.split('</think>')[-1].strip('\n') %}
|
| 95 |
+
{%- endif %}
|
| 96 |
+
{%- endif %}
|
| 97 |
+
{%- if reasoning_content and loop.index0 > ns.last_user_index -%}
|
| 98 |
+
{{- '<think>' ~ '\n' ~ reasoning_content ~ '\n' ~ '</think>' ~ '\n\n' }}
|
| 99 |
+
{%- endif -%}
|
| 100 |
+
{%- if content -%}
|
| 101 |
+
{{- content }}
|
| 102 |
+
{%- endif -%}
|
| 103 |
+
{%- if message.tool_calls -%}
|
| 104 |
+
{{- '\n' ~ toolcall_begin_token ~ '\n' }}
|
| 105 |
+
|
| 106 |
+
{%- for tool_call in message.tool_calls -%}
|
| 107 |
+
{%- if tool_call.function %}
|
| 108 |
+
{%- set tool_call = tool_call.function %}
|
| 109 |
+
{%- endif %}
|
| 110 |
+
{{- '<invoke name="' + tool_call.name + '">' }}
|
| 111 |
+
{% set _args = tool_call.arguments %}
|
| 112 |
+
{%- for k, v in _args.items() %}
|
| 113 |
+
{{- '<parameter name="' + k + '">' }}
|
| 114 |
+
{{- v | tojson(ensure_ascii=False) if v is not string else v }}
|
| 115 |
+
{{- '</parameter>' }}
|
| 116 |
+
{% endfor %}
|
| 117 |
+
{{- '</invoke>' ~ '\n' }}
|
| 118 |
+
{%- endfor -%}
|
| 119 |
+
|
| 120 |
+
{{- toolcall_end_token}}
|
| 121 |
+
{%- set last_tool_call.name = message.tool_calls[-1].name -%}
|
| 122 |
+
{%- else -%}
|
| 123 |
+
{%- set last_tool_call.name = none -%}
|
| 124 |
+
{%- endif -%}
|
| 125 |
+
{{- '[e~[' ~ '\n' }}
|
| 126 |
+
|
| 127 |
+
{%- elif message.role == 'tool' -%}
|
| 128 |
+
{%- if last_tool_call.name is none -%}
|
| 129 |
+
{{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
|
| 130 |
+
{%- endif -%}
|
| 131 |
+
{%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%}
|
| 132 |
+
{{- ']~b]tool' }}
|
| 133 |
+
{%- endif -%}
|
| 134 |
+
{%- if message.content is string -%}
|
| 135 |
+
{{- '\n<response>' }}
|
| 136 |
+
{{- message.content }}
|
| 137 |
+
{{- '</response>' }}
|
| 138 |
+
{%- else -%}
|
| 139 |
+
{%- for tr in message.content -%}
|
| 140 |
+
{{- '\n<response>' }}
|
| 141 |
+
{{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }}
|
| 142 |
+
{{- '\n</response>' }}
|
| 143 |
+
{%- endfor -%}
|
| 144 |
+
{%- endif -%}
|
| 145 |
+
{%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}
|
| 146 |
+
{{- '[e~[\n' -}}
|
| 147 |
+
{%- endif -%}
|
| 148 |
+
|
| 149 |
+
{%- elif message.role == 'user' -%}
|
| 150 |
+
{{- ']~b]user' ~ '\n' }}
|
| 151 |
+
{{- visible_text(message.content) }}
|
| 152 |
+
{{- '[e~[' ~ '\n' }}
|
| 153 |
+
{%- endif -%}
|
| 154 |
+
{%- endfor -%}
|
| 155 |
+
|
| 156 |
+
{#- Generation prompt -#}
|
| 157 |
+
{%- if add_generation_prompt -%}
|
| 158 |
+
{{- ']~b]ai' ~ '\n' ~ '<think>' ~ '\n' }}
|
| 159 |
+
{%- endif -%}
|
config.json
ADDED
|
@@ -0,0 +1,614 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"MiniMaxM2ForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_dropout": 0.0,
|
| 6 |
+
"attn_type_list": [
|
| 7 |
+
1,
|
| 8 |
+
1,
|
| 9 |
+
1,
|
| 10 |
+
1,
|
| 11 |
+
1,
|
| 12 |
+
1,
|
| 13 |
+
1,
|
| 14 |
+
1,
|
| 15 |
+
1,
|
| 16 |
+
1,
|
| 17 |
+
1,
|
| 18 |
+
1,
|
| 19 |
+
1,
|
| 20 |
+
1,
|
| 21 |
+
1,
|
| 22 |
+
1,
|
| 23 |
+
1,
|
| 24 |
+
1,
|
| 25 |
+
1,
|
| 26 |
+
1,
|
| 27 |
+
1,
|
| 28 |
+
1,
|
| 29 |
+
1,
|
| 30 |
+
1,
|
| 31 |
+
1,
|
| 32 |
+
1,
|
| 33 |
+
1,
|
| 34 |
+
1,
|
| 35 |
+
1,
|
| 36 |
+
1,
|
| 37 |
+
1,
|
| 38 |
+
1,
|
| 39 |
+
1,
|
| 40 |
+
1,
|
| 41 |
+
1,
|
| 42 |
+
1,
|
| 43 |
+
1,
|
| 44 |
+
1,
|
| 45 |
+
1,
|
| 46 |
+
1,
|
| 47 |
+
1,
|
| 48 |
+
1,
|
| 49 |
+
1,
|
| 50 |
+
1,
|
| 51 |
+
1,
|
| 52 |
+
1,
|
| 53 |
+
1,
|
| 54 |
+
1,
|
| 55 |
+
1,
|
| 56 |
+
1,
|
| 57 |
+
1,
|
| 58 |
+
1,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
1,
|
| 62 |
+
1,
|
| 63 |
+
1,
|
| 64 |
+
1,
|
| 65 |
+
1,
|
| 66 |
+
1,
|
| 67 |
+
1,
|
| 68 |
+
1
|
| 69 |
+
],
|
| 70 |
+
"bos_token_id": null,
|
| 71 |
+
"eos_token_id": null,
|
| 72 |
+
"head_dim": 128,
|
| 73 |
+
"hidden_act": "silu",
|
| 74 |
+
"hidden_size": 3072,
|
| 75 |
+
"initializer_range": 0.02,
|
| 76 |
+
"intermediate_size": 1536,
|
| 77 |
+
"layernorm_full_attention_beta": 1.0,
|
| 78 |
+
"layernorm_linear_attention_beta": 1.0,
|
| 79 |
+
"layernorm_mlp_beta": 1.0,
|
| 80 |
+
"max_position_embeddings": 196608,
|
| 81 |
+
"mlp_intermediate_size": 8192,
|
| 82 |
+
"model_type": "minimax",
|
| 83 |
+
"mtp_transformer_layers": 1,
|
| 84 |
+
"num_attention_heads": 48,
|
| 85 |
+
"num_experts_per_tok": 8,
|
| 86 |
+
"num_hidden_layers": 62,
|
| 87 |
+
"num_key_value_heads": 8,
|
| 88 |
+
"num_local_experts": 256,
|
| 89 |
+
"num_mtp_modules": 3,
|
| 90 |
+
"output_router_logits": false,
|
| 91 |
+
"qk_norm_type": "per_layer",
|
| 92 |
+
"quantization": {
|
| 93 |
+
"group_size": 64,
|
| 94 |
+
"bits": 6,
|
| 95 |
+
"mode": "affine",
|
| 96 |
+
"model.layers.0.block_sparse_moe.gate": {
|
| 97 |
+
"group_size": 64,
|
| 98 |
+
"bits": 8
|
| 99 |
+
},
|
| 100 |
+
"model.layers.1.block_sparse_moe.gate": {
|
| 101 |
+
"group_size": 64,
|
| 102 |
+
"bits": 8
|
| 103 |
+
},
|
| 104 |
+
"model.layers.2.block_sparse_moe.gate": {
|
| 105 |
+
"group_size": 64,
|
| 106 |
+
"bits": 8
|
| 107 |
+
},
|
| 108 |
+
"model.layers.3.block_sparse_moe.gate": {
|
| 109 |
+
"group_size": 64,
|
| 110 |
+
"bits": 8
|
| 111 |
+
},
|
| 112 |
+
"model.layers.4.block_sparse_moe.gate": {
|
| 113 |
+
"group_size": 64,
|
| 114 |
+
"bits": 8
|
| 115 |
+
},
|
| 116 |
+
"model.layers.5.block_sparse_moe.gate": {
|
| 117 |
+
"group_size": 64,
|
| 118 |
+
"bits": 8
|
| 119 |
+
},
|
| 120 |
+
"model.layers.6.block_sparse_moe.gate": {
|
| 121 |
+
"group_size": 64,
|
| 122 |
+
"bits": 8
|
| 123 |
+
},
|
| 124 |
+
"model.layers.7.block_sparse_moe.gate": {
|
| 125 |
+
"group_size": 64,
|
| 126 |
+
"bits": 8
|
| 127 |
+
},
|
| 128 |
+
"model.layers.8.block_sparse_moe.gate": {
|
| 129 |
+
"group_size": 64,
|
| 130 |
+
"bits": 8
|
| 131 |
+
},
|
| 132 |
+
"model.layers.9.block_sparse_moe.gate": {
|
| 133 |
+
"group_size": 64,
|
| 134 |
+
"bits": 8
|
| 135 |
+
},
|
| 136 |
+
"model.layers.10.block_sparse_moe.gate": {
|
| 137 |
+
"group_size": 64,
|
| 138 |
+
"bits": 8
|
| 139 |
+
},
|
| 140 |
+
"model.layers.11.block_sparse_moe.gate": {
|
| 141 |
+
"group_size": 64,
|
| 142 |
+
"bits": 8
|
| 143 |
+
},
|
| 144 |
+
"model.layers.12.block_sparse_moe.gate": {
|
| 145 |
+
"group_size": 64,
|
| 146 |
+
"bits": 8
|
| 147 |
+
},
|
| 148 |
+
"model.layers.13.block_sparse_moe.gate": {
|
| 149 |
+
"group_size": 64,
|
| 150 |
+
"bits": 8
|
| 151 |
+
},
|
| 152 |
+
"model.layers.14.block_sparse_moe.gate": {
|
| 153 |
+
"group_size": 64,
|
| 154 |
+
"bits": 8
|
| 155 |
+
},
|
| 156 |
+
"model.layers.15.block_sparse_moe.gate": {
|
| 157 |
+
"group_size": 64,
|
| 158 |
+
"bits": 8
|
| 159 |
+
},
|
| 160 |
+
"model.layers.16.block_sparse_moe.gate": {
|
| 161 |
+
"group_size": 64,
|
| 162 |
+
"bits": 8
|
| 163 |
+
},
|
| 164 |
+
"model.layers.17.block_sparse_moe.gate": {
|
| 165 |
+
"group_size": 64,
|
| 166 |
+
"bits": 8
|
| 167 |
+
},
|
| 168 |
+
"model.layers.18.block_sparse_moe.gate": {
|
| 169 |
+
"group_size": 64,
|
| 170 |
+
"bits": 8
|
| 171 |
+
},
|
| 172 |
+
"model.layers.19.block_sparse_moe.gate": {
|
| 173 |
+
"group_size": 64,
|
| 174 |
+
"bits": 8
|
| 175 |
+
},
|
| 176 |
+
"model.layers.20.block_sparse_moe.gate": {
|
| 177 |
+
"group_size": 64,
|
| 178 |
+
"bits": 8
|
| 179 |
+
},
|
| 180 |
+
"model.layers.21.block_sparse_moe.gate": {
|
| 181 |
+
"group_size": 64,
|
| 182 |
+
"bits": 8
|
| 183 |
+
},
|
| 184 |
+
"model.layers.22.block_sparse_moe.gate": {
|
| 185 |
+
"group_size": 64,
|
| 186 |
+
"bits": 8
|
| 187 |
+
},
|
| 188 |
+
"model.layers.23.block_sparse_moe.gate": {
|
| 189 |
+
"group_size": 64,
|
| 190 |
+
"bits": 8
|
| 191 |
+
},
|
| 192 |
+
"model.layers.24.block_sparse_moe.gate": {
|
| 193 |
+
"group_size": 64,
|
| 194 |
+
"bits": 8
|
| 195 |
+
},
|
| 196 |
+
"model.layers.25.block_sparse_moe.gate": {
|
| 197 |
+
"group_size": 64,
|
| 198 |
+
"bits": 8
|
| 199 |
+
},
|
| 200 |
+
"model.layers.26.block_sparse_moe.gate": {
|
| 201 |
+
"group_size": 64,
|
| 202 |
+
"bits": 8
|
| 203 |
+
},
|
| 204 |
+
"model.layers.27.block_sparse_moe.gate": {
|
| 205 |
+
"group_size": 64,
|
| 206 |
+
"bits": 8
|
| 207 |
+
},
|
| 208 |
+
"model.layers.28.block_sparse_moe.gate": {
|
| 209 |
+
"group_size": 64,
|
| 210 |
+
"bits": 8
|
| 211 |
+
},
|
| 212 |
+
"model.layers.29.block_sparse_moe.gate": {
|
| 213 |
+
"group_size": 64,
|
| 214 |
+
"bits": 8
|
| 215 |
+
},
|
| 216 |
+
"model.layers.30.block_sparse_moe.gate": {
|
| 217 |
+
"group_size": 64,
|
| 218 |
+
"bits": 8
|
| 219 |
+
},
|
| 220 |
+
"model.layers.31.block_sparse_moe.gate": {
|
| 221 |
+
"group_size": 64,
|
| 222 |
+
"bits": 8
|
| 223 |
+
},
|
| 224 |
+
"model.layers.32.block_sparse_moe.gate": {
|
| 225 |
+
"group_size": 64,
|
| 226 |
+
"bits": 8
|
| 227 |
+
},
|
| 228 |
+
"model.layers.33.block_sparse_moe.gate": {
|
| 229 |
+
"group_size": 64,
|
| 230 |
+
"bits": 8
|
| 231 |
+
},
|
| 232 |
+
"model.layers.34.block_sparse_moe.gate": {
|
| 233 |
+
"group_size": 64,
|
| 234 |
+
"bits": 8
|
| 235 |
+
},
|
| 236 |
+
"model.layers.35.block_sparse_moe.gate": {
|
| 237 |
+
"group_size": 64,
|
| 238 |
+
"bits": 8
|
| 239 |
+
},
|
| 240 |
+
"model.layers.36.block_sparse_moe.gate": {
|
| 241 |
+
"group_size": 64,
|
| 242 |
+
"bits": 8
|
| 243 |
+
},
|
| 244 |
+
"model.layers.37.block_sparse_moe.gate": {
|
| 245 |
+
"group_size": 64,
|
| 246 |
+
"bits": 8
|
| 247 |
+
},
|
| 248 |
+
"model.layers.38.block_sparse_moe.gate": {
|
| 249 |
+
"group_size": 64,
|
| 250 |
+
"bits": 8
|
| 251 |
+
},
|
| 252 |
+
"model.layers.39.block_sparse_moe.gate": {
|
| 253 |
+
"group_size": 64,
|
| 254 |
+
"bits": 8
|
| 255 |
+
},
|
| 256 |
+
"model.layers.40.block_sparse_moe.gate": {
|
| 257 |
+
"group_size": 64,
|
| 258 |
+
"bits": 8
|
| 259 |
+
},
|
| 260 |
+
"model.layers.41.block_sparse_moe.gate": {
|
| 261 |
+
"group_size": 64,
|
| 262 |
+
"bits": 8
|
| 263 |
+
},
|
| 264 |
+
"model.layers.42.block_sparse_moe.gate": {
|
| 265 |
+
"group_size": 64,
|
| 266 |
+
"bits": 8
|
| 267 |
+
},
|
| 268 |
+
"model.layers.43.block_sparse_moe.gate": {
|
| 269 |
+
"group_size": 64,
|
| 270 |
+
"bits": 8
|
| 271 |
+
},
|
| 272 |
+
"model.layers.44.block_sparse_moe.gate": {
|
| 273 |
+
"group_size": 64,
|
| 274 |
+
"bits": 8
|
| 275 |
+
},
|
| 276 |
+
"model.layers.45.block_sparse_moe.gate": {
|
| 277 |
+
"group_size": 64,
|
| 278 |
+
"bits": 8
|
| 279 |
+
},
|
| 280 |
+
"model.layers.46.block_sparse_moe.gate": {
|
| 281 |
+
"group_size": 64,
|
| 282 |
+
"bits": 8
|
| 283 |
+
},
|
| 284 |
+
"model.layers.47.block_sparse_moe.gate": {
|
| 285 |
+
"group_size": 64,
|
| 286 |
+
"bits": 8
|
| 287 |
+
},
|
| 288 |
+
"model.layers.48.block_sparse_moe.gate": {
|
| 289 |
+
"group_size": 64,
|
| 290 |
+
"bits": 8
|
| 291 |
+
},
|
| 292 |
+
"model.layers.49.block_sparse_moe.gate": {
|
| 293 |
+
"group_size": 64,
|
| 294 |
+
"bits": 8
|
| 295 |
+
},
|
| 296 |
+
"model.layers.50.block_sparse_moe.gate": {
|
| 297 |
+
"group_size": 64,
|
| 298 |
+
"bits": 8
|
| 299 |
+
},
|
| 300 |
+
"model.layers.51.block_sparse_moe.gate": {
|
| 301 |
+
"group_size": 64,
|
| 302 |
+
"bits": 8
|
| 303 |
+
},
|
| 304 |
+
"model.layers.52.block_sparse_moe.gate": {
|
| 305 |
+
"group_size": 64,
|
| 306 |
+
"bits": 8
|
| 307 |
+
},
|
| 308 |
+
"model.layers.53.block_sparse_moe.gate": {
|
| 309 |
+
"group_size": 64,
|
| 310 |
+
"bits": 8
|
| 311 |
+
},
|
| 312 |
+
"model.layers.54.block_sparse_moe.gate": {
|
| 313 |
+
"group_size": 64,
|
| 314 |
+
"bits": 8
|
| 315 |
+
},
|
| 316 |
+
"model.layers.55.block_sparse_moe.gate": {
|
| 317 |
+
"group_size": 64,
|
| 318 |
+
"bits": 8
|
| 319 |
+
},
|
| 320 |
+
"model.layers.56.block_sparse_moe.gate": {
|
| 321 |
+
"group_size": 64,
|
| 322 |
+
"bits": 8
|
| 323 |
+
},
|
| 324 |
+
"model.layers.57.block_sparse_moe.gate": {
|
| 325 |
+
"group_size": 64,
|
| 326 |
+
"bits": 8
|
| 327 |
+
},
|
| 328 |
+
"model.layers.58.block_sparse_moe.gate": {
|
| 329 |
+
"group_size": 64,
|
| 330 |
+
"bits": 8
|
| 331 |
+
},
|
| 332 |
+
"model.layers.59.block_sparse_moe.gate": {
|
| 333 |
+
"group_size": 64,
|
| 334 |
+
"bits": 8
|
| 335 |
+
},
|
| 336 |
+
"model.layers.60.block_sparse_moe.gate": {
|
| 337 |
+
"group_size": 64,
|
| 338 |
+
"bits": 8
|
| 339 |
+
},
|
| 340 |
+
"model.layers.61.block_sparse_moe.gate": {
|
| 341 |
+
"group_size": 64,
|
| 342 |
+
"bits": 8
|
| 343 |
+
}
|
| 344 |
+
},
|
| 345 |
+
"quantization_config": {
|
| 346 |
+
"group_size": 64,
|
| 347 |
+
"bits": 6,
|
| 348 |
+
"mode": "affine",
|
| 349 |
+
"model.layers.0.block_sparse_moe.gate": {
|
| 350 |
+
"group_size": 64,
|
| 351 |
+
"bits": 8
|
| 352 |
+
},
|
| 353 |
+
"model.layers.1.block_sparse_moe.gate": {
|
| 354 |
+
"group_size": 64,
|
| 355 |
+
"bits": 8
|
| 356 |
+
},
|
| 357 |
+
"model.layers.2.block_sparse_moe.gate": {
|
| 358 |
+
"group_size": 64,
|
| 359 |
+
"bits": 8
|
| 360 |
+
},
|
| 361 |
+
"model.layers.3.block_sparse_moe.gate": {
|
| 362 |
+
"group_size": 64,
|
| 363 |
+
"bits": 8
|
| 364 |
+
},
|
| 365 |
+
"model.layers.4.block_sparse_moe.gate": {
|
| 366 |
+
"group_size": 64,
|
| 367 |
+
"bits": 8
|
| 368 |
+
},
|
| 369 |
+
"model.layers.5.block_sparse_moe.gate": {
|
| 370 |
+
"group_size": 64,
|
| 371 |
+
"bits": 8
|
| 372 |
+
},
|
| 373 |
+
"model.layers.6.block_sparse_moe.gate": {
|
| 374 |
+
"group_size": 64,
|
| 375 |
+
"bits": 8
|
| 376 |
+
},
|
| 377 |
+
"model.layers.7.block_sparse_moe.gate": {
|
| 378 |
+
"group_size": 64,
|
| 379 |
+
"bits": 8
|
| 380 |
+
},
|
| 381 |
+
"model.layers.8.block_sparse_moe.gate": {
|
| 382 |
+
"group_size": 64,
|
| 383 |
+
"bits": 8
|
| 384 |
+
},
|
| 385 |
+
"model.layers.9.block_sparse_moe.gate": {
|
| 386 |
+
"group_size": 64,
|
| 387 |
+
"bits": 8
|
| 388 |
+
},
|
| 389 |
+
"model.layers.10.block_sparse_moe.gate": {
|
| 390 |
+
"group_size": 64,
|
| 391 |
+
"bits": 8
|
| 392 |
+
},
|
| 393 |
+
"model.layers.11.block_sparse_moe.gate": {
|
| 394 |
+
"group_size": 64,
|
| 395 |
+
"bits": 8
|
| 396 |
+
},
|
| 397 |
+
"model.layers.12.block_sparse_moe.gate": {
|
| 398 |
+
"group_size": 64,
|
| 399 |
+
"bits": 8
|
| 400 |
+
},
|
| 401 |
+
"model.layers.13.block_sparse_moe.gate": {
|
| 402 |
+
"group_size": 64,
|
| 403 |
+
"bits": 8
|
| 404 |
+
},
|
| 405 |
+
"model.layers.14.block_sparse_moe.gate": {
|
| 406 |
+
"group_size": 64,
|
| 407 |
+
"bits": 8
|
| 408 |
+
},
|
| 409 |
+
"model.layers.15.block_sparse_moe.gate": {
|
| 410 |
+
"group_size": 64,
|
| 411 |
+
"bits": 8
|
| 412 |
+
},
|
| 413 |
+
"model.layers.16.block_sparse_moe.gate": {
|
| 414 |
+
"group_size": 64,
|
| 415 |
+
"bits": 8
|
| 416 |
+
},
|
| 417 |
+
"model.layers.17.block_sparse_moe.gate": {
|
| 418 |
+
"group_size": 64,
|
| 419 |
+
"bits": 8
|
| 420 |
+
},
|
| 421 |
+
"model.layers.18.block_sparse_moe.gate": {
|
| 422 |
+
"group_size": 64,
|
| 423 |
+
"bits": 8
|
| 424 |
+
},
|
| 425 |
+
"model.layers.19.block_sparse_moe.gate": {
|
| 426 |
+
"group_size": 64,
|
| 427 |
+
"bits": 8
|
| 428 |
+
},
|
| 429 |
+
"model.layers.20.block_sparse_moe.gate": {
|
| 430 |
+
"group_size": 64,
|
| 431 |
+
"bits": 8
|
| 432 |
+
},
|
| 433 |
+
"model.layers.21.block_sparse_moe.gate": {
|
| 434 |
+
"group_size": 64,
|
| 435 |
+
"bits": 8
|
| 436 |
+
},
|
| 437 |
+
"model.layers.22.block_sparse_moe.gate": {
|
| 438 |
+
"group_size": 64,
|
| 439 |
+
"bits": 8
|
| 440 |
+
},
|
| 441 |
+
"model.layers.23.block_sparse_moe.gate": {
|
| 442 |
+
"group_size": 64,
|
| 443 |
+
"bits": 8
|
| 444 |
+
},
|
| 445 |
+
"model.layers.24.block_sparse_moe.gate": {
|
| 446 |
+
"group_size": 64,
|
| 447 |
+
"bits": 8
|
| 448 |
+
},
|
| 449 |
+
"model.layers.25.block_sparse_moe.gate": {
|
| 450 |
+
"group_size": 64,
|
| 451 |
+
"bits": 8
|
| 452 |
+
},
|
| 453 |
+
"model.layers.26.block_sparse_moe.gate": {
|
| 454 |
+
"group_size": 64,
|
| 455 |
+
"bits": 8
|
| 456 |
+
},
|
| 457 |
+
"model.layers.27.block_sparse_moe.gate": {
|
| 458 |
+
"group_size": 64,
|
| 459 |
+
"bits": 8
|
| 460 |
+
},
|
| 461 |
+
"model.layers.28.block_sparse_moe.gate": {
|
| 462 |
+
"group_size": 64,
|
| 463 |
+
"bits": 8
|
| 464 |
+
},
|
| 465 |
+
"model.layers.29.block_sparse_moe.gate": {
|
| 466 |
+
"group_size": 64,
|
| 467 |
+
"bits": 8
|
| 468 |
+
},
|
| 469 |
+
"model.layers.30.block_sparse_moe.gate": {
|
| 470 |
+
"group_size": 64,
|
| 471 |
+
"bits": 8
|
| 472 |
+
},
|
| 473 |
+
"model.layers.31.block_sparse_moe.gate": {
|
| 474 |
+
"group_size": 64,
|
| 475 |
+
"bits": 8
|
| 476 |
+
},
|
| 477 |
+
"model.layers.32.block_sparse_moe.gate": {
|
| 478 |
+
"group_size": 64,
|
| 479 |
+
"bits": 8
|
| 480 |
+
},
|
| 481 |
+
"model.layers.33.block_sparse_moe.gate": {
|
| 482 |
+
"group_size": 64,
|
| 483 |
+
"bits": 8
|
| 484 |
+
},
|
| 485 |
+
"model.layers.34.block_sparse_moe.gate": {
|
| 486 |
+
"group_size": 64,
|
| 487 |
+
"bits": 8
|
| 488 |
+
},
|
| 489 |
+
"model.layers.35.block_sparse_moe.gate": {
|
| 490 |
+
"group_size": 64,
|
| 491 |
+
"bits": 8
|
| 492 |
+
},
|
| 493 |
+
"model.layers.36.block_sparse_moe.gate": {
|
| 494 |
+
"group_size": 64,
|
| 495 |
+
"bits": 8
|
| 496 |
+
},
|
| 497 |
+
"model.layers.37.block_sparse_moe.gate": {
|
| 498 |
+
"group_size": 64,
|
| 499 |
+
"bits": 8
|
| 500 |
+
},
|
| 501 |
+
"model.layers.38.block_sparse_moe.gate": {
|
| 502 |
+
"group_size": 64,
|
| 503 |
+
"bits": 8
|
| 504 |
+
},
|
| 505 |
+
"model.layers.39.block_sparse_moe.gate": {
|
| 506 |
+
"group_size": 64,
|
| 507 |
+
"bits": 8
|
| 508 |
+
},
|
| 509 |
+
"model.layers.40.block_sparse_moe.gate": {
|
| 510 |
+
"group_size": 64,
|
| 511 |
+
"bits": 8
|
| 512 |
+
},
|
| 513 |
+
"model.layers.41.block_sparse_moe.gate": {
|
| 514 |
+
"group_size": 64,
|
| 515 |
+
"bits": 8
|
| 516 |
+
},
|
| 517 |
+
"model.layers.42.block_sparse_moe.gate": {
|
| 518 |
+
"group_size": 64,
|
| 519 |
+
"bits": 8
|
| 520 |
+
},
|
| 521 |
+
"model.layers.43.block_sparse_moe.gate": {
|
| 522 |
+
"group_size": 64,
|
| 523 |
+
"bits": 8
|
| 524 |
+
},
|
| 525 |
+
"model.layers.44.block_sparse_moe.gate": {
|
| 526 |
+
"group_size": 64,
|
| 527 |
+
"bits": 8
|
| 528 |
+
},
|
| 529 |
+
"model.layers.45.block_sparse_moe.gate": {
|
| 530 |
+
"group_size": 64,
|
| 531 |
+
"bits": 8
|
| 532 |
+
},
|
| 533 |
+
"model.layers.46.block_sparse_moe.gate": {
|
| 534 |
+
"group_size": 64,
|
| 535 |
+
"bits": 8
|
| 536 |
+
},
|
| 537 |
+
"model.layers.47.block_sparse_moe.gate": {
|
| 538 |
+
"group_size": 64,
|
| 539 |
+
"bits": 8
|
| 540 |
+
},
|
| 541 |
+
"model.layers.48.block_sparse_moe.gate": {
|
| 542 |
+
"group_size": 64,
|
| 543 |
+
"bits": 8
|
| 544 |
+
},
|
| 545 |
+
"model.layers.49.block_sparse_moe.gate": {
|
| 546 |
+
"group_size": 64,
|
| 547 |
+
"bits": 8
|
| 548 |
+
},
|
| 549 |
+
"model.layers.50.block_sparse_moe.gate": {
|
| 550 |
+
"group_size": 64,
|
| 551 |
+
"bits": 8
|
| 552 |
+
},
|
| 553 |
+
"model.layers.51.block_sparse_moe.gate": {
|
| 554 |
+
"group_size": 64,
|
| 555 |
+
"bits": 8
|
| 556 |
+
},
|
| 557 |
+
"model.layers.52.block_sparse_moe.gate": {
|
| 558 |
+
"group_size": 64,
|
| 559 |
+
"bits": 8
|
| 560 |
+
},
|
| 561 |
+
"model.layers.53.block_sparse_moe.gate": {
|
| 562 |
+
"group_size": 64,
|
| 563 |
+
"bits": 8
|
| 564 |
+
},
|
| 565 |
+
"model.layers.54.block_sparse_moe.gate": {
|
| 566 |
+
"group_size": 64,
|
| 567 |
+
"bits": 8
|
| 568 |
+
},
|
| 569 |
+
"model.layers.55.block_sparse_moe.gate": {
|
| 570 |
+
"group_size": 64,
|
| 571 |
+
"bits": 8
|
| 572 |
+
},
|
| 573 |
+
"model.layers.56.block_sparse_moe.gate": {
|
| 574 |
+
"group_size": 64,
|
| 575 |
+
"bits": 8
|
| 576 |
+
},
|
| 577 |
+
"model.layers.57.block_sparse_moe.gate": {
|
| 578 |
+
"group_size": 64,
|
| 579 |
+
"bits": 8
|
| 580 |
+
},
|
| 581 |
+
"model.layers.58.block_sparse_moe.gate": {
|
| 582 |
+
"group_size": 64,
|
| 583 |
+
"bits": 8
|
| 584 |
+
},
|
| 585 |
+
"model.layers.59.block_sparse_moe.gate": {
|
| 586 |
+
"group_size": 64,
|
| 587 |
+
"bits": 8
|
| 588 |
+
},
|
| 589 |
+
"model.layers.60.block_sparse_moe.gate": {
|
| 590 |
+
"group_size": 64,
|
| 591 |
+
"bits": 8
|
| 592 |
+
},
|
| 593 |
+
"model.layers.61.block_sparse_moe.gate": {
|
| 594 |
+
"group_size": 64,
|
| 595 |
+
"bits": 8
|
| 596 |
+
}
|
| 597 |
+
},
|
| 598 |
+
"rms_norm_eps": 1e-06,
|
| 599 |
+
"rope_theta": 5000000,
|
| 600 |
+
"rotary_dim": 64,
|
| 601 |
+
"router_aux_loss_coef": 0.001,
|
| 602 |
+
"router_jitter_noise": 0.0,
|
| 603 |
+
"scoring_func": "sigmoid",
|
| 604 |
+
"shared_intermediate_size": 0,
|
| 605 |
+
"shared_moe_mode": "sigmoid",
|
| 606 |
+
"sliding_window": null,
|
| 607 |
+
"tie_word_embeddings": false,
|
| 608 |
+
"transformers_version": "4.46.1",
|
| 609 |
+
"use_cache": true,
|
| 610 |
+
"use_mtp": true,
|
| 611 |
+
"use_qk_norm": true,
|
| 612 |
+
"use_routing_bias": true,
|
| 613 |
+
"vocab_size": 200064
|
| 614 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"do_sample": true,
|
| 3 |
+
"temperature": 1.0,
|
| 4 |
+
"top_p": 0.95,
|
| 5 |
+
"top_k": 40,
|
| 6 |
+
"transformers_version": "4.46.1"
|
| 7 |
+
}
|
merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
model-00001-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d132f3f45b669b687df185c1f8664a255eb3217fb4de74686d4746e040f6099b
|
| 3 |
+
size 4498611286
|
model-00002-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:585ec5cc3024d5425b87488e732005fb15a8a3f120a8b8eb872ae57c08493f68
|
| 3 |
+
size 4980732462
|
model-00003-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb9e7528ea5afdd712ce860d7554c0da539924c02c7d74e4aab507bb3966b04d
|
| 3 |
+
size 4944035148
|
model-00004-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ecf4dc0cc50a5a71a29fdb329337c060cb72ccaeb6325e7120f9df0fc54e6285
|
| 3 |
+
size 4980732494
|
model-00005-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:14e13e2a9fb42ee57d0b8286a7d82b4366bcc066051a9d76cc06b898e7733c52
|
| 3 |
+
size 4980732440
|
model-00006-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9cb379ce0497a3a9482b4934d5a52fe66915d8a813ee0685f49a233d033619fe
|
| 3 |
+
size 4944035148
|
model-00007-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0acc3bc7748ca4d912addf995084328920d9337201e246a4cd7548099bfab619
|
| 3 |
+
size 4980732543
|
model-00008-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76b15447a277ced959c9fce219e66e4dc66f3756ecac458d80f88f6548ca0386
|
| 3 |
+
size 4980732511
|
model-00009-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:492fbe4e41d20d0ec22edff0b89257606cab8f16b7b42ab42484565906cfc661
|
| 3 |
+
size 4944035183
|
model-00010-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d5580b98c2866f3b9891e564c103a0ef6618cd12d07d481400b2f8836b05f058
|
| 3 |
+
size 4980732533
|
model-00011-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e86ccd87baf3791c93a79098ea6d263a543ca900d66f300ade171a824c91b2c
|
| 3 |
+
size 4980732501
|
model-00012-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:392f5dc656c7e882d72f926b13f721f2228ed99c945461de1d45f053933918c8
|
| 3 |
+
size 4944035153
|
model-00013-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65065e81ac815d0ec69b70f3073b18d84423db0dc8b651e0790d73cc90e64c3c
|
| 3 |
+
size 4980732517
|
model-00014-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5af2c5ae5c040a4608de192cf524d728f9019c81bbe206cb53bfbb93431fc24
|
| 3 |
+
size 4980732513
|
model-00015-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e3e42e69ab3c4890b5a4aabc61460f2611d3a8836b6b71a0f687df2a786ba956
|
| 3 |
+
size 4944035183
|
model-00016-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3b392c6a3390007463cb5b62a324eca66431e62ed6b05ffe57ba50c559091bb9
|
| 3 |
+
size 4980732545
|
model-00017-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84f5f9e80a5ed182849269fef314defe45340924fdcf355fd86da819150fda68
|
| 3 |
+
size 4980732511
|
model-00018-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92922013ea374800ecb6760e29b14967ebeb662a94e966418bb0f2b774442f34
|
| 3 |
+
size 4944035183
|
model-00019-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3979a295f28ccd2295cf95103f342331d46e7f8f453b5165411506f0e2c7a14
|
| 3 |
+
size 4980732515
|
model-00020-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a9d4959fe9f5c262df41cee3d05f32e3bb87f63612ebd63bd8536774d63739c
|
| 3 |
+
size 4980732513
|
model-00021-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b16143b39042d3ec0d1370c2f0d16620305a792d879b11e706ac16f591d6b939
|
| 3 |
+
size 4944035175
|
model-00022-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a0b3cc63114f085e4da179006306287ba559265958029072eb7da1288e3ca1fb
|
| 3 |
+
size 4980732545
|
model-00023-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51fae3d83a6955b6906e2474229f5c524ba0cf45bfa43190d5deaf6365347a5f
|
| 3 |
+
size 4980732493
|
model-00024-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ea57b899408f51f6965885cc64d03a4377c84da84a90d8717a38decec37cac9
|
| 3 |
+
size 4944035173
|
model-00025-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:922e77ac5ec7eb71eb31c0608af1550d247cdaa264c991ea179068f2eda8b92c
|
| 3 |
+
size 4980732553
|
model-00026-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0995a21a08bc677c1c7faf753f42d5dd073636acec247efa69111a5eae1b500
|
| 3 |
+
size 4980732497
|
model-00027-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ffaa3c59793a5413f8be7b0301b067765d8ef2baada49142662925dc4f8bbdb
|
| 3 |
+
size 4944035181
|
model-00028-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c60f4cecc0b39e7e059d9bafecb739d3210182b305f72ad804bf6388c6aae539
|
| 3 |
+
size 4980732561
|
model-00029-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4b83da9e839a3835fa638ae1ce14d0c19fb837b26d7ca94f597665ea06b5c5ac
|
| 3 |
+
size 4980732505
|
model-00030-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca8930769abfcbba15c03e6bd1c07dda302722cbef9ea9b2a5ae228a9512bac1
|
| 3 |
+
size 4944035169
|
model-00031-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9767c1c00f0ac795614d3eb243e1043ef8d98df070b6f4deec793974169a09d3
|
| 3 |
+
size 4980732535
|
model-00032-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9287bd5f61a6e042f38d08c6193248779b6733d36b2194e1491c6d211d08419c
|
| 3 |
+
size 4980732489
|
model-00033-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e201fa92ae674c15c9b8a4e73d9ad54b992b8fe30d2e5484b9b9d33b8cf4bb3
|
| 3 |
+
size 4944035187
|
model-00034-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:69143bfc34eb6ab0d26cd3310af1b19ef8c6cf8549d755776984ad317d193b11
|
| 3 |
+
size 4980732551
|
model-00035-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1aa34c4777e93e6f833604b8055de4ce3ba2667f8bae7588de30befbf4b318d1
|
| 3 |
+
size 4980732489
|
model-00036-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4e9373ed9e2e4593ef0d7b4deb1291fa0f6757e76639b7f42b13b0d447696da4
|
| 3 |
+
size 4944035185
|
model-00037-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8fb67536eafcf5dc2b7575fe0254fdea23f6e949b811cc47603ce0a938e56e9
|
| 3 |
+
size 4980732521
|
model-00038-of-00038.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7480f64d9d77a0ca39f43f571dbeccb2e1a70be0868d082fc93724f5a6c9e0c
|
| 3 |
+
size 2462315047
|
model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<code_interpreter>",
|
| 4 |
+
"<commit_after>",
|
| 5 |
+
"<commit_before>",
|
| 6 |
+
"<commit_msg>",
|
| 7 |
+
"<empty_output>",
|
| 8 |
+
"<filename>",
|
| 9 |
+
"<fim_middle>",
|
| 10 |
+
"<fim_pad>",
|
| 11 |
+
"<fim_prefix>",
|
| 12 |
+
"<fim_suffix>",
|
| 13 |
+
"<function_call>",
|
| 14 |
+
"<gh_stars>",
|
| 15 |
+
"]<]speech[>[",
|
| 16 |
+
"]<]image[>[",
|
| 17 |
+
"]<]video[>[",
|
| 18 |
+
"]<]start of speech[>[",
|
| 19 |
+
"]<]end of speech[>[",
|
| 20 |
+
"]<]start of image[>[",
|
| 21 |
+
"]<]end of image[>[",
|
| 22 |
+
"]<]start of video[>[",
|
| 23 |
+
"]<]end of video[>[",
|
| 24 |
+
"]<]vision pad[>[",
|
| 25 |
+
"]~!b[",
|
| 26 |
+
"<issue_closed>",
|
| 27 |
+
"<issue_comment>",
|
| 28 |
+
"<issue_start>",
|
| 29 |
+
"<jupyter_code>",
|
| 30 |
+
"<jupyter_output>",
|
| 31 |
+
"<jupyter_start>",
|
| 32 |
+
"<jupyter_text>",
|
| 33 |
+
"<reponame>",
|
| 34 |
+
"[e~[",
|
| 35 |
+
"]!d~[",
|
| 36 |
+
"]!p~[",
|
| 37 |
+
"]~b]",
|
| 38 |
+
"<jupyter_error>",
|
| 39 |
+
"<add_file>",
|
| 40 |
+
"<delete_file>",
|
| 41 |
+
"<rename_file>",
|
| 42 |
+
"<edit_file>",
|
| 43 |
+
"<commit_message>",
|
| 44 |
+
"<empty_source_file>",
|
| 45 |
+
"<repo_struct>",
|
| 46 |
+
"<code_context>",
|
| 47 |
+
"<file_content>",
|
| 48 |
+
"<source_files>",
|
| 49 |
+
"<pr_start>",
|
| 50 |
+
"<review_comment>",
|
| 51 |
+
"<filepath>",
|
| 52 |
+
"<file_sep>"
|
| 53 |
+
],
|
| 54 |
+
"bos_token": {
|
| 55 |
+
"content": "]~!b[",
|
| 56 |
+
"lstrip": false,
|
| 57 |
+
"normalized": false,
|
| 58 |
+
"rstrip": false,
|
| 59 |
+
"single_word": false
|
| 60 |
+
},
|
| 61 |
+
"eos_token": {
|
| 62 |
+
"content": "[e~[",
|
| 63 |
+
"lstrip": false,
|
| 64 |
+
"normalized": false,
|
| 65 |
+
"rstrip": false,
|
| 66 |
+
"single_word": false
|
| 67 |
+
},
|
| 68 |
+
"unk_token": {
|
| 69 |
+
"content": "]!d~[",
|
| 70 |
+
"lstrip": false,
|
| 71 |
+
"normalized": false,
|
| 72 |
+
"rstrip": false,
|
| 73 |
+
"single_word": false
|
| 74 |
+
}
|
| 75 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7b90ed7f55d905175bc26771d6d7d33b40b46742f073675bc816fedaf482ea1
|
| 3 |
+
size 15522763
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,497 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"added_tokens_decoder": {
|
| 4 |
+
"200000": {
|
| 5 |
+
"content": "]!p~[",
|
| 6 |
+
"lstrip": false,
|
| 7 |
+
"normalized": false,
|
| 8 |
+
"rstrip": false,
|
| 9 |
+
"single_word": false,
|
| 10 |
+
"special": true
|
| 11 |
+
},
|
| 12 |
+
"200001": {
|
| 13 |
+
"content": "<fim_prefix>",
|
| 14 |
+
"lstrip": false,
|
| 15 |
+
"normalized": false,
|
| 16 |
+
"rstrip": false,
|
| 17 |
+
"single_word": false,
|
| 18 |
+
"special": true
|
| 19 |
+
},
|
| 20 |
+
"200002": {
|
| 21 |
+
"content": "<fim_middle>",
|
| 22 |
+
"lstrip": false,
|
| 23 |
+
"normalized": false,
|
| 24 |
+
"rstrip": false,
|
| 25 |
+
"single_word": false,
|
| 26 |
+
"special": true
|
| 27 |
+
},
|
| 28 |
+
"200003": {
|
| 29 |
+
"content": "<fim_suffix>",
|
| 30 |
+
"lstrip": false,
|
| 31 |
+
"normalized": false,
|
| 32 |
+
"rstrip": false,
|
| 33 |
+
"single_word": false,
|
| 34 |
+
"special": true
|
| 35 |
+
},
|
| 36 |
+
"200004": {
|
| 37 |
+
"content": "<fim_pad>",
|
| 38 |
+
"lstrip": false,
|
| 39 |
+
"normalized": false,
|
| 40 |
+
"rstrip": false,
|
| 41 |
+
"single_word": false,
|
| 42 |
+
"special": true
|
| 43 |
+
},
|
| 44 |
+
"200005": {
|
| 45 |
+
"content": "<reponame>",
|
| 46 |
+
"lstrip": false,
|
| 47 |
+
"normalized": false,
|
| 48 |
+
"rstrip": false,
|
| 49 |
+
"single_word": false,
|
| 50 |
+
"special": true
|
| 51 |
+
},
|
| 52 |
+
"200006": {
|
| 53 |
+
"content": "<filename>",
|
| 54 |
+
"lstrip": false,
|
| 55 |
+
"normalized": false,
|
| 56 |
+
"rstrip": false,
|
| 57 |
+
"single_word": false,
|
| 58 |
+
"special": true
|
| 59 |
+
},
|
| 60 |
+
"200007": {
|
| 61 |
+
"content": "<gh_stars>",
|
| 62 |
+
"lstrip": false,
|
| 63 |
+
"normalized": false,
|
| 64 |
+
"rstrip": false,
|
| 65 |
+
"single_word": false,
|
| 66 |
+
"special": true
|
| 67 |
+
},
|
| 68 |
+
"200008": {
|
| 69 |
+
"content": "<issue_start>",
|
| 70 |
+
"lstrip": false,
|
| 71 |
+
"normalized": false,
|
| 72 |
+
"rstrip": false,
|
| 73 |
+
"single_word": false,
|
| 74 |
+
"special": true
|
| 75 |
+
},
|
| 76 |
+
"200009": {
|
| 77 |
+
"content": "<issue_comment>",
|
| 78 |
+
"lstrip": false,
|
| 79 |
+
"normalized": false,
|
| 80 |
+
"rstrip": false,
|
| 81 |
+
"single_word": false,
|
| 82 |
+
"special": true
|
| 83 |
+
},
|
| 84 |
+
"200010": {
|
| 85 |
+
"content": "<issue_closed>",
|
| 86 |
+
"lstrip": false,
|
| 87 |
+
"normalized": false,
|
| 88 |
+
"rstrip": false,
|
| 89 |
+
"single_word": false,
|
| 90 |
+
"special": true
|
| 91 |
+
},
|
| 92 |
+
"200011": {
|
| 93 |
+
"content": "<jupyter_start>",
|
| 94 |
+
"lstrip": false,
|
| 95 |
+
"normalized": false,
|
| 96 |
+
"rstrip": false,
|
| 97 |
+
"single_word": false,
|
| 98 |
+
"special": true
|
| 99 |
+
},
|
| 100 |
+
"200012": {
|
| 101 |
+
"content": "<jupyter_text>",
|
| 102 |
+
"lstrip": false,
|
| 103 |
+
"normalized": false,
|
| 104 |
+
"rstrip": false,
|
| 105 |
+
"single_word": false,
|
| 106 |
+
"special": true
|
| 107 |
+
},
|
| 108 |
+
"200013": {
|
| 109 |
+
"content": "<jupyter_code>",
|
| 110 |
+
"lstrip": false,
|
| 111 |
+
"normalized": false,
|
| 112 |
+
"rstrip": false,
|
| 113 |
+
"single_word": false,
|
| 114 |
+
"special": true
|
| 115 |
+
},
|
| 116 |
+
"200014": {
|
| 117 |
+
"content": "<jupyter_output>",
|
| 118 |
+
"lstrip": false,
|
| 119 |
+
"normalized": false,
|
| 120 |
+
"rstrip": false,
|
| 121 |
+
"single_word": false,
|
| 122 |
+
"special": true
|
| 123 |
+
},
|
| 124 |
+
"200015": {
|
| 125 |
+
"content": "<empty_output>",
|
| 126 |
+
"lstrip": false,
|
| 127 |
+
"normalized": false,
|
| 128 |
+
"rstrip": false,
|
| 129 |
+
"single_word": false,
|
| 130 |
+
"special": true
|
| 131 |
+
},
|
| 132 |
+
"200016": {
|
| 133 |
+
"content": "<commit_before>",
|
| 134 |
+
"lstrip": false,
|
| 135 |
+
"normalized": false,
|
| 136 |
+
"rstrip": false,
|
| 137 |
+
"single_word": false,
|
| 138 |
+
"special": true
|
| 139 |
+
},
|
| 140 |
+
"200017": {
|
| 141 |
+
"content": "<commit_msg>",
|
| 142 |
+
"lstrip": false,
|
| 143 |
+
"normalized": false,
|
| 144 |
+
"rstrip": false,
|
| 145 |
+
"single_word": false,
|
| 146 |
+
"special": true
|
| 147 |
+
},
|
| 148 |
+
"200018": {
|
| 149 |
+
"content": "<commit_after>",
|
| 150 |
+
"lstrip": false,
|
| 151 |
+
"normalized": false,
|
| 152 |
+
"rstrip": false,
|
| 153 |
+
"single_word": false,
|
| 154 |
+
"special": true
|
| 155 |
+
},
|
| 156 |
+
"200019": {
|
| 157 |
+
"content": "]~b]",
|
| 158 |
+
"lstrip": false,
|
| 159 |
+
"normalized": false,
|
| 160 |
+
"rstrip": false,
|
| 161 |
+
"single_word": false,
|
| 162 |
+
"special": true
|
| 163 |
+
},
|
| 164 |
+
"200020": {
|
| 165 |
+
"content": "[e~[",
|
| 166 |
+
"lstrip": false,
|
| 167 |
+
"normalized": false,
|
| 168 |
+
"rstrip": false,
|
| 169 |
+
"single_word": false,
|
| 170 |
+
"special": true
|
| 171 |
+
},
|
| 172 |
+
"200021": {
|
| 173 |
+
"content": "]!d~[",
|
| 174 |
+
"lstrip": false,
|
| 175 |
+
"normalized": false,
|
| 176 |
+
"rstrip": false,
|
| 177 |
+
"single_word": false,
|
| 178 |
+
"special": true
|
| 179 |
+
},
|
| 180 |
+
"200022": {
|
| 181 |
+
"content": "<function_call>",
|
| 182 |
+
"lstrip": false,
|
| 183 |
+
"normalized": false,
|
| 184 |
+
"rstrip": false,
|
| 185 |
+
"single_word": false,
|
| 186 |
+
"special": true
|
| 187 |
+
},
|
| 188 |
+
"200023": {
|
| 189 |
+
"content": "<code_interpreter>",
|
| 190 |
+
"lstrip": false,
|
| 191 |
+
"normalized": false,
|
| 192 |
+
"rstrip": false,
|
| 193 |
+
"single_word": false,
|
| 194 |
+
"special": true
|
| 195 |
+
},
|
| 196 |
+
"200024": {
|
| 197 |
+
"content": "]<]speech[>[",
|
| 198 |
+
"lstrip": false,
|
| 199 |
+
"normalized": false,
|
| 200 |
+
"rstrip": false,
|
| 201 |
+
"single_word": false,
|
| 202 |
+
"special": true
|
| 203 |
+
},
|
| 204 |
+
"200025": {
|
| 205 |
+
"content": "]<]image[>[",
|
| 206 |
+
"lstrip": false,
|
| 207 |
+
"normalized": false,
|
| 208 |
+
"rstrip": false,
|
| 209 |
+
"single_word": false,
|
| 210 |
+
"special": true
|
| 211 |
+
},
|
| 212 |
+
"200026": {
|
| 213 |
+
"content": "]<]video[>[",
|
| 214 |
+
"lstrip": false,
|
| 215 |
+
"normalized": false,
|
| 216 |
+
"rstrip": false,
|
| 217 |
+
"single_word": false,
|
| 218 |
+
"special": true
|
| 219 |
+
},
|
| 220 |
+
"200027": {
|
| 221 |
+
"content": "]<]start of speech[>[",
|
| 222 |
+
"lstrip": false,
|
| 223 |
+
"normalized": false,
|
| 224 |
+
"rstrip": false,
|
| 225 |
+
"single_word": false,
|
| 226 |
+
"special": true
|
| 227 |
+
},
|
| 228 |
+
"200028": {
|
| 229 |
+
"content": "]<]end of speech[>[",
|
| 230 |
+
"lstrip": false,
|
| 231 |
+
"normalized": false,
|
| 232 |
+
"rstrip": false,
|
| 233 |
+
"single_word": false,
|
| 234 |
+
"special": true
|
| 235 |
+
},
|
| 236 |
+
"200029": {
|
| 237 |
+
"content": "]<]start of image[>[",
|
| 238 |
+
"lstrip": false,
|
| 239 |
+
"normalized": false,
|
| 240 |
+
"rstrip": false,
|
| 241 |
+
"single_word": false,
|
| 242 |
+
"special": true
|
| 243 |
+
},
|
| 244 |
+
"200030": {
|
| 245 |
+
"content": "]<]end of image[>[",
|
| 246 |
+
"lstrip": false,
|
| 247 |
+
"normalized": false,
|
| 248 |
+
"rstrip": false,
|
| 249 |
+
"single_word": false,
|
| 250 |
+
"special": true
|
| 251 |
+
},
|
| 252 |
+
"200031": {
|
| 253 |
+
"content": "]<]start of video[>[",
|
| 254 |
+
"lstrip": false,
|
| 255 |
+
"normalized": false,
|
| 256 |
+
"rstrip": false,
|
| 257 |
+
"single_word": false,
|
| 258 |
+
"special": true
|
| 259 |
+
},
|
| 260 |
+
"200032": {
|
| 261 |
+
"content": "]<]end of video[>[",
|
| 262 |
+
"lstrip": false,
|
| 263 |
+
"normalized": false,
|
| 264 |
+
"rstrip": false,
|
| 265 |
+
"single_word": false,
|
| 266 |
+
"special": true
|
| 267 |
+
},
|
| 268 |
+
"200033": {
|
| 269 |
+
"content": "]<]vision pad[>[",
|
| 270 |
+
"lstrip": false,
|
| 271 |
+
"normalized": false,
|
| 272 |
+
"rstrip": false,
|
| 273 |
+
"single_word": false,
|
| 274 |
+
"special": true
|
| 275 |
+
},
|
| 276 |
+
"200034": {
|
| 277 |
+
"content": "]~!b[",
|
| 278 |
+
"lstrip": false,
|
| 279 |
+
"normalized": false,
|
| 280 |
+
"rstrip": false,
|
| 281 |
+
"single_word": false,
|
| 282 |
+
"special": true
|
| 283 |
+
},
|
| 284 |
+
"200035": {
|
| 285 |
+
"content": "<jupyter_error>",
|
| 286 |
+
"lstrip": false,
|
| 287 |
+
"normalized": false,
|
| 288 |
+
"rstrip": false,
|
| 289 |
+
"single_word": false,
|
| 290 |
+
"special": true
|
| 291 |
+
},
|
| 292 |
+
"200036": {
|
| 293 |
+
"content": "<add_file>",
|
| 294 |
+
"lstrip": false,
|
| 295 |
+
"normalized": false,
|
| 296 |
+
"rstrip": false,
|
| 297 |
+
"single_word": false,
|
| 298 |
+
"special": true
|
| 299 |
+
},
|
| 300 |
+
"200037": {
|
| 301 |
+
"content": "<delete_file>",
|
| 302 |
+
"lstrip": false,
|
| 303 |
+
"normalized": false,
|
| 304 |
+
"rstrip": false,
|
| 305 |
+
"single_word": false,
|
| 306 |
+
"special": true
|
| 307 |
+
},
|
| 308 |
+
"200038": {
|
| 309 |
+
"content": "<rename_file>",
|
| 310 |
+
"lstrip": false,
|
| 311 |
+
"normalized": false,
|
| 312 |
+
"rstrip": false,
|
| 313 |
+
"single_word": false,
|
| 314 |
+
"special": true
|
| 315 |
+
},
|
| 316 |
+
"200039": {
|
| 317 |
+
"content": "<edit_file>",
|
| 318 |
+
"lstrip": false,
|
| 319 |
+
"normalized": false,
|
| 320 |
+
"rstrip": false,
|
| 321 |
+
"single_word": false,
|
| 322 |
+
"special": true
|
| 323 |
+
},
|
| 324 |
+
"200040": {
|
| 325 |
+
"content": "<commit_message>",
|
| 326 |
+
"lstrip": false,
|
| 327 |
+
"normalized": false,
|
| 328 |
+
"rstrip": false,
|
| 329 |
+
"single_word": false,
|
| 330 |
+
"special": true
|
| 331 |
+
},
|
| 332 |
+
"200041": {
|
| 333 |
+
"content": "<empty_source_file>",
|
| 334 |
+
"lstrip": false,
|
| 335 |
+
"normalized": false,
|
| 336 |
+
"rstrip": false,
|
| 337 |
+
"single_word": false,
|
| 338 |
+
"special": true
|
| 339 |
+
},
|
| 340 |
+
"200042": {
|
| 341 |
+
"content": "<repo_struct>",
|
| 342 |
+
"lstrip": false,
|
| 343 |
+
"normalized": false,
|
| 344 |
+
"rstrip": false,
|
| 345 |
+
"single_word": false,
|
| 346 |
+
"special": true
|
| 347 |
+
},
|
| 348 |
+
"200043": {
|
| 349 |
+
"content": "<code_context>",
|
| 350 |
+
"lstrip": false,
|
| 351 |
+
"normalized": false,
|
| 352 |
+
"rstrip": false,
|
| 353 |
+
"single_word": false,
|
| 354 |
+
"special": true
|
| 355 |
+
},
|
| 356 |
+
"200044": {
|
| 357 |
+
"content": "<file_content>",
|
| 358 |
+
"lstrip": false,
|
| 359 |
+
"normalized": false,
|
| 360 |
+
"rstrip": false,
|
| 361 |
+
"single_word": false,
|
| 362 |
+
"special": true
|
| 363 |
+
},
|
| 364 |
+
"200045": {
|
| 365 |
+
"content": "<source_files>",
|
| 366 |
+
"lstrip": false,
|
| 367 |
+
"normalized": false,
|
| 368 |
+
"rstrip": false,
|
| 369 |
+
"single_word": false,
|
| 370 |
+
"special": true
|
| 371 |
+
},
|
| 372 |
+
"200046": {
|
| 373 |
+
"content": "<pr_start>",
|
| 374 |
+
"lstrip": false,
|
| 375 |
+
"normalized": false,
|
| 376 |
+
"rstrip": false,
|
| 377 |
+
"single_word": false,
|
| 378 |
+
"special": true
|
| 379 |
+
},
|
| 380 |
+
"200047": {
|
| 381 |
+
"content": "<review_comment>",
|
| 382 |
+
"lstrip": false,
|
| 383 |
+
"normalized": false,
|
| 384 |
+
"rstrip": false,
|
| 385 |
+
"single_word": false,
|
| 386 |
+
"special": true
|
| 387 |
+
},
|
| 388 |
+
"200048": {
|
| 389 |
+
"content": "<filepath>",
|
| 390 |
+
"lstrip": false,
|
| 391 |
+
"normalized": false,
|
| 392 |
+
"rstrip": false,
|
| 393 |
+
"single_word": false,
|
| 394 |
+
"special": true
|
| 395 |
+
},
|
| 396 |
+
"200049": {
|
| 397 |
+
"content": "<file_sep>",
|
| 398 |
+
"lstrip": false,
|
| 399 |
+
"normalized": false,
|
| 400 |
+
"rstrip": false,
|
| 401 |
+
"single_word": false,
|
| 402 |
+
"special": true
|
| 403 |
+
},
|
| 404 |
+
"200050": {
|
| 405 |
+
"content": "<think>",
|
| 406 |
+
"lstrip": false,
|
| 407 |
+
"normalized": false,
|
| 408 |
+
"rstrip": false,
|
| 409 |
+
"single_word": false,
|
| 410 |
+
"special": false
|
| 411 |
+
},
|
| 412 |
+
"200051": {
|
| 413 |
+
"content": "</think>",
|
| 414 |
+
"lstrip": false,
|
| 415 |
+
"normalized": false,
|
| 416 |
+
"rstrip": false,
|
| 417 |
+
"single_word": false,
|
| 418 |
+
"special": false
|
| 419 |
+
},
|
| 420 |
+
"200052": {
|
| 421 |
+
"content": "<minimax:tool_call>",
|
| 422 |
+
"lstrip": false,
|
| 423 |
+
"normalized": false,
|
| 424 |
+
"rstrip": false,
|
| 425 |
+
"single_word": false,
|
| 426 |
+
"special": false
|
| 427 |
+
},
|
| 428 |
+
"200053": {
|
| 429 |
+
"content": "</minimax:tool_call>",
|
| 430 |
+
"lstrip": false,
|
| 431 |
+
"normalized": false,
|
| 432 |
+
"rstrip": false,
|
| 433 |
+
"single_word": false,
|
| 434 |
+
"special": false
|
| 435 |
+
}
|
| 436 |
+
},
|
| 437 |
+
"additional_special_tokens": [
|
| 438 |
+
"<code_interpreter>",
|
| 439 |
+
"<commit_after>",
|
| 440 |
+
"<commit_before>",
|
| 441 |
+
"<commit_msg>",
|
| 442 |
+
"<empty_output>",
|
| 443 |
+
"<filename>",
|
| 444 |
+
"<fim_middle>",
|
| 445 |
+
"<fim_pad>",
|
| 446 |
+
"<fim_prefix>",
|
| 447 |
+
"<fim_suffix>",
|
| 448 |
+
"<function_call>",
|
| 449 |
+
"<gh_stars>",
|
| 450 |
+
"]<]speech[>[",
|
| 451 |
+
"]<]image[>[",
|
| 452 |
+
"]<]video[>[",
|
| 453 |
+
"]<]start of speech[>[",
|
| 454 |
+
"]<]end of speech[>[",
|
| 455 |
+
"]<]start of image[>[",
|
| 456 |
+
"]<]end of image[>[",
|
| 457 |
+
"]<]start of video[>[",
|
| 458 |
+
"]<]end of video[>[",
|
| 459 |
+
"]<]vision pad[>[",
|
| 460 |
+
"]~!b[",
|
| 461 |
+
"<issue_closed>",
|
| 462 |
+
"<issue_comment>",
|
| 463 |
+
"<issue_start>",
|
| 464 |
+
"<jupyter_code>",
|
| 465 |
+
"<jupyter_output>",
|
| 466 |
+
"<jupyter_start>",
|
| 467 |
+
"<jupyter_text>",
|
| 468 |
+
"<reponame>",
|
| 469 |
+
"[e~[",
|
| 470 |
+
"]!d~[",
|
| 471 |
+
"]!p~[",
|
| 472 |
+
"]~b]",
|
| 473 |
+
"<jupyter_error>",
|
| 474 |
+
"<add_file>",
|
| 475 |
+
"<delete_file>",
|
| 476 |
+
"<rename_file>",
|
| 477 |
+
"<edit_file>",
|
| 478 |
+
"<commit_message>",
|
| 479 |
+
"<empty_source_file>",
|
| 480 |
+
"<repo_struct>",
|
| 481 |
+
"<code_context>",
|
| 482 |
+
"<file_content>",
|
| 483 |
+
"<source_files>",
|
| 484 |
+
"<pr_start>",
|
| 485 |
+
"<review_comment>",
|
| 486 |
+
"<filepath>",
|
| 487 |
+
"<file_sep>"
|
| 488 |
+
],
|
| 489 |
+
"bos_token": "]~!b[",
|
| 490 |
+
"clean_up_tokenization_spaces": false,
|
| 491 |
+
"eos_token": "[e~[",
|
| 492 |
+
"extra_special_tokens": {},
|
| 493 |
+
"model_max_length": 40960000,
|
| 494 |
+
"tokenizer_class": "GPT2Tokenizer",
|
| 495 |
+
"unk_token": "]!d~[",
|
| 496 |
+
"chat_template": "{# ----------\u2011\u2011\u2011 special token variables \u2011\u2011\u2011---------- #}\n{%- set toolcall_begin_token = '<minimax:tool_call>' -%}\n{%- set toolcall_end_token = '</minimax:tool_call>' -%}\n{#- Tool Rendering Functions ============================================== -#}\n{%- macro render_tool_namespace(namespace_name, tool_list) -%}\n{%- for tool in tool_list -%}\n<tool>{{ tool.function | tojson(ensure_ascii=False) }}</tool>\n{% endfor -%}\n{%- endmacro -%}\n{%- macro visible_text(content) -%}\n {%- if content is string -%}\n {{ content }}\n {%- elif content is iterable and content is not mapping -%}\n {%- for item in content -%}\n {%- if item is mapping and item.type == 'text' -%}\n {{- item.text }}\n {%- elif item is string -%}\n {{- item }}\n {%- endif -%}\n {%- endfor -%}\n {%- else -%}\n {{- content }}\n {%- endif -%}\n{%- endmacro -%}\n{#- System Message Construction ============================================ -#}\n{%- macro build_system_message(system_message) -%}\n {%- if system_message and system_message.content -%}\n {{- visible_text(system_message.content) }}\n {%- else -%}\n {%- if model_identity is not defined -%}\n {%- set model_identity = \"You are a helpful assistant.\" -%}\n {%- endif -%}\n {{- model_identity }}\n {%- endif -%}\n \n {#- Handle current_date -#}\n {%- if system_message and system_message.current_date -%}\n {{- '\\n' ~ 'Current date: ' + system_message.current_date }}\n {%- endif -%}\n {#- Handle current_location -#}\n {%- if system_message and system_message.current_location -%}\n {{- '\\n' ~ 'Current location: ' + system_message.current_location }}\n {%- endif -%}\n{%- endmacro -%}\n{#- Main Template Logic ================================================= -#}\n{#- Extract system message (only first message if it's system) -#}\n{%- set system_message = none -%}\n{%- set conversation_messages = messages -%}\n{%- if messages and messages[0].role == \"system\" -%}\n {%- set system_message = messages[0] 
-%}\n {%- set conversation_messages = messages[1:] -%}\n{%- endif -%}\n{#- Get the last user message turn, for interleved thinking -#}\n{%- set ns = namespace(last_user_index=-1) %}\n{% for m in conversation_messages %}\n {%- if m.role == 'user' %}\n {% set ns.last_user_index = loop.index0 -%}\n {%- endif %}\n{%- endfor %}\n{#- Render system message -#}\n{{- ']~!b[' ~ ']~b]system' ~ '\\n' }}\n{{- build_system_message(system_message) }}\n{#- Render tools if available -#}\n{%- if tools -%}\n {{- '\\n\\n' ~ '# Tools' ~ '\\n' ~ 'You may call one or more tools to assist with the user query.\\nHere are the tools available in JSONSchema format:' ~ '\\n' }}\n {{- '\\n' ~ '<tools>' ~ '\\n' }}\n {{- render_tool_namespace(\"functions\", tools) }}\n {{- '</tools>' ~ '\\n\\n' }}\n{{- 'When making tool calls, use XML format to invoke tools and pass parameters:' ~ '\\n' }}\n{{- '\\n' ~ toolcall_begin_token }}\n<invoke name=\"tool-name-1\">\n<parameter name=\"param-key-1\">param-value-1</parameter>\n<parameter name=\"param-key-2\">param-value-2</parameter>\n...\n</invoke>\n{{- '\\n' ~ toolcall_end_token }}\n{%- endif -%}\n{{- '[e~[\\n' }}\n\n{#- Render messages -#}\n{%- set last_tool_call = namespace(name=none) -%}\n{%- for message in conversation_messages -%}\n {%- if message.role == 'assistant' -%}\n {#- Only render reasoning_content if no user message follows -#}\n {{- ']~b]ai' ~ '\\n' }}\n\n {%- set reasoning_content = '' %}\n {%- set content = visible_text(message.content) %}\n {%- if message.reasoning_content is string %}\n {%- set reasoning_content = message.reasoning_content %}\n {%- else %}\n {%- if '</think>' in content %}\n {%- set reasoning_content = content.split('</think>')[0].strip('\\n').split('<think>')[-1].strip('\\n') %}\n {%- set content = content.split('</think>')[-1].strip('\\n') %}\n {%- endif %}\n {%- endif %}\n {%- if reasoning_content and loop.index0 > ns.last_user_index -%}\n {{- '<think>' ~ '\\n' ~ reasoning_content ~ '\\n' ~ '</think>' ~ '\\n\\n' }}\n 
{%- endif -%}\n {%- if content -%}\n {{- content }}\n {%- endif -%}\n {%- if message.tool_calls -%}\n {{- '\\n' ~ toolcall_begin_token ~ '\\n' }}\n\n {%- for tool_call in message.tool_calls -%}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<invoke name=\"' + tool_call.name + '\">' }}\n {% set _args = tool_call.arguments %}\n {%- for k, v in _args.items() %}\n {{- '<parameter name=\"' + k + '\">' }}\n {{- v | tojson(ensure_ascii=False) if v is not string else v }}\n {{- '</parameter>' }}\n {% endfor %}\n {{- '</invoke>' ~ '\\n' }}\n {%- endfor -%}\n \n {{- toolcall_end_token}}\n {%- set last_tool_call.name = message.tool_calls[-1].name -%}\n {%- else -%}\n {%- set last_tool_call.name = none -%}\n {%- endif -%}\n {{- '[e~[' ~ '\\n' }}\n \n {%- elif message.role == 'tool' -%}\n {%- if last_tool_call.name is none -%}\n {{- raise_exception(\"Message has tool role, but there was no previous assistant message with a tool call!\") }}\n {%- endif -%}\n {%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%}\n {{- ']~b]tool' }}\n {%- endif -%}\n {%- if message.content is string -%}\n {{- '\\n<response>' }}\n {{- message.content }}\n {{- '</response>' }}\n {%- else -%}\n {%- for tr in message.content -%}\n {{- '\\n<response>' }}\n {{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }}\n {{- '\\n</response>' }}\n {%- endfor -%}\n {%- endif -%}\n {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}\n {{- '[e~[\\n' -}}\n {%- endif -%}\n \n {%- elif message.role == 'user' -%}\n {{- ']~b]user' ~ '\\n' }}\n {{- visible_text(message.content) }}\n {{- '[e~[' ~ '\\n' }}\n {%- endif -%}\n{%- endfor -%}\n\n{#- Generation prompt -#}\n{%- if add_generation_prompt -%}\n{{- ']~b]ai' ~ '\\n' ~ '<think>' ~ '\\n' }}\n{%- endif -%}\n"
|
| 497 |
+
}
|
vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|