ivanfioravanti committed on
Commit
a342e4b
·
verified ·
1 Parent(s): 578d548

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. README.md +31 -0
  3. added_tokens.json +63 -0
  4. chat_template.jinja +247 -0
  5. config.json +875 -0
  6. configuration_minimax_m3_vl.py +111 -0
  7. generation_config.json +8 -0
  8. image_processor.py +223 -0
  9. model-00001-of-00058.safetensors +3 -0
  10. model-00002-of-00058.safetensors +3 -0
  11. model-00003-of-00058.safetensors +3 -0
  12. model-00004-of-00058.safetensors +3 -0
  13. model-00005-of-00058.safetensors +3 -0
  14. model-00006-of-00058.safetensors +3 -0
  15. model-00007-of-00058.safetensors +3 -0
  16. model-00008-of-00058.safetensors +3 -0
  17. model-00009-of-00058.safetensors +3 -0
  18. model-00010-of-00058.safetensors +3 -0
  19. model-00011-of-00058.safetensors +3 -0
  20. model-00012-of-00058.safetensors +3 -0
  21. model-00013-of-00058.safetensors +3 -0
  22. model-00014-of-00058.safetensors +3 -0
  23. model-00015-of-00058.safetensors +3 -0
  24. model-00016-of-00058.safetensors +3 -0
  25. model-00017-of-00058.safetensors +3 -0
  26. model-00018-of-00058.safetensors +3 -0
  27. model-00019-of-00058.safetensors +3 -0
  28. model-00020-of-00058.safetensors +3 -0
  29. model-00021-of-00058.safetensors +3 -0
  30. model-00022-of-00058.safetensors +3 -0
  31. model-00023-of-00058.safetensors +3 -0
  32. model-00024-of-00058.safetensors +3 -0
  33. model-00025-of-00058.safetensors +3 -0
  34. model-00026-of-00058.safetensors +3 -0
  35. model-00027-of-00058.safetensors +3 -0
  36. model-00028-of-00058.safetensors +3 -0
  37. model-00029-of-00058.safetensors +3 -0
  38. model-00030-of-00058.safetensors +3 -0
  39. model-00031-of-00058.safetensors +3 -0
  40. model-00032-of-00058.safetensors +3 -0
  41. model-00033-of-00058.safetensors +3 -0
  42. model-00034-of-00058.safetensors +3 -0
  43. model-00035-of-00058.safetensors +3 -0
  44. model-00036-of-00058.safetensors +3 -0
  45. model-00037-of-00058.safetensors +3 -0
  46. model-00038-of-00058.safetensors +3 -0
  47. model-00039-of-00058.safetensors +3 -0
  48. model-00040-of-00058.safetensors +3 -0
  49. model-00041-of-00058.safetensors +3 -0
  50. model-00042-of-00058.safetensors +3 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ pipeline_tag: image-text-to-text
3
+ license: other
4
+ license_name: minimax-community
5
+ license_link: LICENSE
6
+ library_name: mlx
7
+ tags:
8
+ - multimodal
9
+ - moe
10
+ - agent
11
+ - coding
12
+ - video
13
+ - mlx
14
+ base_model: MiniMaxAI/MiniMax-M3
15
+ ---
16
+
17
+ # mlx-community/MiniMax-M3-4bit
18
+
19
+ This model was converted to MLX format from [`MiniMaxAI/MiniMax-M3`](https://huggingface.co/MiniMaxAI/MiniMax-M3)
20
+ using mlx-vlm version **0.6.3**.
21
+ Refer to the [original model card](https://huggingface.co/MiniMaxAI/MiniMax-M3) for more details on the model.
22
+
23
+ ## Use with mlx
24
+
25
+ ```bash
26
+ pip install -U mlx-vlm
27
+ ```
28
+
29
+ ```bash
30
+ python -m mlx_vlm.generate --model mlx-community/MiniMax-M3-4bit --max-tokens 100 --temperature 0.0 --prompt "Describe this image." --image <path_to_image>
31
+ ```
added_tokens.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "]!p~[": 200000,
3
+ "<fim_prefix>": 200001,
4
+ "<fim_middle>": 200002,
5
+ "<fim_suffix>": 200003,
6
+ "<fim_pad>": 200004,
7
+ "<reponame>": 200005,
8
+ "<filename>": 200006,
9
+ "<gh_stars>": 200007,
10
+ "<issue_start>": 200008,
11
+ "<issue_comment>": 200009,
12
+ "<issue_closed>": 200010,
13
+ "<jupyter_start>": 200011,
14
+ "<jupyter_text>": 200012,
15
+ "<jupyter_code>": 200013,
16
+ "<jupyter_output>": 200014,
17
+ "<empty_output>": 200015,
18
+ "<commit_before>": 200016,
19
+ "<commit_msg>": 200017,
20
+ "<commit_after>": 200018,
21
+ "]~b]": 200019,
22
+ "[e~[": 200020,
23
+ "]!d~[": 200021,
24
+ "<function_call>": 200022,
25
+ "<code_interpreter>": 200023,
26
+ "]<]speech[>[": 200024,
27
+ "]<]image[>[": 200025,
28
+ "]<]video[>[": 200026,
29
+ "]<]start of speech[>[": 200027,
30
+ "]<]end of speech[>[": 200028,
31
+ "]<]start of image[>[": 200029,
32
+ "]<]end of image[>[": 200030,
33
+ "]<]start of video[>[": 200031,
34
+ "]<]end of video[>[": 200032,
35
+ "]<]vision pad[>[": 200033,
36
+ "]~!b[": 200034,
37
+ "<jupyter_error>": 200035,
38
+ "<add_file>": 200036,
39
+ "<delete_file>": 200037,
40
+ "<rename_file>": 200038,
41
+ "<edit_file>": 200039,
42
+ "<commit_message>": 200040,
43
+ "<empty_source_file>": 200041,
44
+ "<repo_struct>": 200042,
45
+ "<code_context>": 200043,
46
+ "<file_content>": 200044,
47
+ "<source_files>": 200045,
48
+ "<pr_start>": 200046,
49
+ "<review_comment>": 200047,
50
+ "<filepath>": 200048,
51
+ "<file_sep>": 200049,
52
+ "<think>": 200050,
53
+ "</think>": 200051,
54
+ "<tool_call>": 200052,
55
+ "</tool_call>": 200053,
56
+ "]<]frame[>[": 200054,
57
+ "]<]start of frame[>[": 200055,
58
+ "]<]end of frame[>[": 200056,
59
+ "<|content_altered_placeholder|>": 200057,
60
+ "]<]minimax[>[": 200058,
61
+ "<mm:think>": 200059,
62
+ "</mm:think>": 200060
63
+ }
chat_template.jinja ADDED
@@ -0,0 +1,247 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {# ---------- special token variables ---------- #}
2
+ {%- set ns_token = ']<]minimax[>[' -%}
3
+ {%- set bod_token = ']~!b[' -%}
4
+ {%- set bos_token = ']~b]' -%}
5
+ {%- set eos_token = '[e~[' -%}
6
+ {%- set toolcall_begin_token = ns_token ~ '<tool_call>' -%}
7
+ {%- set toolcall_end_token = ns_token ~ '</tool_call>' -%}
8
+ {%- set think_begin_token = '<mm:think>' -%}
9
+ {%- set think_end_token = '</mm:think>' -%}
10
+ {%- set image_token = ']<]image[>[' -%}
11
+ {%- set video_token = ']<]video[>[' -%}
12
+ {#- Thinking mode: "enabled" / "disabled" / "adaptive" / not defined -#}
13
+ {#- Recursive XML renderer for tool_call arguments ======================== -#}
14
+ {#- None values are intentionally skipped in mapping iteration so that
15
+ `<key>null</key>` (which would round-trip to the literal string "null")
16
+ never appears in the rendered tool_call. The convention is: omit the
17
+ field entirely. The top-level `_args` loop applies the same rule.
18
+ The `val is none` branch below is a safety net only — upstream cleaning
19
+ (drop_none_in_tool_arguments) should ensure no None ever reaches here. -#}
20
+ {%- macro to_xml(val, ns) -%}
21
+ {%- if val is mapping -%}
22
+ {%- for k, v in val.items() if v is not none -%}
23
+ {{ ns }}<{{ k }}>{{ to_xml(v, ns) }}{{ ns }}</{{ k }}>
24
+ {%- endfor -%}
25
+ {%- elif val is iterable and val is not string -%}
26
+ {%- for item in val -%}
27
+ {{ ns }}<item>{{ to_xml(item, ns) }}{{ ns }}</item>
28
+ {%- endfor -%}
29
+ {%- elif val is none -%}
30
+ {#- Should be unreachable when upstream cleaning is applied. -#}
31
+ {%- elif val is boolean -%}
32
+ {{ val | tojson }}
33
+ {%- else -%}
34
+ {{ val }}
35
+ {%- endif -%}
36
+ {%- endmacro -%}
37
+ {#- Tool Rendering Functions ============================================== -#}
38
+ {%- macro render_tool_namespace(namespace_name, tool_list) -%}
39
+ {%- for tool in tool_list -%}
40
+ <tool>{{ tool.function | tojson(ensure_ascii=False) }}</tool>
41
+ {% endfor -%}
42
+ {%- endmacro -%}
43
+ {%- macro visible_text(content) -%}
44
+ {%- if content is string -%}
45
+ {{ content }}
46
+ {%- elif content is iterable and content is not mapping -%}
47
+ {%- for item in content -%}
48
+ {%- if item is mapping and item.type == 'text' -%}
49
+ {{- item.text }}
50
+ {%- elif item is mapping and item.type == 'image' -%}
51
+ {{- image_token }}
52
+ {%- elif item is mapping and item.type == 'video' -%}
53
+ {{- video_token}}
54
+ {%- elif item is string -%}
55
+ {{- item }}
56
+ {%- endif -%}
57
+ {%- endfor -%}
58
+ {%- elif content is none -%}
59
+ {{- '' }}
60
+ {%- else -%}
61
+ {{- content }}
62
+ {%- endif -%}
63
+ {%- endmacro -%}
64
+ {#- System Message Construction ============================================ -#}
65
+ {%- macro build_system_message(system_message) -%}
66
+ {%- if system_message and system_message.content -%}
67
+ {{- visible_text(system_message.content) }}
68
+ {%- else -%}
69
+ {{- 'Your model version is MiniMax-M3, developed by MiniMax. Knowledge cutoff: January 2026. Founded in early 2022, MiniMax is a global AI foundation model company committed to advancing the frontiers of AI towards AGI.' }}
70
+ {%- endif -%}
71
+
72
+ {#- Thinking mode instructions -#}
73
+ {{- '\n\n<thinking_instructions>\n' }}
74
+ {{- 'You have a thinking capability that allows you to reason step by step before responding. When thinking is enabled, wrap your reasoning in ' ~ think_begin_token ~ think_end_token ~ ' tags before your response. When thinking is disabled, begin your response directly after the ' ~ think_end_token ~ ' prefix. When thinking is adaptive, decide on your own whether to think for the current turn.\n' }}
75
+ {%- if thinking_mode is defined -%}
76
+ {%- if thinking_mode == "enabled" -%}
77
+ {{- 'Current thinking mode: enabled. You MUST think step by step before every response, including after receiving function/tool results.\n' }}
78
+ {%- elif thinking_mode == "disabled" -%}
79
+ {{- 'Current thinking mode: disabled. Do not output any thinking process.\n' }}
80
+ {%- elif thinking_mode == "adaptive" -%}
81
+ {{- 'Current thinking mode: adaptive. You are encouraged to think for complex decision-making, multi-step reasoning, or when analyzing function/tool results.\n' }}
82
+ {%- endif -%}
83
+ {%- else -%}
84
+ {{- 'Current thinking mode: adaptive. You are encouraged to think for complex decision-making, multi-step reasoning, or when analyzing function/tool results.\n' }}
85
+ {%- endif -%}
86
+ {{- '</thinking_instructions>' }}
87
+ {%- endmacro -%}
88
+ {%- macro build_developer_message(developer_message) -%}
89
+ {%- if developer_message and developer_message.content -%}
90
+ {{- visible_text(developer_message.content) }}
91
+ {%- else -%}
92
+ {%- if model_identity is not defined -%}
93
+ {%- set model_identity = "You are a helpful assistant." -%}
94
+ {%- endif -%}
95
+ {{- model_identity }}
96
+ {%- endif -%}
97
+ {%- endmacro -%}
98
+ {#- Main Template Logic ================================================= -#}
99
+ {#- Role mapping: root -> system prompt (high priority); system/developer -> developer prompt (low priority) -#}
100
+ {%- set system_message = none -%}
101
+ {%- set developer_message = none -%}
102
+ {%- set conversation_messages = messages -%}
103
+ {%- if messages and messages[0].role == "root" -%}
104
+ {%- set system_message = messages[0] -%}
105
+ {%- set conversation_messages = messages[1:] -%}
106
+ {%- if conversation_messages and conversation_messages[0].role in ["system", "developer"] -%}
107
+ {%- set developer_message = conversation_messages[0] -%}
108
+ {%- set conversation_messages = conversation_messages[1:] -%}
109
+ {%- endif -%}
110
+ {%- elif messages and messages[0].role in ["system", "developer"] -%}
111
+ {%- set developer_message = messages[0] -%}
112
+ {%- set conversation_messages = messages[1:] -%}
113
+ {%- endif -%}
114
+ {#- Render the system prompt (higher priority): content comes from the root role only; a default identity text is used when no root message is present -#}
115
+ {{- bod_token ~ bos_token ~ 'system' ~ '\n' }}
116
+ {{- build_system_message(system_message) }}
117
+ {{- eos_token ~ '\n' }}
118
+
119
+ {#- Render developer sp (lower priority: system/developer role + tools) -#}
120
+ {{- bos_token ~ 'developer' ~ '\n' }}
121
+ {{- build_developer_message(developer_message) }}
122
+ {%- if tools -%}
123
+ {{- '\n\n' ~ '# Tools' ~ '\n' ~ 'You may call one or more tools to assist with the user query.\nHere are the tools available in JSONSchema format:' ~ '\n' }}
124
+ {{- '\n' ~ '<tools>' ~ '\n' }}
125
+ {{- render_tool_namespace("functions", tools) }}
126
+ {{- '</tools>' ~ '\n\n' }}
127
+ {{- 'To call tools, wrap all invocations in a single ' ~ toolcall_begin_token ~ toolcall_end_token ~ ' block. Parameter values containing nested objects or arrays are recursively expanded into XML elements. Example:\n' }}
128
+ {{- '\n' ~ toolcall_begin_token ~ '\n' }}
129
+ {{- ns_token + '<invoke name="tool-name-1">' }}
130
+ {{- ns_token + '<param-1>value-1' + ns_token + '</param-1>' }}
131
+ {{- ns_token + '<param-2>' }}
132
+ {{- ns_token + '<item>' }}
133
+ {{- ns_token + '<key-a>val-a' + ns_token + '</key-a>' }}
134
+ {{- ns_token + '<key-b>val-b' + ns_token + '</key-b>' }}
135
+ {{- ns_token + '</item>' }}
136
+ {{- ns_token + '</param-2>' }}
137
+ {{- ns_token + '</invoke>\n' }}
138
+ {{- ns_token + '<invoke name="tool-name-2">' }}
139
+ {{- ns_token + '<param-1>value-1' + ns_token + '</param-1>' }}
140
+ {{- ns_token + '</invoke>\n' }}
141
+ {{- toolcall_end_token }}
142
+ {%- endif -%}
143
+ {{- eos_token ~ '\n' }}
144
+
145
+ {#- Render messages -#}
146
+ {%- set last_tool_call = namespace(name=none) -%}
147
+ {%- for message in conversation_messages -%}
148
+ {%- if message.role == 'assistant' -%}
149
+ {{- bos_token ~ 'ai' ~ '\n' }}
150
+
151
+ {%- set reasoning_content = '' %}
152
+ {%- set content = visible_text(message.content) %}
153
+ {%- if message.reasoning_content is string %}
154
+ {%- set reasoning_content = message.reasoning_content %}
155
+ {%- else %}
156
+ {%- if think_end_token in content %}
157
+ {%- set reasoning_content = content.split(think_end_token)[0].strip('\n').split(think_begin_token)[-1].strip('\n') %}
158
+ {%- set content = content.split(think_end_token)[-1].strip('\n') %}
159
+ {%- endif %}
160
+ {%- endif %}
161
+
162
+ {%- if reasoning_content -%}
163
+ {#- Render thinking for every assistant turn (all-turn visible) -#}
164
+ {{- think_begin_token ~ reasoning_content ~ think_end_token }}
165
+ {%- else -%}
166
+ {#- No thinking rendered → prefix with think_end_token -#}
167
+ {{- think_end_token }}
168
+ {%- endif -%}
169
+
170
+ {%- if content -%}
171
+ {{- content }}
172
+ {%- endif -%}
173
+ {%- if message.tool_calls -%}
174
+ {{- toolcall_begin_token ~ '\n' }}
175
+
176
+ {%- for tool_call in message.tool_calls -%}
177
+ {%- if tool_call.function -%}
178
+ {%- set tool_call = tool_call.function -%}
179
+ {%- endif -%}
180
+ {{- ns_token + '<invoke name="' + tool_call.name + '">' }}
181
+ {%- set _args = tool_call.arguments -%}
182
+ {%- for k, v in _args.items() if v is not none %}
183
+ {{- ns_token + '<' + k + '>' -}}
184
+ {{- to_xml(v, ns_token) -}}
185
+ {{- ns_token + '</' + k + '>' }}
186
+ {%- endfor -%}
187
+ {{- ns_token + '</invoke>' ~ '\n' }}
188
+ {%- endfor -%}
189
+
190
+ {{- toolcall_end_token }}
191
+ {%- if message.tool_calls[-1].function -%}
192
+ {%- set last_tool_call.name = message.tool_calls[-1].function.name -%}
193
+ {%- else -%}
194
+ {%- set last_tool_call.name = message.tool_calls[-1].name -%}
195
+ {%- endif -%}
196
+ {%- else -%}
197
+ {%- set last_tool_call.name = none -%}
198
+ {%- endif -%}
199
+ {{- eos_token ~ '\n' }}
200
+
201
+ {%- elif message.role == 'tool' -%}
202
+ {%- if last_tool_call.name is none -%}
203
+ {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
204
+ {%- endif -%}
205
+ {%- if loop.first or (conversation_messages[loop.index0 - 1].role != 'tool') -%}
206
+ {{- bos_token ~ 'tool' }}
207
+ {%- endif -%}
208
+ {{- '\n<response>' }}
209
+ {%- if message.content is string -%}
210
+ {{- message.content }}
211
+ {%- else -%}
212
+ {%- for tr in message.content -%}
213
+ {%- if tr is mapping and tr.type is defined and tr.type == 'image' -%}
214
+ {{- image_token }}
215
+ {%- elif tr is mapping and tr.type is defined and tr.type == 'video' -%}
216
+ {{- video_token }}
217
+ {%- else -%}
218
+ {{- tr.output if tr.output is defined else (tr.text if tr.type == 'text' and tr.text is defined else tr) }}
219
+ {%- endif -%}
220
+ {%- endfor -%}
221
+ {%- endif -%}
222
+ {{- '</response>' }}
223
+ {%- if loop.last or (conversation_messages[loop.index0 + 1].role != 'tool') -%}
224
+ {{- eos_token ~ '\n' -}}
225
+ {%- endif -%}
226
+
227
+ {%- elif message.role == 'user' -%}
228
+ {{- bos_token ~ 'user' ~ '\n' }}
229
+ {{- visible_text(message.content) }}
230
+ {{- eos_token ~ '\n' }}
231
+ {%- endif -%}
232
+ {%- endfor -%}
233
+
234
+ {#- Generation prompt -#}
235
+ {%- if add_generation_prompt -%}
236
+ {{- bos_token ~ 'ai' ~ '\n' }}
237
+ {%- if thinking_mode is defined and thinking_mode == "disabled" -%}
238
+ {{- think_end_token }}
239
+ {%- elif thinking_mode is defined and thinking_mode == "adaptive" -%}
240
+ {#- adaptive: no prefix, let model decide -#}
241
+ {%- elif thinking_mode is defined and thinking_mode == "enabled" -%}
242
+ {#- enabled: open the think block so the model always thinks first -#}
243
+ {{- think_begin_token }}
244
+ {%- else -%}
245
+ {#- thinking_mode not defined (or an unrecognized value): behave like adaptive — no prefix, let model decide -#}
246
+ {%- endif -%}
247
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,875 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MiniMaxM3SparseForConditionalGeneration"
4
+ ],
5
+ "auto_map": {
6
+ "AutoConfig": "configuration_minimax_m3_vl.MiniMaxM3VLConfig"
7
+ },
8
+ "do_sample": true,
9
+ "eos_token_id": 200020,
10
+ "generation_config": {
11
+ "bos_token_id": 200019,
12
+ "do_sample": true,
13
+ "eos_token_id": 200020,
14
+ "temperature": 1.0,
15
+ "top_p": 0.95,
16
+ "transformers_version": "4.46.1"
17
+ },
18
+ "image_grid_pinpoints": "[(336, 336), (336, 672), (336, 1008), (336, 1344), (336, 1680), (336, 2016), (672, 336), (672, 672), (672, 1008), (672, 1344), (672, 1680), (672, 2016), (1008, 336), (1008, 672), (1008, 1008), (1008, 1344), (1008, 1680), (1008, 2016), (1344, 336), (1344, 672), (1344, 1008), (1344, 1344), (1344, 1680), (1344, 2016), (1680, 336), (1680, 672), (1680, 1008), (1680, 1344), (1680, 1680), (1680, 2016), (2016, 336), (2016, 672), (2016, 1008), (2016, 1344), (2016, 1680), (2016, 2016)]",
19
+ "image_seq_length": 576,
20
+ "image_token_index": 200025,
21
+ "img_token_compression_config": {
22
+ "image_token_compression_method": "patch_merge",
23
+ "spatial_merge_size": 2,
24
+ "temporal_patch_size": 2
25
+ },
26
+ "model_type": "minimax_m3_vl",
27
+ "multimodal_projector_bias": true,
28
+ "num_reward_heads": 0,
29
+ "process_image_mode": "dynamic_res",
30
+ "projector_hidden_act": "gelu",
31
+ "projector_hidden_size": 6144,
32
+ "quantization": {
33
+ "group_size": 64,
34
+ "bits": 4,
35
+ "mode": "affine",
36
+ "language_model.model.layers.3.block_sparse_moe.gate": {
37
+ "group_size": 64,
38
+ "bits": 8,
39
+ "mode": "affine"
40
+ },
41
+ "language_model.model.layers.4.block_sparse_moe.gate": {
42
+ "group_size": 64,
43
+ "bits": 8,
44
+ "mode": "affine"
45
+ },
46
+ "language_model.model.layers.5.block_sparse_moe.gate": {
47
+ "group_size": 64,
48
+ "bits": 8,
49
+ "mode": "affine"
50
+ },
51
+ "language_model.model.layers.6.block_sparse_moe.gate": {
52
+ "group_size": 64,
53
+ "bits": 8,
54
+ "mode": "affine"
55
+ },
56
+ "language_model.model.layers.7.block_sparse_moe.gate": {
57
+ "group_size": 64,
58
+ "bits": 8,
59
+ "mode": "affine"
60
+ },
61
+ "language_model.model.layers.8.block_sparse_moe.gate": {
62
+ "group_size": 64,
63
+ "bits": 8,
64
+ "mode": "affine"
65
+ },
66
+ "language_model.model.layers.9.block_sparse_moe.gate": {
67
+ "group_size": 64,
68
+ "bits": 8,
69
+ "mode": "affine"
70
+ },
71
+ "language_model.model.layers.10.block_sparse_moe.gate": {
72
+ "group_size": 64,
73
+ "bits": 8,
74
+ "mode": "affine"
75
+ },
76
+ "language_model.model.layers.11.block_sparse_moe.gate": {
77
+ "group_size": 64,
78
+ "bits": 8,
79
+ "mode": "affine"
80
+ },
81
+ "language_model.model.layers.12.block_sparse_moe.gate": {
82
+ "group_size": 64,
83
+ "bits": 8,
84
+ "mode": "affine"
85
+ },
86
+ "language_model.model.layers.13.block_sparse_moe.gate": {
87
+ "group_size": 64,
88
+ "bits": 8,
89
+ "mode": "affine"
90
+ },
91
+ "language_model.model.layers.14.block_sparse_moe.gate": {
92
+ "group_size": 64,
93
+ "bits": 8,
94
+ "mode": "affine"
95
+ },
96
+ "language_model.model.layers.15.block_sparse_moe.gate": {
97
+ "group_size": 64,
98
+ "bits": 8,
99
+ "mode": "affine"
100
+ },
101
+ "language_model.model.layers.16.block_sparse_moe.gate": {
102
+ "group_size": 64,
103
+ "bits": 8,
104
+ "mode": "affine"
105
+ },
106
+ "language_model.model.layers.17.block_sparse_moe.gate": {
107
+ "group_size": 64,
108
+ "bits": 8,
109
+ "mode": "affine"
110
+ },
111
+ "language_model.model.layers.18.block_sparse_moe.gate": {
112
+ "group_size": 64,
113
+ "bits": 8,
114
+ "mode": "affine"
115
+ },
116
+ "language_model.model.layers.19.block_sparse_moe.gate": {
117
+ "group_size": 64,
118
+ "bits": 8,
119
+ "mode": "affine"
120
+ },
121
+ "language_model.model.layers.20.block_sparse_moe.gate": {
122
+ "group_size": 64,
123
+ "bits": 8,
124
+ "mode": "affine"
125
+ },
126
+ "language_model.model.layers.21.block_sparse_moe.gate": {
127
+ "group_size": 64,
128
+ "bits": 8,
129
+ "mode": "affine"
130
+ },
131
+ "language_model.model.layers.22.block_sparse_moe.gate": {
132
+ "group_size": 64,
133
+ "bits": 8,
134
+ "mode": "affine"
135
+ },
136
+ "language_model.model.layers.23.block_sparse_moe.gate": {
137
+ "group_size": 64,
138
+ "bits": 8,
139
+ "mode": "affine"
140
+ },
141
+ "language_model.model.layers.24.block_sparse_moe.gate": {
142
+ "group_size": 64,
143
+ "bits": 8,
144
+ "mode": "affine"
145
+ },
146
+ "language_model.model.layers.25.block_sparse_moe.gate": {
147
+ "group_size": 64,
148
+ "bits": 8,
149
+ "mode": "affine"
150
+ },
151
+ "language_model.model.layers.26.block_sparse_moe.gate": {
152
+ "group_size": 64,
153
+ "bits": 8,
154
+ "mode": "affine"
155
+ },
156
+ "language_model.model.layers.27.block_sparse_moe.gate": {
157
+ "group_size": 64,
158
+ "bits": 8,
159
+ "mode": "affine"
160
+ },
161
+ "language_model.model.layers.28.block_sparse_moe.gate": {
162
+ "group_size": 64,
163
+ "bits": 8,
164
+ "mode": "affine"
165
+ },
166
+ "language_model.model.layers.29.block_sparse_moe.gate": {
167
+ "group_size": 64,
168
+ "bits": 8,
169
+ "mode": "affine"
170
+ },
171
+ "language_model.model.layers.30.block_sparse_moe.gate": {
172
+ "group_size": 64,
173
+ "bits": 8,
174
+ "mode": "affine"
175
+ },
176
+ "language_model.model.layers.31.block_sparse_moe.gate": {
177
+ "group_size": 64,
178
+ "bits": 8,
179
+ "mode": "affine"
180
+ },
181
+ "language_model.model.layers.32.block_sparse_moe.gate": {
182
+ "group_size": 64,
183
+ "bits": 8,
184
+ "mode": "affine"
185
+ },
186
+ "language_model.model.layers.33.block_sparse_moe.gate": {
187
+ "group_size": 64,
188
+ "bits": 8,
189
+ "mode": "affine"
190
+ },
191
+ "language_model.model.layers.34.block_sparse_moe.gate": {
192
+ "group_size": 64,
193
+ "bits": 8,
194
+ "mode": "affine"
195
+ },
196
+ "language_model.model.layers.35.block_sparse_moe.gate": {
197
+ "group_size": 64,
198
+ "bits": 8,
199
+ "mode": "affine"
200
+ },
201
+ "language_model.model.layers.36.block_sparse_moe.gate": {
202
+ "group_size": 64,
203
+ "bits": 8,
204
+ "mode": "affine"
205
+ },
206
+ "language_model.model.layers.37.block_sparse_moe.gate": {
207
+ "group_size": 64,
208
+ "bits": 8,
209
+ "mode": "affine"
210
+ },
211
+ "language_model.model.layers.38.block_sparse_moe.gate": {
212
+ "group_size": 64,
213
+ "bits": 8,
214
+ "mode": "affine"
215
+ },
216
+ "language_model.model.layers.39.block_sparse_moe.gate": {
217
+ "group_size": 64,
218
+ "bits": 8,
219
+ "mode": "affine"
220
+ },
221
+ "language_model.model.layers.40.block_sparse_moe.gate": {
222
+ "group_size": 64,
223
+ "bits": 8,
224
+ "mode": "affine"
225
+ },
226
+ "language_model.model.layers.41.block_sparse_moe.gate": {
227
+ "group_size": 64,
228
+ "bits": 8,
229
+ "mode": "affine"
230
+ },
231
+ "language_model.model.layers.42.block_sparse_moe.gate": {
232
+ "group_size": 64,
233
+ "bits": 8,
234
+ "mode": "affine"
235
+ },
236
+ "language_model.model.layers.43.block_sparse_moe.gate": {
237
+ "group_size": 64,
238
+ "bits": 8,
239
+ "mode": "affine"
240
+ },
241
+ "language_model.model.layers.44.block_sparse_moe.gate": {
242
+ "group_size": 64,
243
+ "bits": 8,
244
+ "mode": "affine"
245
+ },
246
+ "language_model.model.layers.45.block_sparse_moe.gate": {
247
+ "group_size": 64,
248
+ "bits": 8,
249
+ "mode": "affine"
250
+ },
251
+ "language_model.model.layers.46.block_sparse_moe.gate": {
252
+ "group_size": 64,
253
+ "bits": 8,
254
+ "mode": "affine"
255
+ },
256
+ "language_model.model.layers.47.block_sparse_moe.gate": {
257
+ "group_size": 64,
258
+ "bits": 8,
259
+ "mode": "affine"
260
+ },
261
+ "language_model.model.layers.48.block_sparse_moe.gate": {
262
+ "group_size": 64,
263
+ "bits": 8,
264
+ "mode": "affine"
265
+ },
266
+ "language_model.model.layers.49.block_sparse_moe.gate": {
267
+ "group_size": 64,
268
+ "bits": 8,
269
+ "mode": "affine"
270
+ },
271
+ "language_model.model.layers.50.block_sparse_moe.gate": {
272
+ "group_size": 64,
273
+ "bits": 8,
274
+ "mode": "affine"
275
+ },
276
+ "language_model.model.layers.51.block_sparse_moe.gate": {
277
+ "group_size": 64,
278
+ "bits": 8,
279
+ "mode": "affine"
280
+ },
281
+ "language_model.model.layers.52.block_sparse_moe.gate": {
282
+ "group_size": 64,
283
+ "bits": 8,
284
+ "mode": "affine"
285
+ },
286
+ "language_model.model.layers.53.block_sparse_moe.gate": {
287
+ "group_size": 64,
288
+ "bits": 8,
289
+ "mode": "affine"
290
+ },
291
+ "language_model.model.layers.54.block_sparse_moe.gate": {
292
+ "group_size": 64,
293
+ "bits": 8,
294
+ "mode": "affine"
295
+ },
296
+ "language_model.model.layers.55.block_sparse_moe.gate": {
297
+ "group_size": 64,
298
+ "bits": 8,
299
+ "mode": "affine"
300
+ },
301
+ "language_model.model.layers.56.block_sparse_moe.gate": {
302
+ "group_size": 64,
303
+ "bits": 8,
304
+ "mode": "affine"
305
+ },
306
+ "language_model.model.layers.57.block_sparse_moe.gate": {
307
+ "group_size": 64,
308
+ "bits": 8,
309
+ "mode": "affine"
310
+ },
311
+ "language_model.model.layers.58.block_sparse_moe.gate": {
312
+ "group_size": 64,
313
+ "bits": 8,
314
+ "mode": "affine"
315
+ },
316
+ "language_model.model.layers.59.block_sparse_moe.gate": {
317
+ "group_size": 64,
318
+ "bits": 8,
319
+ "mode": "affine"
320
+ }
321
+ },
322
+ "quantization_config": {
323
+ "group_size": 64,
324
+ "bits": 4,
325
+ "mode": "affine",
326
+ "language_model.model.layers.3.block_sparse_moe.gate": {
327
+ "group_size": 64,
328
+ "bits": 8,
329
+ "mode": "affine"
330
+ },
331
+ "language_model.model.layers.4.block_sparse_moe.gate": {
332
+ "group_size": 64,
333
+ "bits": 8,
334
+ "mode": "affine"
335
+ },
336
+ "language_model.model.layers.5.block_sparse_moe.gate": {
337
+ "group_size": 64,
338
+ "bits": 8,
339
+ "mode": "affine"
340
+ },
341
+ "language_model.model.layers.6.block_sparse_moe.gate": {
342
+ "group_size": 64,
343
+ "bits": 8,
344
+ "mode": "affine"
345
+ },
346
+ "language_model.model.layers.7.block_sparse_moe.gate": {
347
+ "group_size": 64,
348
+ "bits": 8,
349
+ "mode": "affine"
350
+ },
351
+ "language_model.model.layers.8.block_sparse_moe.gate": {
352
+ "group_size": 64,
353
+ "bits": 8,
354
+ "mode": "affine"
355
+ },
356
+ "language_model.model.layers.9.block_sparse_moe.gate": {
357
+ "group_size": 64,
358
+ "bits": 8,
359
+ "mode": "affine"
360
+ },
361
+ "language_model.model.layers.10.block_sparse_moe.gate": {
362
+ "group_size": 64,
363
+ "bits": 8,
364
+ "mode": "affine"
365
+ },
366
+ "language_model.model.layers.11.block_sparse_moe.gate": {
367
+ "group_size": 64,
368
+ "bits": 8,
369
+ "mode": "affine"
370
+ },
371
+ "language_model.model.layers.12.block_sparse_moe.gate": {
372
+ "group_size": 64,
373
+ "bits": 8,
374
+ "mode": "affine"
375
+ },
376
+ "language_model.model.layers.13.block_sparse_moe.gate": {
377
+ "group_size": 64,
378
+ "bits": 8,
379
+ "mode": "affine"
380
+ },
381
+ "language_model.model.layers.14.block_sparse_moe.gate": {
382
+ "group_size": 64,
383
+ "bits": 8,
384
+ "mode": "affine"
385
+ },
386
+ "language_model.model.layers.15.block_sparse_moe.gate": {
387
+ "group_size": 64,
388
+ "bits": 8,
389
+ "mode": "affine"
390
+ },
391
+ "language_model.model.layers.16.block_sparse_moe.gate": {
392
+ "group_size": 64,
393
+ "bits": 8,
394
+ "mode": "affine"
395
+ },
396
+ "language_model.model.layers.17.block_sparse_moe.gate": {
397
+ "group_size": 64,
398
+ "bits": 8,
399
+ "mode": "affine"
400
+ },
401
+ "language_model.model.layers.18.block_sparse_moe.gate": {
402
+ "group_size": 64,
403
+ "bits": 8,
404
+ "mode": "affine"
405
+ },
406
+ "language_model.model.layers.19.block_sparse_moe.gate": {
407
+ "group_size": 64,
408
+ "bits": 8,
409
+ "mode": "affine"
410
+ },
411
+ "language_model.model.layers.20.block_sparse_moe.gate": {
412
+ "group_size": 64,
413
+ "bits": 8,
414
+ "mode": "affine"
415
+ },
416
+ "language_model.model.layers.21.block_sparse_moe.gate": {
417
+ "group_size": 64,
418
+ "bits": 8,
419
+ "mode": "affine"
420
+ },
421
+ "language_model.model.layers.22.block_sparse_moe.gate": {
422
+ "group_size": 64,
423
+ "bits": 8,
424
+ "mode": "affine"
425
+ },
426
+ "language_model.model.layers.23.block_sparse_moe.gate": {
427
+ "group_size": 64,
428
+ "bits": 8,
429
+ "mode": "affine"
430
+ },
431
+ "language_model.model.layers.24.block_sparse_moe.gate": {
432
+ "group_size": 64,
433
+ "bits": 8,
434
+ "mode": "affine"
435
+ },
436
+ "language_model.model.layers.25.block_sparse_moe.gate": {
437
+ "group_size": 64,
438
+ "bits": 8,
439
+ "mode": "affine"
440
+ },
441
+ "language_model.model.layers.26.block_sparse_moe.gate": {
442
+ "group_size": 64,
443
+ "bits": 8,
444
+ "mode": "affine"
445
+ },
446
+ "language_model.model.layers.27.block_sparse_moe.gate": {
447
+ "group_size": 64,
448
+ "bits": 8,
449
+ "mode": "affine"
450
+ },
451
+ "language_model.model.layers.28.block_sparse_moe.gate": {
452
+ "group_size": 64,
453
+ "bits": 8,
454
+ "mode": "affine"
455
+ },
456
+ "language_model.model.layers.29.block_sparse_moe.gate": {
457
+ "group_size": 64,
458
+ "bits": 8,
459
+ "mode": "affine"
460
+ },
461
+ "language_model.model.layers.30.block_sparse_moe.gate": {
462
+ "group_size": 64,
463
+ "bits": 8,
464
+ "mode": "affine"
465
+ },
466
+ "language_model.model.layers.31.block_sparse_moe.gate": {
467
+ "group_size": 64,
468
+ "bits": 8,
469
+ "mode": "affine"
470
+ },
471
+ "language_model.model.layers.32.block_sparse_moe.gate": {
472
+ "group_size": 64,
473
+ "bits": 8,
474
+ "mode": "affine"
475
+ },
476
+ "language_model.model.layers.33.block_sparse_moe.gate": {
477
+ "group_size": 64,
478
+ "bits": 8,
479
+ "mode": "affine"
480
+ },
481
+ "language_model.model.layers.34.block_sparse_moe.gate": {
482
+ "group_size": 64,
483
+ "bits": 8,
484
+ "mode": "affine"
485
+ },
486
+ "language_model.model.layers.35.block_sparse_moe.gate": {
487
+ "group_size": 64,
488
+ "bits": 8,
489
+ "mode": "affine"
490
+ },
491
+ "language_model.model.layers.36.block_sparse_moe.gate": {
492
+ "group_size": 64,
493
+ "bits": 8,
494
+ "mode": "affine"
495
+ },
496
+ "language_model.model.layers.37.block_sparse_moe.gate": {
497
+ "group_size": 64,
498
+ "bits": 8,
499
+ "mode": "affine"
500
+ },
501
+ "language_model.model.layers.38.block_sparse_moe.gate": {
502
+ "group_size": 64,
503
+ "bits": 8,
504
+ "mode": "affine"
505
+ },
506
+ "language_model.model.layers.39.block_sparse_moe.gate": {
507
+ "group_size": 64,
508
+ "bits": 8,
509
+ "mode": "affine"
510
+ },
511
+ "language_model.model.layers.40.block_sparse_moe.gate": {
512
+ "group_size": 64,
513
+ "bits": 8,
514
+ "mode": "affine"
515
+ },
516
+ "language_model.model.layers.41.block_sparse_moe.gate": {
517
+ "group_size": 64,
518
+ "bits": 8,
519
+ "mode": "affine"
520
+ },
521
+ "language_model.model.layers.42.block_sparse_moe.gate": {
522
+ "group_size": 64,
523
+ "bits": 8,
524
+ "mode": "affine"
525
+ },
526
+ "language_model.model.layers.43.block_sparse_moe.gate": {
527
+ "group_size": 64,
528
+ "bits": 8,
529
+ "mode": "affine"
530
+ },
531
+ "language_model.model.layers.44.block_sparse_moe.gate": {
532
+ "group_size": 64,
533
+ "bits": 8,
534
+ "mode": "affine"
535
+ },
536
+ "language_model.model.layers.45.block_sparse_moe.gate": {
537
+ "group_size": 64,
538
+ "bits": 8,
539
+ "mode": "affine"
540
+ },
541
+ "language_model.model.layers.46.block_sparse_moe.gate": {
542
+ "group_size": 64,
543
+ "bits": 8,
544
+ "mode": "affine"
545
+ },
546
+ "language_model.model.layers.47.block_sparse_moe.gate": {
547
+ "group_size": 64,
548
+ "bits": 8,
549
+ "mode": "affine"
550
+ },
551
+ "language_model.model.layers.48.block_sparse_moe.gate": {
552
+ "group_size": 64,
553
+ "bits": 8,
554
+ "mode": "affine"
555
+ },
556
+ "language_model.model.layers.49.block_sparse_moe.gate": {
557
+ "group_size": 64,
558
+ "bits": 8,
559
+ "mode": "affine"
560
+ },
561
+ "language_model.model.layers.50.block_sparse_moe.gate": {
562
+ "group_size": 64,
563
+ "bits": 8,
564
+ "mode": "affine"
565
+ },
566
+ "language_model.model.layers.51.block_sparse_moe.gate": {
567
+ "group_size": 64,
568
+ "bits": 8,
569
+ "mode": "affine"
570
+ },
571
+ "language_model.model.layers.52.block_sparse_moe.gate": {
572
+ "group_size": 64,
573
+ "bits": 8,
574
+ "mode": "affine"
575
+ },
576
+ "language_model.model.layers.53.block_sparse_moe.gate": {
577
+ "group_size": 64,
578
+ "bits": 8,
579
+ "mode": "affine"
580
+ },
581
+ "language_model.model.layers.54.block_sparse_moe.gate": {
582
+ "group_size": 64,
583
+ "bits": 8,
584
+ "mode": "affine"
585
+ },
586
+ "language_model.model.layers.55.block_sparse_moe.gate": {
587
+ "group_size": 64,
588
+ "bits": 8,
589
+ "mode": "affine"
590
+ },
591
+ "language_model.model.layers.56.block_sparse_moe.gate": {
592
+ "group_size": 64,
593
+ "bits": 8,
594
+ "mode": "affine"
595
+ },
596
+ "language_model.model.layers.57.block_sparse_moe.gate": {
597
+ "group_size": 64,
598
+ "bits": 8,
599
+ "mode": "affine"
600
+ },
601
+ "language_model.model.layers.58.block_sparse_moe.gate": {
602
+ "group_size": 64,
603
+ "bits": 8,
604
+ "mode": "affine"
605
+ },
606
+ "language_model.model.layers.59.block_sparse_moe.gate": {
607
+ "group_size": 64,
608
+ "bits": 8,
609
+ "mode": "affine"
610
+ }
611
+ },
612
+ "temperature": 1.0,
613
+ "text_config": {
614
+ "hidden_size": 6144,
615
+ "intermediate_size": 3072,
616
+ "num_hidden_layers": 60,
617
+ "num_attention_heads": 64,
618
+ "num_key_value_heads": 4,
619
+ "head_dim": 128,
620
+ "vocab_size": 200064,
621
+ "max_position_embeddings": 1048576,
622
+ "rms_norm_eps": 1e-06,
623
+ "use_gemma_norm": true,
624
+ "attention_output_gate": false,
625
+ "rope_theta": 5000000,
626
+ "rotary_dim": 64,
627
+ "partial_rotary_factor": 0.5,
628
+ "hidden_act": "swigluoai",
629
+ "use_qk_norm": true,
630
+ "tie_word_embeddings": false,
631
+ "dense_intermediate_size": 12288,
632
+ "shared_intermediate_size": 3072,
633
+ "num_local_experts": 128,
634
+ "num_experts_per_tok": 4,
635
+ "n_shared_experts": 1,
636
+ "scoring_func": "sigmoid",
637
+ "use_routing_bias": true,
638
+ "moe_layer_freq": [
639
+ 0,
640
+ 0,
641
+ 0,
642
+ 1,
643
+ 1,
644
+ 1,
645
+ 1,
646
+ 1,
647
+ 1,
648
+ 1,
649
+ 1,
650
+ 1,
651
+ 1,
652
+ 1,
653
+ 1,
654
+ 1,
655
+ 1,
656
+ 1,
657
+ 1,
658
+ 1,
659
+ 1,
660
+ 1,
661
+ 1,
662
+ 1,
663
+ 1,
664
+ 1,
665
+ 1,
666
+ 1,
667
+ 1,
668
+ 1,
669
+ 1,
670
+ 1,
671
+ 1,
672
+ 1,
673
+ 1,
674
+ 1,
675
+ 1,
676
+ 1,
677
+ 1,
678
+ 1,
679
+ 1,
680
+ 1,
681
+ 1,
682
+ 1,
683
+ 1,
684
+ 1,
685
+ 1,
686
+ 1,
687
+ 1,
688
+ 1,
689
+ 1,
690
+ 1,
691
+ 1,
692
+ 1,
693
+ 1,
694
+ 1,
695
+ 1,
696
+ 1,
697
+ 1,
698
+ 1
699
+ ],
700
+ "qk_norm_type": "per_head",
701
+ "num_mtp_modules": 7,
702
+ "num_nextn_predict_layers": 1,
703
+ "swiglu_alpha": 1.702,
704
+ "swiglu_limit": 7.0,
705
+ "routed_scaling_factor": 2.0,
706
+ "sparse_attention_config": {
707
+ "use_sparse_attention": true,
708
+ "sparse_index_dim": 128,
709
+ "sparse_num_index_heads": 4,
710
+ "sparse_topk_blocks": 16,
711
+ "sparse_block_size": 128,
712
+ "sparse_disable_index_value": [
713
+ 0,
714
+ 0,
715
+ 0,
716
+ 1,
717
+ 1,
718
+ 1,
719
+ 1,
720
+ 1,
721
+ 1,
722
+ 1,
723
+ 1,
724
+ 1,
725
+ 1,
726
+ 1,
727
+ 1,
728
+ 1,
729
+ 1,
730
+ 1,
731
+ 1,
732
+ 1,
733
+ 1,
734
+ 1,
735
+ 1,
736
+ 1,
737
+ 1,
738
+ 1,
739
+ 1,
740
+ 1,
741
+ 1,
742
+ 1,
743
+ 1,
744
+ 1,
745
+ 1,
746
+ 1,
747
+ 1,
748
+ 1,
749
+ 1,
750
+ 1,
751
+ 1,
752
+ 1,
753
+ 1,
754
+ 1,
755
+ 1,
756
+ 1,
757
+ 1,
758
+ 1,
759
+ 1,
760
+ 1,
761
+ 1,
762
+ 1,
763
+ 1,
764
+ 1,
765
+ 1,
766
+ 1,
767
+ 1,
768
+ 1,
769
+ 1,
770
+ 1,
771
+ 1,
772
+ 1
773
+ ],
774
+ "sparse_score_type": "max",
775
+ "sparse_init_block": 0,
776
+ "sparse_local_block": 1,
777
+ "sparse_attention_freq": [
778
+ 0,
779
+ 0,
780
+ 0,
781
+ 1,
782
+ 1,
783
+ 1,
784
+ 1,
785
+ 1,
786
+ 1,
787
+ 1,
788
+ 1,
789
+ 1,
790
+ 1,
791
+ 1,
792
+ 1,
793
+ 1,
794
+ 1,
795
+ 1,
796
+ 1,
797
+ 1,
798
+ 1,
799
+ 1,
800
+ 1,
801
+ 1,
802
+ 1,
803
+ 1,
804
+ 1,
805
+ 1,
806
+ 1,
807
+ 1,
808
+ 1,
809
+ 1,
810
+ 1,
811
+ 1,
812
+ 1,
813
+ 1,
814
+ 1,
815
+ 1,
816
+ 1,
817
+ 1,
818
+ 1,
819
+ 1,
820
+ 1,
821
+ 1,
822
+ 1,
823
+ 1,
824
+ 1,
825
+ 1,
826
+ 1,
827
+ 1,
828
+ 1,
829
+ 1,
830
+ 1,
831
+ 1,
832
+ 1,
833
+ 1,
834
+ 1,
835
+ 1,
836
+ 1,
837
+ 1
838
+ ]
839
+ },
840
+ "architectures": [
841
+ "MiniMaxM3SparseForCausalLM"
842
+ ]
843
+ },
844
+ "top_p": 0.95,
845
+ "transformers_version": "4.52.4",
846
+ "video_token_index": 200026,
847
+ "vision_config": {
848
+ "hidden_size": 1280,
849
+ "num_attention_heads": 16,
850
+ "num_hidden_layers": 32,
851
+ "intermediate_size": 5120,
852
+ "patch_size": 14,
853
+ "image_size": 2016,
854
+ "projection_dim": 6144,
855
+ "position_embedding_type": "rope",
856
+ "rope_mode": "3d",
857
+ "rope_theta": 10000.0,
858
+ "attention_dropout": 0.0,
859
+ "hidden_act": "gelu",
860
+ "initializer_factor": 1.0,
861
+ "initializer_range": 0.02,
862
+ "layer_norm_eps": 1e-05,
863
+ "model_type": "clip_vision_model",
864
+ "num_channels": 3,
865
+ "vocab_size": 32000,
866
+ "img_token_compression_config": {
867
+ "image_token_compression_method": "patch_merge",
868
+ "spatial_merge_size": 2,
869
+ "temporal_patch_size": 2
870
+ },
871
+ "vision_segment_max_frames": 4
872
+ },
873
+ "vision_feature_layer": -1,
874
+ "vision_feature_select_strategy": "full"
875
+ }
configuration_minimax_m3_vl.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """HuggingFace configs for the MiniMax VL family (M2 VL / M3 VL).
2
+
3
+ This file is bundled into every converted HF checkpoint so that loading via
4
+ ``AutoConfig.from_pretrained(..., trust_remote_code=True)`` works without any
5
+ runtime dependency on sglang or other internal packages — only stock
6
+ ``transformers`` is required.
7
+
8
+ The class definitions intentionally mirror
9
+ ``sglang.srt.configs.minimax_vl``; if either side changes, keep them in sync.
10
+
11
+ The file is named ``configuration_minimax_m3_vl.py`` (matching the legacy
12
+ ``model_type="minimax_m3_vl"`` and the converter's ``auto_map`` entry) so
13
+ that ckpts produced by this converter remain loadable by older sglang versions
14
+ that only know the ``MiniMaxM3VL*`` names. The canonical class is
15
+ ``MiniMaxM3VLConfig``; the same-named subclass re-declared at the end of this file is a thin BC alias whose only
16
+ purpose is to be referenced from ``auto_map``.
17
+ """
18
+
19
+ from typing import Optional
20
+
21
+ from transformers.configuration_utils import PretrainedConfig
22
+ from transformers.models.auto import CONFIG_MAPPING
23
+
24
+
25
def _coerce_sub_config(
    sub_config: Optional[dict], default_model_type: str
) -> Optional[PretrainedConfig]:
    """Turn a raw sub-config dict into a ``PretrainedConfig`` object.

    Anything that is not a ``dict`` (``None`` or an already-built config
    object) is handed back untouched. For a dict, the config class is looked
    up in ``CONFIG_MAPPING`` under the dict's ``"model_type"`` entry (or
    ``default_model_type`` when that key is absent); when the lookup yields
    nothing, plain ``PretrainedConfig`` is used. The dict entries are then
    passed to the chosen class as keyword arguments.
    """
    if isinstance(sub_config, dict):
        # NOTE(review): ``CONFIG_MAPPING`` is a lazy mapping object; confirm
        # that its ``.get`` really resolves registered model types instead of
        # always returning the ``PretrainedConfig`` fallback.
        config_cls = CONFIG_MAPPING.get(
            sub_config.get("model_type", default_model_type), PretrainedConfig
        )
        return config_cls(**sub_config)
    return sub_config
41
+
42
+
43
class MiniMaxVLBaseConfig(PretrainedConfig):
    """Common configuration for the MiniMax VL model variants.

    Stores the multimodal settings as attributes and converts the
    ``vision_config`` / ``text_config`` dicts into config objects via
    ``_coerce_sub_config``. Subclasses only have to set ``model_type``.
    """

    def __init__(
        self,
        vision_config: Optional[dict] = None,
        text_config: Optional[dict] = None,
        image_token_index: int = 200025,
        video_token_index: int = 200026,
        image_seq_length: int = 576,
        process_image_mode: str = "dynamic_res",
        projector_hidden_act: str = "gelu",
        multimodal_projector_bias: bool = True,
        vision_feature_layer: int = -1,
        vision_feature_select_strategy: str = "full",
        img_token_compression_config: Optional[dict] = None,
        image_grid_pinpoints: Optional[str] = None,
        **kwargs,
    ):
        """Store all settings on the instance, then run the parent initializer.

        ``vision_config`` and ``text_config`` may be dicts, config objects or
        ``None``; dicts are coerced with fallback model types
        ``"clip_vision_model"`` and ``"mixtral"`` respectively. A missing
        ``img_token_compression_config`` is stored as an empty dict. Every
        other named argument is stored unchanged, and the remaining
        ``kwargs`` are forwarded to ``PretrainedConfig.__init__``.
        """
        # Sub-configs first: dicts become config objects, everything else
        # passes through unchanged.
        self.vision_config = _coerce_sub_config(vision_config, "clip_vision_model")
        self.text_config = _coerce_sub_config(text_config, "mixtral")

        # Plain multimodal settings, stored as given.
        self.image_token_index = image_token_index
        self.video_token_index = video_token_index
        self.image_seq_length = image_seq_length
        self.process_image_mode = process_image_mode
        self.projector_hidden_act = projector_hidden_act
        self.multimodal_projector_bias = multimodal_projector_bias
        self.vision_feature_layer = vision_feature_layer
        self.vision_feature_select_strategy = vision_feature_select_strategy
        # Falsy values (including ``None``) collapse to a fresh empty dict.
        self.img_token_compression_config = img_token_compression_config or {}
        self.image_grid_pinpoints = image_grid_pinpoints

        super().__init__(**kwargs)

    def __post_init__(self, **kwargs):
        """Run the parent ``__post_init__`` and re-coerce the sub-configs.

        NOTE(review): presumably only invoked by ``transformers`` versions
        whose base config defines ``__post_init__`` -- confirm.
        """
        super().__post_init__(**kwargs)
        for attr_name, fallback_type in (
            ("vision_config", "clip_vision_model"),
            ("text_config", "mixtral"),
        ):
            if hasattr(self, attr_name):
                setattr(
                    self,
                    attr_name,
                    _coerce_sub_config(getattr(self, attr_name), fallback_type),
                )
88
+
89
+
90
class MiniMaxM2VLConfig(MiniMaxVLBaseConfig):
    """Config for MiniMax M2 VL (vision tower plus the Mixtral-style MoE M2 text backbone).

    Adds nothing to ``MiniMaxVLBaseConfig`` apart from its ``model_type`` tag.
    """

    model_type = "minimax_m2_vl"
94
+
95
+
96
class MiniMaxM3VLConfig(MiniMaxVLBaseConfig):
    """Config for MiniMax M3 VL (vision tower plus the mixed sparse/dense MoE M3 text backbone).

    Adds nothing to ``MiniMaxVLBaseConfig`` apart from its ``model_type`` tag.
    """

    model_type = "minimax_m3_vl"
100
+
101
+
102
class MiniMaxM2MiniVLConfig(MiniMaxM2VLConfig):
    """Backward-compatibility subclass for checkpoints with ``model_type="minimax_m2_mini_vl"``.

    Identical to ``MiniMaxM2VLConfig`` except for the ``model_type`` tag.
    """

    model_type = "minimax_m2_mini_vl"
106
+
107
+
108
class MiniMaxM3VLConfig(MiniMaxM3VLConfig):
    """Backward-compatibility subclass for checkpoints with ``model_type="minimax_m3_vl"``."""

    # NOTE(review): this class subclasses, and then shadows, the
    # ``MiniMaxM3VLConfig`` defined earlier in this module; both use the same
    # name and the same ``model_type``. Looks like a rename artifact --
    # confirm whether a distinct legacy class name was intended.
    model_type = "minimax_m3_vl"
generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 200019,
3
+ "do_sample": true,
4
+ "eos_token_id": 200020,
5
+ "temperature": 1.0,
6
+ "top_p": 0.95,
7
+ "transformers_version": "4.46.1"
8
+ }
image_processor.py ADDED
@@ -0,0 +1,223 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2023-2024 SGLang Team
2
+ # Licensed under the Apache License, Version 2.0 (the "License");
3
+ """
4
+ MiniMax VL family HuggingFace-compatible Processor, ImageProcessor, VideoProcessor.
5
+ """
6
+ import math
7
+ from typing import List, Tuple
8
+
9
+ import torch
10
+ from torchvision.transforms import InterpolationMode
11
+ from transformers import BatchFeature
12
+ from transformers.image_processing_utils_fast import (
13
+ BaseImageProcessorFast,
14
+ group_images_by_shape,
15
+ reorder_images,
16
+ )
17
+ from transformers.image_utils import PILImageResampling, SizeDict
18
+ from transformers.processing_utils import (
19
+ ImagesKwargs,
20
+ Unpack,
21
+ )
22
+ from transformers.utils import TensorType
23
+
24
+ MAX_RATIO = 200
25
+
26
+
27
def round_by_factor(number: int, factor: int) -> int:
    """Return the multiple of ``factor`` nearest to ``number``.

    Relies on the built-in ``round``, so an exact half goes to the even
    multiple count (e.g. 1.5 -> 2, 2.5 -> 2).
    """
    multiples = round(number / factor)
    return multiples * factor
29
+
30
+
31
def ceil_by_factor(number: int, factor: int) -> int:
    """Return the smallest multiple of ``factor`` that is >= ``number``."""
    multiples = math.ceil(number / factor)
    return multiples * factor
33
+
34
+
35
def floor_by_factor(number: int, factor: int) -> int:
    """Return the largest multiple of ``factor`` that is <= ``number``."""
    multiples = math.floor(number / factor)
    return multiples * factor
37
+
38
+
39
def smart_resize(
    height: int,
    width: int,
    factor: int = 28,
    min_pixels: int = 4 * 28 * 28,
    max_pixels: int = 451584,
) -> tuple[int, int]:
    """Pick a target size whose sides are multiples of ``factor``.

    Both sides are first rounded to the nearest multiple of ``factor`` (at
    least ``factor``). If the resulting area exceeds ``max_pixels`` the image
    is scaled down (sides floored to a multiple of ``factor``); if it is below
    ``min_pixels`` it is scaled up (sides ceiled to a multiple of ``factor``).
    The scale is derived from the original ``height * width`` so the aspect
    ratio is approximately preserved.

    Args:
        height: Original image height in pixels; must be positive.
        width: Original image width in pixels; must be positive.
        factor: Both returned sides are multiples of this value.
        min_pixels: Lower bound on the target area.
        max_pixels: Upper bound on the target area.

    Returns:
        ``(target_height, target_width)``.

    Raises:
        ValueError: If a dimension is not positive, or if the longer side is
            more than ``MAX_RATIO`` times the shorter side.
    """
    # Reject degenerate sizes up front; otherwise the ratio below would fail
    # with an uninformative ZeroDivisionError.
    if height <= 0 or width <= 0:
        raise ValueError(
            f"height and width must be positive, got height={height}, width={width}"
        )
    aspect_ratio = max(height, width) / min(height, width)
    if aspect_ratio > MAX_RATIO:
        raise ValueError(
            f"absolute aspect ratio must be smaller than {MAX_RATIO}, "
            f"got {aspect_ratio}"
        )
    h_bar = max(factor, round_by_factor(height, factor))
    w_bar = max(factor, round_by_factor(width, factor))
    if h_bar * w_bar > max_pixels:
        beta = math.sqrt((height * width) / max_pixels)
        # Flooring can reach 0 for a short side when ``max_pixels`` is small
        # relative to ``factor``; clamp so no side is ever zero-sized. In that
        # corner case the result may slightly exceed ``max_pixels``.
        h_bar = max(factor, floor_by_factor(height / beta, factor))
        w_bar = max(factor, floor_by_factor(width / beta, factor))
    elif h_bar * w_bar < min_pixels:
        beta = math.sqrt(min_pixels / (height * width))
        h_bar = ceil_by_factor(height * beta, factor)
        w_bar = ceil_by_factor(width * beta, factor)
    return h_bar, w_bar
62
+
63
+
64
+ # ==============================================================================
65
+ # MiniMax M3 VL Image Processor Fast (Fast Mode - Torch based)
66
+ # ==============================================================================
67
+
68
+
69
class MiniMaxM3VLImageProcessorKwargs(ImagesKwargs, total=False):
    """Optional keyword arguments of ``MiniMaxM3VLImageProcessor`` on top of ``ImagesKwargs``."""

    # Side length, in pixels, of one square patch.
    patch_size: int
    # Number of frames grouped into one temporal patch.
    temporal_patch_size: int
    # Patches are grouped in ``merge_size`` x ``merge_size`` windows; resize
    # targets are multiples of ``patch_size * merge_size``.
    merge_size: int
    # Upper bound on the resized image area, forwarded to ``smart_resize``.
    max_pixels: int
74
+
75
+
76
class MiniMaxM3VLImageProcessor(BaseImageProcessorFast):
    """Torch-based image processor that turns images into flattened patches.

    ``_preprocess`` resizes each image to a size chosen by ``smart_resize``,
    rescales and normalizes it, and cuts it into ``patch_size`` x
    ``patch_size`` patches, ordered so that the patches of one ``merge_size``
    x ``merge_size`` window are adjacent. The output holds ``pixel_values``
    (the patches of all images concatenated) and ``image_grid_thw`` (the
    ``[t, h, w]`` patch grid of every image).
    """

    # Class-level defaults picked up by the base class.
    do_resize = True
    resample = PILImageResampling.BICUBIC
    size = {"height": 672, "width": 672}  # required by base class validation, not used as resize bound
    default_to_square = False
    do_rescale = True
    rescale_factor = 1 / 255
    do_normalize = True
    image_mean = [0.48145466, 0.4578275, 0.40821073]
    image_std = [0.26862954, 0.26130258, 0.27577711]
    do_convert_rgb = True
    patch_size = 14
    temporal_patch_size = 2
    merge_size = 2
    max_pixels = 451584  # 672*672
    valid_kwargs = MiniMaxM3VLImageProcessorKwargs
    model_input_names = ["pixel_values", "image_grid_thw"]

    def __init__(self, **kwargs: Unpack[MiniMaxM3VLImageProcessorKwargs]):
        """Forward all keyword arguments to the base-class initializer."""
        super().__init__(**kwargs)

    def preprocess(
        self, images, **kwargs: Unpack[MiniMaxM3VLImageProcessorKwargs]
    ) -> BatchFeature:
        """Delegate to the base-class ``preprocess`` with the given images and kwargs."""
        return super().preprocess(images, **kwargs)

    def _preprocess(
        self,
        images: List[torch.Tensor],
        do_resize: bool,
        size: SizeDict,
        resample: PILImageResampling | InterpolationMode | int | None,
        do_rescale: bool,
        rescale_factor: float,
        do_normalize: bool,
        image_mean: float | List[float] | None,
        image_std: float | List[float] | None,
        patch_size: int,
        temporal_patch_size: int,
        merge_size: int,
        max_pixels: int,
        disable_grouping: bool | None,
        return_tensors: str | TensorType | None,
        **kwargs,
    ) -> BatchFeature:
        """Resize, normalize and patchify ``images``.

        Returns a ``BatchFeature`` with ``pixel_values`` (flattened patches of
        all images concatenated along dim 0) and ``image_grid_thw`` (one
        ``[grid_t, grid_h, grid_w]`` row per image). ``size`` is accepted but
        not used here; the resize target comes from ``smart_resize``.
        """
        # ---- Pass 1: resize, batching images that share a shape. ----
        same_shape_batches, batch_index = group_images_by_shape(
            images, disable_grouping=disable_grouping
        )
        # Resized sides must be divisible by a whole merge window of patches.
        side_multiple = patch_size * merge_size
        resized_by_shape = {}
        for shape_key, batch in same_shape_batches.items():
            height, width = batch.shape[-2:]
            if do_resize:
                target_height, target_width = smart_resize(
                    height,
                    width,
                    factor=side_multiple,
                    max_pixels=max_pixels,
                )
                batch = self.resize(
                    batch,
                    size=SizeDict(height=target_height, width=target_width),
                    resample=resample,
                )
            resized_by_shape[shape_key] = batch

        resized_images = reorder_images(resized_by_shape, batch_index)

        # ---- Pass 2: regroup by the new shapes, normalize and patchify. ----
        same_shape_batches, batch_index = group_images_by_shape(
            resized_images, disable_grouping=disable_grouping
        )
        patches_by_shape = {}
        grids_by_shape = {}

        for shape_key, batch in same_shape_batches.items():
            resized_height, resized_width = batch.shape[-2:]

            frames = self.rescale_and_normalize(
                batch,
                do_rescale,
                rescale_factor,
                do_normalize,
                image_mean,
                image_std,
            )
            # A 4-D batch has no frame axis yet; insert one at dim 1.
            if frames.ndim == 4:
                frames = frames.unsqueeze(1)

            # Repeat the last frame until the frame count is a multiple of
            # ``temporal_patch_size``.
            leftover = frames.shape[1] % temporal_patch_size
            if leftover != 0:
                padding = frames[:, -1:].repeat(
                    1, temporal_patch_size - leftover, 1, 1, 1
                )
                frames = torch.cat([frames, padding], dim=1)

            batch_size, frame_count, channel = frames.shape[:3]
            grid_t = frame_count // temporal_patch_size
            grid_h = resized_height // patch_size
            grid_w = resized_width // patch_size

            # Split time into temporal patches and each spatial axis into
            # (merge window, position inside window, pixel inside patch).
            windows = frames.view(
                batch_size,
                grid_t,
                temporal_patch_size,
                channel,
                grid_h // merge_size,
                merge_size,
                patch_size,
                grid_w // merge_size,
                merge_size,
                patch_size,
            )
            # Bring the grid axes to the front, ordered window-by-window, and
            # the per-patch content (channel, time, pixels) to the back.
            windows = windows.permute(0, 1, 4, 7, 5, 8, 3, 2, 6, 9)

            patches_by_shape[shape_key] = windows.reshape(
                batch_size,
                grid_t * grid_h * grid_w,
                channel * temporal_patch_size * patch_size * patch_size,
            )
            grids_by_shape[shape_key] = [[grid_t, grid_h, grid_w]] * batch_size

        # Restore the caller's image order for both patches and grids.
        ordered_patches = reorder_images(patches_by_shape, batch_index)
        ordered_grids = reorder_images(grids_by_shape, batch_index)

        pixel_values = torch.cat(ordered_patches, dim=0)
        image_grid_thw = torch.tensor(ordered_grids, dtype=torch.long)

        return BatchFeature(
            data={"pixel_values": pixel_values, "image_grid_thw": image_grid_thw},
            tensor_type=return_tensors,
        )

    def get_number_of_image_patches(self, height: int, width: int, images_kwargs=None):
        """Return how many patches an image of ``height`` x ``width`` yields.

        ``images_kwargs`` may override ``patch_size``, ``merge_size`` and
        ``max_pixels``; missing entries fall back to the instance attributes.
        The count is ``grid_h * grid_w`` of the size chosen by ``smart_resize``.
        """
        overrides = images_kwargs or {}
        patch_size = overrides.get("patch_size", self.patch_size)
        merge_size = overrides.get("merge_size", self.merge_size)
        max_pixels = overrides.get("max_pixels", self.max_pixels)

        target_height, target_width = smart_resize(
            height,
            width,
            factor=patch_size * merge_size,
            max_pixels=max_pixels,
        )
        return (target_height // patch_size) * (target_width // patch_size)
model-00001-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85d6eae8bc2c8bed69f6aa550e6f3eb086ac9ba24883736fbc90cdfda257a332
3
+ size 5297839185
model-00002-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8b1413a8e197ad93d03634234217bfb6c7ae156580489f32eac6348bb2caad11
3
+ size 4172005217
model-00003-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8c485b0e2671a26779208fdf6d90fa37212ac8ae4376e668fed95a295e76943
3
+ size 4172005211
model-00004-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21fa43d181dd80ee1809363e69337765314b14ef9c59c77a062e6e245a95aee0
3
+ size 4172005149
model-00005-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57333d815a49a81c99dc002197d681ab7b656793186f256b1c47ed902fe3906e
3
+ size 4172005217
model-00006-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e056500ef74711ef2fbb7c7dbfdd542c331ed72e011eb83880797754711003e8
3
+ size 4172005213
model-00007-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0163719e881fa75636d7eff282c7ff5f41327d35e370263f9004fc64ec80f13b
3
+ size 4172005217
model-00008-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5cea2893fd833a6cee2d2ca01cf7ad3388f797de369a5d97847efd801234c0f
3
+ size 4172005242
model-00009-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e39a0d6967a6f7b69301dae33b9f77482d8891388ec43b061cdef5bebd51ee8
3
+ size 4172005263
model-00010-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eaca5ba7116485855c7909ab6cc1701bec3d72124f66057e996765b2d6d57ad
3
+ size 4172005247
model-00011-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da5ec603c3c2e5eead90fb58959493f10cfd49c88b89a9e284bf519ff4b63f30
3
+ size 4172005259
model-00012-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ee0af22322bc69d858f394dd954b44a94e5179c6dea857b75581c3abbc6b9c1
3
+ size 4172005245
model-00013-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9084506c8375bad74bd66ebbe9973b45dcf5dfac8c1e904ad22629368c0994e5
3
+ size 4172005203
model-00014-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82b6ba4ddcfaf554f4f75c9a4e54a54c6b6f3257c90640adf9361934bd5b59d8
3
+ size 4172005257
model-00015-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d04969a85a1965733db5fb7e4f63b8edbd217051a127199b0b2542a99a6b3f1
3
+ size 4172005263
model-00016-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:775fef001d59acf33c1a0b36686ea10fadbbc23e5a72b270d16679ddf32951c1
3
+ size 4172005259
model-00017-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d64b69dabb551ec39682449a8f9210df3186169f5a4fb89ea7e319a49a970d8
3
+ size 4172005247
model-00018-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c5288b060e4f447670f20ceaee19edacf38d2f5cce8e3ddffae1478c99555c7
3
+ size 4172005259
model-00019-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cace5efab8c76e3a467f955c5c36ba8d074f8be838a33b5884ab49c708cb23b
3
+ size 4172005259
model-00020-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e9152eb6d04209a6f54b459900e017db2cb097c955a19288a5ef29e78c3d8cf
3
+ size 4172005255
model-00021-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a0814b6c766e76b1be3627dbc61e7303250ac17e8048be87b9eec30669b6195
3
+ size 4172005257
model-00022-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d12bebd43618ebea8f802e9100c3f29a195ee9f976d8752ca25f3bbdbff4286
3
+ size 4172005259
model-00023-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0af07d8bc4580a0ed6e9485118ef14d9f9575317d057590922e5b1d14273f01c
3
+ size 4172005255
model-00024-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:adcb39b5dfd8667f093b7231fee589372abaacc95cb478fa183c04eedf9c6f0a
3
+ size 4172005259
model-00025-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f4281c0647ef7e77070153692d6f10520efe6280ab918baa80670ea982c5a5b
3
+ size 4172005225
model-00026-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4765bedb19193134d8d9518f31118281ab32b3f6846905a7091f861e514ba74
3
+ size 4172005257
model-00027-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4501c6bc2ffc694ae8c103fea6ef36a87642411323551c6f4eb22d7455c9d25
3
+ size 4172005263
model-00028-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f0271e6723537b2be83c2c9bf9fe66cdd9216dab35808feaf7b983fa7199306
3
+ size 4172005223
model-00029-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56dc1666f2ac875d63937d0a6efc5d4c49c1d4393ee828993f01ea66ac4b16cb
3
+ size 4172005257
model-00030-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f98e7b3f0f5bd7e6216ba2e842dfd0900c8d365f32e6820b42f494dabb4b62b2
3
+ size 4172005235
model-00031-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fda7f306868f825a06d6d4c3ba79d5592fa0dd3a66fed5a3c01b90aec299cca4
3
+ size 4172005263
model-00032-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1546fda15dcb7724be074d3ba9172b5caccf50737f3544cccdb66494402d74d5
3
+ size 4172005259
model-00033-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99ec5110da1fec971c1569819d923abb375ac9c6bcb4e976e125c7536a6b15cb
3
+ size 4172005257
model-00034-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:346a858960a809be48d0e5571c24a0525d93a75f032c3802085c50a6ed69d3e1
3
+ size 4172005201
model-00035-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d2fa0ff96e104bb3d36823a85087fdfe9d6562b58d649b2be83bf349cb07c89
3
+ size 4172005259
model-00036-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c618dc1d0261fc2f8fa027902294f763efba8a59a75097fdceeee137ec311e0b
3
+ size 4172005257
model-00037-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1702f4d9a89cb81be07ece1ff9787e4caecf1cb76dfaab5f4925c0bec576d7cf
3
+ size 4172005263
model-00038-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:667fae9fbd06e3adfd8cc658b4356fcb0dbc92dbf17e377083f32055d401c1dc
3
+ size 4172005173
model-00039-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11eaf7e8fc0121384727d9a1c312a600df532a6a9a360f1cc96c0d12c874cbfc
3
+ size 4172005263
model-00040-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:680a2bd9abfa32b84a1fcb5bf7aa8d182cbe25fc98e1ba4d4512aaed56a2c898
3
+ size 4172005167
model-00041-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a537b70147547604918d429726e480bc7bcc9ee5fc9f308fcc41d209b43c4fc8
3
+ size 4172005259
model-00042-of-00058.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7c877fb402497f234d399b0d3defd4e4fef6988a9a3ecbb016f7d128c3e6c5a
3
+ size 4172005257