Upload
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- README.md +6 -4
- chat_template.jinja +143 -0
- config.json +162 -0
- configuration_mimo_v2_flash.py +109 -0
- model-00001-of-00071.safetensors +3 -0
- model-00002-of-00071.safetensors +3 -0
- model-00003-of-00071.safetensors +3 -0
- model-00004-of-00071.safetensors +3 -0
- model-00005-of-00071.safetensors +3 -0
- model-00006-of-00071.safetensors +3 -0
- model-00007-of-00071.safetensors +3 -0
- model-00008-of-00071.safetensors +3 -0
- model-00009-of-00071.safetensors +3 -0
- model-00010-of-00071.safetensors +3 -0
- model-00011-of-00071.safetensors +3 -0
- model-00012-of-00071.safetensors +3 -0
- model-00013-of-00071.safetensors +3 -0
- model-00014-of-00071.safetensors +3 -0
- model-00015-of-00071.safetensors +3 -0
- model-00016-of-00071.safetensors +3 -0
- model-00017-of-00071.safetensors +3 -0
- model-00018-of-00071.safetensors +3 -0
- model-00019-of-00071.safetensors +3 -0
- model-00020-of-00071.safetensors +3 -0
- model-00021-of-00071.safetensors +3 -0
- model-00022-of-00071.safetensors +3 -0
- model-00023-of-00071.safetensors +3 -0
- model-00024-of-00071.safetensors +3 -0
- model-00025-of-00071.safetensors +3 -0
- model-00026-of-00071.safetensors +3 -0
- model-00027-of-00071.safetensors +3 -0
- model-00028-of-00071.safetensors +3 -0
- model-00029-of-00071.safetensors +3 -0
- model-00030-of-00071.safetensors +3 -0
- model-00031-of-00071.safetensors +3 -0
- model-00032-of-00071.safetensors +3 -0
- model-00033-of-00071.safetensors +3 -0
- model-00034-of-00071.safetensors +3 -0
- model-00035-of-00071.safetensors +3 -0
- model-00036-of-00071.safetensors +3 -0
- model-00037-of-00071.safetensors +3 -0
- model-00038-of-00071.safetensors +3 -0
- model-00039-of-00071.safetensors +3 -0
- model-00040-of-00071.safetensors +3 -0
- model-00041-of-00071.safetensors +3 -0
- model-00042-of-00071.safetensors +3 -0
- model-00043-of-00071.safetensors +3 -0
- model-00044-of-00071.safetensors +3 -0
- model-00045-of-00071.safetensors +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
language: en
|
| 3 |
+
tags:
|
| 4 |
+
- mlx
|
| 5 |
+
pipeline_tag: text-generation
|
| 6 |
+
library_name: mlx
|
| 7 |
+
---
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{#- Chat template: renders `messages` (plus optional `tools`) into <|im_start|>role ... <|im_end|> turns. -#}
{#- Optional render flags default to false when the caller does not pass them. -#}
{%- if not add_generation_prompt is defined -%}
    {%- set add_generation_prompt = false -%}
{%- endif -%}
{%- if not enable_thinking is defined -%}
    {%- set enable_thinking = false -%}
{%- endif -%}
{%- if not keep_all_reasoning is defined -%}
    {%- set keep_all_reasoning = false -%}
{%- endif -%}
{#- render_extra_keys: for every key of json_dict that is not in handled_keys, emit a newline followed by the value wrapped in a tag named after the key. Mappings and non-string sequences are JSON-encoded; other values are stringified. Emits nothing when json_dict is not a mapping. -#}
{%- macro render_extra_keys(json_dict, handled_keys) -%}
    {%- if json_dict is mapping %}
        {%- for json_key in json_dict if json_key not in handled_keys %}
            {%- if json_dict[json_key] is mapping or (json_dict[json_key] is sequence and json_dict[json_key] is not string) %}
                {{- '\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | tojson | safe) ~ '</' ~ json_key ~ '>' }}
            {%- else %}
                {{-'\n<' ~ json_key ~ '>' ~ (json_dict[json_key] | string) ~ '</' ~ json_key ~ '>' }}
            {%- endif %}
        {%- endfor %}
    {%- endif %}
{%- endmacro -%}
{#- A leading system message is pulled out of the loop and rendered in the system turn below. -#}
{%- if messages[0]["role"] == "system" %}
    {%- set system_message = messages[0]["content"] %}
    {%- set loop_messages = messages[1:] %}
{%- else %}
    {%- set loop_messages = messages %}
{%- endif %}
{#- Index (within loop_messages) of the last user message; -1 when there is none. Used to decide which assistant turns keep their reasoning. -#}
{%- set ns = namespace(last_user_index=-1) %}
{%- for m in loop_messages %}
    {%- if m.role == 'user' %}
        {%- set ns.last_user_index = loop.index0 -%}
    {%- endif %}
{%- endfor %}
{%- if not tools is defined %}
    {%- set tools = [] %}
{%- endif %}
{#- System turn: caller-supplied system message or the built-in default, followed by the tool catalogue when tools are given. -#}
{%- if system_message is defined %}
    {{- "<|im_start|>system\n" + system_message }}
{%- else %}
    {{- "<|im_start|>system\nYou are MiMo, a helpful AI assistant engineered by Xiaomi." }}
{%- endif %}
{%- if tools is iterable and tools | length > 0 %}
    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou have access to the following functions:\n\n" }}
    {{- "<tools>" }}
    {%- for tool in tools %}
        {#- Accept both a bare function description and one wrapped under a `function` key. -#}
        {%- if tool.function is defined %}
            {%- set tool = tool.function %}
        {%- endif %}
        {{- "\n<function>\n<name>" ~ tool.name ~ "</name>" }}
        {%- if tool.description is defined %}
            {{- '\n<description>' ~ (tool.description | trim) ~ '</description>' }}
        {%- endif %}
        {{- '\n<parameters>' }}
        {%- if tool.parameters is defined and tool.parameters is mapping and tool.parameters.properties is defined and tool.parameters.properties is mapping %}
            {%- for param_name, param_fields in tool.parameters.properties|items %}
                {{- '\n<parameter>' }}
                {{- '\n<name>' ~ param_name ~ '</name>' }}
                {%- if param_fields.type is defined %}
                    {{- '\n<type>' ~ (param_fields.type | string) ~ '</type>' }}
                {%- endif %}
                {%- if param_fields.description is defined %}
                    {{- '\n<description>' ~ (param_fields.description | trim) ~ '</description>' }}
                {%- endif %}
                {%- set handled_keys = ['name', 'type', 'description'] %}
                {{- render_extra_keys(param_fields, handled_keys) }}
                {{- '\n</parameter>' }}
            {%- endfor %}
        {%- endif %}
        {%- set handled_keys = ['type', 'properties'] %}
        {{- render_extra_keys(tool.parameters, handled_keys) }}
        {{- '\n</parameters>' }}
        {%- set handled_keys = ['type', 'name', 'description', 'parameters'] %}
        {{- render_extra_keys(tool, handled_keys) }}
        {{- '\n</function>' }}
    {%- endfor %}
    {{- "\n</tools>" }}
    {{- '\n\nFor each function call, output the function name and arguments in the following format:\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>value_1</parameter>\n<parameter=example_parameter_2>This is the value for the second parameter\nthat can span\nmultiple lines</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- DO NOT use function calls inside <think></think> tags.\n- The value enclosed between parameter tags is preserved exactly as-is, including newlines and spaces.\n</IMPORTANT>' }}
{%- endif %}
{{- '<|im_end|>' }}
{%- for message in loop_messages %}
    {#- Non-string content is treated as empty for the assistant branch. -#}
    {%- if message.content is string %}
        {%- set content = message.content %}
    {%- else %}
        {%- set content = '' %}
    {%- endif %}
    {%- if message.role == "assistant" %}
        {#- Reasoning comes from `reasoning_content` when it is a string; otherwise it is split out of an inline <think>...</think> prefix in the content. -#}
        {%- if message.reasoning_content is string %}
            {%- set reasoning_content = message.reasoning_content %}
        {%- else %}
            {%- set reasoning_content = '' %}
            {%- if '</think>' in content %}
                {%- set reasoning_content = content.split('</think>')[0].split('<think>')[-1] %}
                {%- set content = content.split('</think>')[-1] %}
            {%- endif %}
        {%- endif %}
        {#- Reasoning is kept only for assistant turns after the last user message, unless keep_all_reasoning is set; other turns get an empty think block. -#}
        {%- if (keep_all_reasoning or loop.index0 > ns.last_user_index) and reasoning_content -%}
            {{- '<|im_start|>' + message.role + '\n<think>' + reasoning_content + '</think>' + content }}
        {%- else %}
            {{- '<|im_start|>' + message.role + '\n<think></think>' + content }}
        {%- endif %}
        {%- if message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
            {%- for tool_call in message.tool_calls %}
                {%- if tool_call.function is defined %}
                    {%- set tool_call = tool_call.function %}
                {%- endif %}
                {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
                {%- if tool_call.arguments is defined %}
                    {#- NOTE(review): `|items` needs `arguments` to be a mapping; a JSON-encoded string would fail here - confirm what callers pass. -#}
                    {%- for args_name, args_value in tool_call.arguments|items %}
                        {{- '<parameter=' + args_name + '>' }}
                        {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
                        {{- args_value }}
                        {{- '</parameter>\n' }}
                    {%- endfor %}
                {%- endif %}
                {{- '</function>\n</tool_call>' }}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>' }}
    {%- elif message.role == "user" or message.role == "system"%}
        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' }}
    {%- elif message.role == "tool" %}
        {#- A run of consecutive tool messages shares one tool turn. Open it on the first message of the run; `loop.first` covers a tool message with no predecessor, where `loop.previtem` is undefined. -#}
        {%- if loop.first or loop.previtem.role != "tool" %}
            {{- '<|im_start|>tool\n' }}
        {%- endif %}
        {{- '<tool_response>\n' }}
        {{- message.content }}
        {{- '\n</tool_response>\n' }}
        {#- Close the turn after the last tool message of the run. -#}
        {%- if loop.last or loop.nextitem.role != "tool" %}
            {{- '<|im_end|>' }}
        {%- endif %}
    {%- else %}
        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' }}
    {%- endif %}
{%- endfor %}
{#- Generation prompt: open an assistant turn; with thinking disabled, pre-fill an empty think block. -#}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
    {%- if not enable_thinking -%}
        {{- '<think></think>' -}}
    {%- else -%}
        {{- '' -}}
    {%- endif -%}
{%- endif %}
|
config.json
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_full_attention_sink_bias": false,
|
| 3 |
+
"add_swa_attention_sink_bias": true,
|
| 4 |
+
"architectures": [
|
| 5 |
+
"MiMoV2FlashForCausalLM"
|
| 6 |
+
],
|
| 7 |
+
"attention_bias": false,
|
| 8 |
+
"attention_chunk_size": 128,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"attention_value_scale": 0.707,
|
| 11 |
+
"auto_map": {
|
| 12 |
+
"AutoConfig": "configuration_mimo_v2_flash.MiMoV2FlashConfig",
|
| 13 |
+
"AutoModel": "modeling_mimo_v2_flash.MiMoV2FlashModel",
|
| 14 |
+
"AutoModelForCausalLM": "modeling_mimo_v2_flash.MiMoV2FlashForCausalLM"
|
| 15 |
+
},
|
| 16 |
+
"head_dim": 192,
|
| 17 |
+
"hidden_act": "silu",
|
| 18 |
+
"hidden_size": 4096,
|
| 19 |
+
"hybrid_layer_pattern": [
|
| 20 |
+
0,
|
| 21 |
+
1,
|
| 22 |
+
1,
|
| 23 |
+
1,
|
| 24 |
+
1,
|
| 25 |
+
0,
|
| 26 |
+
1,
|
| 27 |
+
1,
|
| 28 |
+
1,
|
| 29 |
+
1,
|
| 30 |
+
1,
|
| 31 |
+
0,
|
| 32 |
+
1,
|
| 33 |
+
1,
|
| 34 |
+
1,
|
| 35 |
+
1,
|
| 36 |
+
1,
|
| 37 |
+
0,
|
| 38 |
+
1,
|
| 39 |
+
1,
|
| 40 |
+
1,
|
| 41 |
+
1,
|
| 42 |
+
1,
|
| 43 |
+
0,
|
| 44 |
+
1,
|
| 45 |
+
1,
|
| 46 |
+
1,
|
| 47 |
+
1,
|
| 48 |
+
1,
|
| 49 |
+
0,
|
| 50 |
+
1,
|
| 51 |
+
1,
|
| 52 |
+
1,
|
| 53 |
+
1,
|
| 54 |
+
1,
|
| 55 |
+
0,
|
| 56 |
+
1,
|
| 57 |
+
1,
|
| 58 |
+
1,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
1,
|
| 63 |
+
1,
|
| 64 |
+
1,
|
| 65 |
+
1,
|
| 66 |
+
1,
|
| 67 |
+
0
|
| 68 |
+
],
|
| 69 |
+
"initializer_range": 0.02,
|
| 70 |
+
"intermediate_size": 16384,
|
| 71 |
+
"layernorm_epsilon": 1e-05,
|
| 72 |
+
"max_position_embeddings": 262144,
|
| 73 |
+
"model_type": "mimo_v2_flash",
|
| 74 |
+
"moe_intermediate_size": 2048,
|
| 75 |
+
"moe_layer_freq": [
|
| 76 |
+
0,
|
| 77 |
+
1,
|
| 78 |
+
1,
|
| 79 |
+
1,
|
| 80 |
+
1,
|
| 81 |
+
1,
|
| 82 |
+
1,
|
| 83 |
+
1,
|
| 84 |
+
1,
|
| 85 |
+
1,
|
| 86 |
+
1,
|
| 87 |
+
1,
|
| 88 |
+
1,
|
| 89 |
+
1,
|
| 90 |
+
1,
|
| 91 |
+
1,
|
| 92 |
+
1,
|
| 93 |
+
1,
|
| 94 |
+
1,
|
| 95 |
+
1,
|
| 96 |
+
1,
|
| 97 |
+
1,
|
| 98 |
+
1,
|
| 99 |
+
1,
|
| 100 |
+
1,
|
| 101 |
+
1,
|
| 102 |
+
1,
|
| 103 |
+
1,
|
| 104 |
+
1,
|
| 105 |
+
1,
|
| 106 |
+
1,
|
| 107 |
+
1,
|
| 108 |
+
1,
|
| 109 |
+
1,
|
| 110 |
+
1,
|
| 111 |
+
1,
|
| 112 |
+
1,
|
| 113 |
+
1,
|
| 114 |
+
1,
|
| 115 |
+
1,
|
| 116 |
+
1,
|
| 117 |
+
1,
|
| 118 |
+
1,
|
| 119 |
+
1,
|
| 120 |
+
1,
|
| 121 |
+
1,
|
| 122 |
+
1,
|
| 123 |
+
1
|
| 124 |
+
],
|
| 125 |
+
"n_group": 1,
|
| 126 |
+
"n_routed_experts": 256,
|
| 127 |
+
"n_shared_experts": null,
|
| 128 |
+
"norm_topk_prob": true,
|
| 129 |
+
"num_attention_heads": 64,
|
| 130 |
+
"num_experts_per_tok": 8,
|
| 131 |
+
"num_hidden_layers": 48,
|
| 132 |
+
"num_key_value_heads": 4,
|
| 133 |
+
"partial_rotary_factor": 0.334,
|
| 134 |
+
"quantization": {
|
| 135 |
+
"group_size": 64,
|
| 136 |
+
"bits": 8,
|
| 137 |
+
"mode": "affine"
|
| 138 |
+
},
|
| 139 |
+
"quantization_config": {
|
| 140 |
+
"group_size": 64,
|
| 141 |
+
"bits": 8,
|
| 142 |
+
"mode": "affine"
|
| 143 |
+
},
|
| 144 |
+
"rope_theta": 5000000,
|
| 145 |
+
"routed_scaling_factor": null,
|
| 146 |
+
"scoring_func": "sigmoid",
|
| 147 |
+
"sliding_window": 128,
|
| 148 |
+
"sliding_window_size": 128,
|
| 149 |
+
"swa_head_dim": 192,
|
| 150 |
+
"swa_num_attention_heads": 64,
|
| 151 |
+
"swa_num_key_value_heads": 8,
|
| 152 |
+
"swa_rope_theta": 10000,
|
| 153 |
+
"swa_v_head_dim": 128,
|
| 154 |
+
"tie_word_embeddings": false,
|
| 155 |
+
"topk_group": 1,
|
| 156 |
+
"topk_method": "noaux_tc",
|
| 157 |
+
"torch_dtype": "bfloat16",
|
| 158 |
+
"transformers_version": "4.40.1",
|
| 159 |
+
"use_cache": true,
|
| 160 |
+
"v_head_dim": 128,
|
| 161 |
+
"vocab_size": 152576
|
| 162 |
+
}
|
configuration_mimo_v2_flash.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding=utf-8
|
| 2 |
+
#
|
| 3 |
+
# Copyright 2025 Xiaomi Corporation.
|
| 4 |
+
# Copyright 2025 The HuggingFace Inc. team.
|
| 5 |
+
#
|
| 6 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 7 |
+
# you may not use this file except in compliance with the License.
|
| 8 |
+
# You may obtain a copy of the License at
|
| 9 |
+
#
|
| 10 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 11 |
+
#
|
| 12 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 13 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 14 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 15 |
+
# See the License for the specific language governing permissions and
|
| 16 |
+
# limitations under the License.
|
| 17 |
+
|
| 18 |
+
from transformers.configuration_utils import PretrainedConfig
|
| 19 |
+
from transformers.modeling_rope_utils import rope_config_validation
|
| 20 |
+
from transformers.utils import logging
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
logger = logging.get_logger(__name__)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class MiMoV2FlashConfig(PretrainedConfig):
    """Configuration class for MiMo-V2-Flash checkpoints.

    The explicitly named arguments are stored as attributes of the same name.
    Any other keyword arguments are forwarded unchanged to
    ``PretrainedConfig.__init__`` together with ``tie_word_embeddings``.

    Args:
        vocab_size: Stored as ``self.vocab_size``.
        hidden_size: Stored as ``self.hidden_size``.
        intermediate_size: Stored as ``self.intermediate_size``.
        num_hidden_layers: Stored as ``self.num_hidden_layers``; also the
            length of the generated ``hybrid_layer_pattern`` (see below).
        num_attention_heads: Stored as ``self.num_attention_heads``.
        num_key_value_heads: Stored as ``self.num_key_value_heads``. When
            ``None`` it falls back to ``num_attention_heads``.
        hidden_act: Stored as ``self.hidden_act``.
        max_position_embeddings: Stored as ``self.max_position_embeddings``.
        initializer_range: Stored as ``self.initializer_range``.
        layernorm_epsilon: Stored as ``self.layernorm_epsilon``.
        use_cache: Stored as ``self.use_cache``.
        tie_word_embeddings: Forwarded to ``PretrainedConfig.__init__``.
        rope_theta: Stored as ``self.rope_theta``.
        rope_scaling: Optional dict stored as ``self.rope_scaling``. A legacy
            ``"type"`` key is copied to ``"rope_type"`` in place before
            validation.
        attention_dropout: Stored as ``self.attention_dropout``.
        hybrid_block_size: Optional int. When given and
            ``hybrid_layer_pattern`` is ``None``, a pattern is generated with
            ``0`` at every ``hybrid_block_size``-th layer and ``1`` elsewhere.
        hybrid_layer_pattern: Optional per-layer list of 0/1 flags.
            NOTE(review): presumably 0 = full attention and 1 = sliding-window
            attention - confirm against the modeling code.
        partial_rotary_factor: Stored as ``self.partial_rotary_factor``.
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    # Must match the "model_type" value written in the checkpoint's
    # config.json so the class and the serialized config agree.
    model_type = "mimo_v2_flash"
    keys_to_ignore_at_inference = ["past_key_values"]

    # Default tensor parallel plan for base model `Hybrid`
    base_model_tp_plan = {
        "layers.*.self_attn.q_proj": "colwise",
        "layers.*.self_attn.k_proj": "colwise",
        "layers.*.self_attn.v_proj": "colwise",
        "layers.*.self_attn.o_proj": "rowwise",
        "layers.*.mlp.gate_proj": "colwise",
        "layers.*.mlp.up_proj": "colwise",
        "layers.*.mlp.down_proj": "rowwise",
    }
    base_model_pp_plan = {
        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
        "norm": (["hidden_states"], ["hidden_states"]),
    }

    # Alias: reading/writing `num_local_experts` resolves to `n_routed_experts`.
    attribute_map = {
        "num_local_experts": "n_routed_experts",
    }

    def __init__(
        self,
        vocab_size=151936,
        hidden_size=4096,
        intermediate_size=22016,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=32,
        hidden_act="silu",
        max_position_embeddings=32768,
        initializer_range=0.02,
        layernorm_epsilon=1e-6,
        use_cache=True,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        rope_scaling=None,
        attention_dropout=0.0,
        hybrid_block_size=None,
        hybrid_layer_pattern=None,
        partial_rotary_factor=1.0,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # for backward compatibility
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.layernorm_epsilon = layernorm_epsilon
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self.attention_dropout = attention_dropout

        # Derive the per-layer pattern from the block size only when no
        # explicit pattern was supplied: every `hybrid_block_size`-th layer
        # (1-based) gets 0, all other layers get 1.
        if hybrid_block_size is not None and hybrid_layer_pattern is None:
            hybrid_layer_pattern = [0 if ((i + 1) % hybrid_block_size == 0) else 1 for i in range(num_hidden_layers)]
        self.hybrid_block_size = hybrid_block_size
        self.hybrid_layer_pattern = hybrid_layer_pattern

        self.partial_rotary_factor = partial_rotary_factor

        # Validate the correctness of rotary position embeddings parameters
        # BC: if there is a 'type' field, move it to 'rope_type'.
        if self.rope_scaling is not None and "type" in self.rope_scaling:
            self.rope_scaling["rope_type"] = self.rope_scaling["type"]
        rope_config_validation(self)

        super().__init__(
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
|
model-00001-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1bb7fd5c06b7e1f107acf128cf74c2c98b5570f1f7008dda90caf257521dccae
|
| 3 |
+
size 3354612527
|
model-00002-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fa7b4eb05bfbd2967c1194d5a9b8a7095dacdbe17f5adbddacb4254143d78d0
|
| 3 |
+
size 4665790200
|
model-00003-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b65d7aeb850f6c8a0333395febaddf9b3d504cfb59a7572cd0ea999c7efd6e60
|
| 3 |
+
size 4563403538
|
model-00004-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:680445d66ecfc0b641e2243ef09cf6b8164f352470a6695abb12dd3c0e1db274
|
| 3 |
+
size 4665790258
|
model-00005-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1588f60a7171d23145af2af7c5cc06431ae8115521b0392c70bb4105595d08ef
|
| 3 |
+
size 4665790214
|
model-00006-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e203c3580a25abd1b67f32839030e262b30c3e06a230b7a4532d14ff9f34dc7
|
| 3 |
+
size 4563403538
|
model-00007-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e0a9c2a7b69bed7031202d998968c38b75c7fa75ebfd8af02f95d25a179db49
|
| 3 |
+
size 4660219434
|
model-00008-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae0ce749b6ca1c7b3da3b94105df792c122ad34e7661edb3bc501a45b921af43
|
| 3 |
+
size 4665790218
|
model-00009-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31ae522990dd285ccea63e10e8c489aba1be33d3e1582058bc18926bbfb14047
|
| 3 |
+
size 4563403534
|
model-00010-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f65e4a670144b6395ca49fcdc40691f36ef3072fc02b657557a707249df21d33
|
| 3 |
+
size 4665790242
|
model-00011-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d511ad2f1463d7b42aca2159f91d76d20cb9e6e27c3a32ca95ead8b46150bcdb
|
| 3 |
+
size 4665790214
|
model-00012-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1c8b8994c371a76e7c8a5f0509f0822aeb5e0aeb51b7dba36764079e2999f3d
|
| 3 |
+
size 4563403538
|
model-00013-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c41574a1adfb130f0b820bca9b17ae289286f98d37dced042986f4a006f7544
|
| 3 |
+
size 4665790278
|
model-00014-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:76bb12fd2c24249314985d828ffcabbfeec1f5d828d990dd1bd52a7e6df15aed
|
| 3 |
+
size 4665790215
|
model-00015-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f8c74834c4c8696ad07240625c3d1126976218091b6e480b8cedc6dd0782f1f9
|
| 3 |
+
size 4563403544
|
model-00016-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:560793a235440990d113351d43ef3ddc46aee9eba869f0aada63acee9b6532e6
|
| 3 |
+
size 4660219486
|
model-00017-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3de7d4937528bcf9f850d875a9f5ba2b72276025361cb9fa18e2726be98f85a8
|
| 3 |
+
size 4665790245
|
model-00018-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b04899fe5d628af0fb11a0d2a4016f585242f5c3827f3145fbeb68ac20d294b1
|
| 3 |
+
size 4563403544
|
model-00019-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:417b6c17b1d4354ff57b140f9dec2547e24783ae9a2f843e33000831b10f2b85
|
| 3 |
+
size 4665790277
|
model-00020-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e41608d494035d42c8352c95b28c6b373bfc003bc84784d823fdfd34504d74a2
|
| 3 |
+
size 4665790235
|
model-00021-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1b584a5ba56cf748a4ef65bd152453ca4c5e5b5038a226138f464ee5da476156
|
| 3 |
+
size 4563403540
|
model-00022-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:88e03f2a83bff63a78db1a1be9ac53d0f102f7453fc820fcef81f324dac6fc26
|
| 3 |
+
size 4665790301
|
model-00023-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e7d83c7c8bde832f5b0dc2a4e9919df1cf894ba22b5e2b30fc160cb35ae2b773
|
| 3 |
+
size 4665790245
|
model-00024-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb68ac6bed83189a0828140d83538d51bd00a432dc8b443131f31dc3df0e214a
|
| 3 |
+
size 4563403544
|
model-00025-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e2ed8d67b55cc932d50b7bc8abbe8fe4abc9698880fac0dd4aaf47636565a48
|
| 3 |
+
size 4660219480
|
model-00026-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:967d7a215440b937d109907cacf165be4f9066adfa8b30814cd1bbc889f2b1d9
|
| 3 |
+
size 4665790253
|
model-00027-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c95c534e2f67161905c31d6fa874a8141f6dee4e0547e82e21026fa51c860c4
|
| 3 |
+
size 4563403548
|
model-00028-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e0dc81536758f328ed1684041a6bf89fddbf7ff23caf660c307eab23058fcff
|
| 3 |
+
size 4665790297
|
model-00029-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1c0ca11a1cbf6e81fef8b23a65650394bfc0f1ebbe7798e8ef57989235d9520
|
| 3 |
+
size 4665790243
|
model-00030-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4fb42852a702315a891e731d0341b0bfe25c9accd1dc6de35261bc4b544f4165
|
| 3 |
+
size 4563403540
|
model-00031-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59f5616157fd4a827724be5e896f3a679a8528a3bdd15dbbca41ec39c776354d
|
| 3 |
+
size 4665790297
|
model-00032-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb0ce6aa499ea51c2d3fd3d0f93a2de2f13f60f3f78ab5d00bff34ca55d655e4
|
| 3 |
+
size 4665790227
|
model-00033-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3a761e02adac0f006080631e355c7555050dcb5785c987e5536264904969648c
|
| 3 |
+
size 4563403542
|
model-00034-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b5a02ed506734fe44957ce3dde1c3380ffde6b907da2977763b7c1fb86451e01
|
| 3 |
+
size 4660219490
|
model-00035-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:562743c69b9e994423f975206674102e2552eab601d6b8836845b98ebd49bb10
|
| 3 |
+
size 4665790227
|
model-00036-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f5e8bea6bcd4f7e3a80cc7f89ee4e3ac557b52038b21a82a7d2683efc64faf5
|
| 3 |
+
size 4563403538
|
model-00037-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5d93ba8fa21d29cbca7c9f23b55ee918f04665156a3ae307f673453ceb103a42
|
| 3 |
+
size 4665790295
|
model-00038-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed23e90aacfc6105e43265511afec0ff53bee622058b19a66dd7a07f92cf016a
|
| 3 |
+
size 4665790237
|
model-00039-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0884eac62194fc6c0a4999e70ff80b9a9cf835054eac2ab063cc13cbfd9b0f7
|
| 3 |
+
size 4563403544
|
model-00040-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d25fea33679489343feec234e13505e706746db3f8f3b42d6c89cc7e985a525e
|
| 3 |
+
size 4665790297
|
model-00041-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:979f3e691c04ba9790a7b09a495710f550e5c5093fa184af17e210cb3c022b26
|
| 3 |
+
size 4665790237
|
model-00042-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5199bd040ce697f3c0747e3266c7a8163d0ee4cf25bdb0b94052d8c6f287e0f9
|
| 3 |
+
size 4563403540
|
model-00043-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd34a9626d1606ac67d4b746fdd84c4088e8e8a9edfc95274f59f355c949b901
|
| 3 |
+
size 4660219482
|
model-00044-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cdd44b44d0569d00967320f2b94e793d33c26fd88a6201340a69ca92095374b7
|
| 3 |
+
size 4665790243
|
model-00045-of-00071.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60b781998bb199e2d49bd234a01668d7bb6db752c903e2fc287f633e027d9932
|
| 3 |
+
size 4563403544
|