Upload
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +1 -0
- README.md +6 -4
- chat_template.jinja +143 -0
- config.json +162 -0
- configuration_mimo_v2_flash.py +109 -0
- model-00001-of-00072.safetensors +3 -0
- model-00002-of-00072.safetensors +3 -0
- model-00003-of-00072.safetensors +3 -0
- model-00004-of-00072.safetensors +3 -0
- model-00005-of-00072.safetensors +3 -0
- model-00006-of-00072.safetensors +3 -0
- model-00007-of-00072.safetensors +3 -0
- model-00008-of-00072.safetensors +3 -0
- model-00009-of-00072.safetensors +3 -0
- model-00010-of-00072.safetensors +3 -0
- model-00011-of-00072.safetensors +3 -0
- model-00012-of-00072.safetensors +3 -0
- model-00013-of-00072.safetensors +3 -0
- model-00014-of-00072.safetensors +3 -0
- model-00015-of-00072.safetensors +3 -0
- model-00016-of-00072.safetensors +3 -0
- model-00017-of-00072.safetensors +3 -0
- model-00018-of-00072.safetensors +3 -0
- model-00019-of-00072.safetensors +3 -0
- model-00020-of-00072.safetensors +3 -0
- model-00021-of-00072.safetensors +3 -0
- model-00022-of-00072.safetensors +3 -0
- model-00023-of-00072.safetensors +3 -0
- model-00024-of-00072.safetensors +3 -0
- model-00025-of-00072.safetensors +3 -0
- model-00026-of-00072.safetensors +3 -0
- model-00027-of-00072.safetensors +3 -0
- model-00028-of-00072.safetensors +3 -0
- model-00029-of-00072.safetensors +3 -0
- model-00030-of-00072.safetensors +3 -0
- model-00031-of-00072.safetensors +3 -0
- model-00032-of-00072.safetensors +3 -0
- model-00033-of-00072.safetensors +3 -0
- model-00034-of-00072.safetensors +3 -0
- model-00035-of-00072.safetensors +3 -0
- model-00036-of-00072.safetensors +3 -0
- model-00037-of-00072.safetensors +3 -0
- model-00038-of-00072.safetensors +3 -0
- model-00039-of-00072.safetensors +3 -0
- model-00040-of-00072.safetensors +3 -0
- model-00041-of-00072.safetensors +3 -0
- model-00042-of-00072.safetensors +3 -0
- model-00043-of-00072.safetensors +3 -0
- model-00044-of-00072.safetensors +3 -0
- model-00045-of-00072.safetensors +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
---
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
language: en
|
| 3 |
+
library_name: mlx
|
| 4 |
+
pipeline_tag: text-generation
|
| 5 |
+
tags:
|
| 6 |
+
- mlx
|
| 7 |
+
---
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,143 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{#- Optional render flags: any flag the caller did not supply is set to false. -#}
{%- if add_generation_prompt is not defined -%}
    {%- set add_generation_prompt = false -%}
{%- endif -%}
{%- if enable_thinking is not defined -%}
    {%- set enable_thinking = false -%}
{%- endif -%}
{%- if keep_all_reasoning is not defined -%}
    {%- set keep_all_reasoning = false -%}
{%- endif -%}
|
| 10 |
+
{#- render_extra_keys(json_dict, handled_keys)
    For every key of `json_dict` that is not listed in `handled_keys`, emit a line of the
    form "\n<key>value</key>". Mappings and non-string sequences are serialised with
    `tojson`; every other value is converted with `string`.
    Emits nothing when `json_dict` is not a mapping. -#}
{%- macro render_extra_keys(json_dict, handled_keys) -%}
    {%- if json_dict is mapping %}
        {%- for json_key in json_dict if json_key not in handled_keys %}
            {%- set extra_value = json_dict[json_key] %}
            {%- set is_structured = extra_value is mapping or (extra_value is sequence and extra_value is not string) %}
            {%- set rendered = (extra_value | tojson | safe) if is_structured else (extra_value | string) %}
            {{- '\n<' ~ json_key ~ '>' ~ rendered ~ '</' ~ json_key ~ '>' }}
        {%- endfor %}
    {%- endif %}
{%- endmacro -%}
|
| 21 |
+
{#- Split an optional leading system message off the conversation.
    `messages` is checked for emptiness first so that an empty conversation falls through
    to the default system prompt instead of failing on messages[0]. -#}
{%- if messages and messages[0]["role"] == "system" %}
    {%- set system_message = messages[0]["content"] %}
    {%- set loop_messages = messages[1:] %}
{%- else %}
    {%- set loop_messages = messages %}
{%- endif %}
{#- Position (within loop_messages) of the last user turn; stays -1 when there is none.
    A namespace is used because a plain `set` inside the loop would not survive it. -#}
{%- set ns = namespace(last_user_index=-1) %}
{%- for m in loop_messages %}
    {%- if m.role == 'user' %}
        {%- set ns.last_user_index = loop.index0 -%}
    {%- endif %}
{%- endfor %}
{#- Treat a missing `tools` argument as "no tools". -#}
{%- if not tools is defined %}
    {%- set tools = [] %}
{%- endif %}
|
| 36 |
+
{#- System turn: the caller's system message when one was given, otherwise the built-in
    MiMo prompt. The turn is left open so the tool catalogue can be appended to it. -#}
{%- if system_message is not defined %}
    {{- "<|im_start|>system\nYou are MiMo, a helpful AI assistant engineered by Xiaomi." }}
{%- else %}
    {{- "<|im_start|>system\n" + system_message }}
{%- endif %}
{%- if tools is iterable and tools | length > 0 %}
    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou have access to the following functions:\n\n" }}
    {{- "<tools>" }}
    {%- for tool in tools %}
        {#- Accept both a wrapper object with a `function` entry and a bare function description. -#}
        {%- set fn = tool.function if tool.function is defined else tool %}
        {{- "\n<function>\n<name>" ~ fn.name ~ "</name>" }}
        {%- if fn.description is defined %}
            {{- '\n<description>' ~ (fn.description | trim) ~ '</description>' }}
        {%- endif %}
        {{- '\n<parameters>' }}
        {%- if fn.parameters is defined and fn.parameters is mapping and fn.parameters.properties is defined and fn.parameters.properties is mapping %}
            {%- for prop_name, prop_spec in fn.parameters.properties|items %}
                {{- '\n<parameter>' }}
                {{- '\n<name>' ~ prop_name ~ '</name>' }}
                {%- if prop_spec.type is defined %}
                    {{- '\n<type>' ~ (prop_spec.type | string) ~ '</type>' }}
                {%- endif %}
                {%- if prop_spec.description is defined %}
                    {{- '\n<description>' ~ (prop_spec.description | trim) ~ '</description>' }}
                {%- endif %}
                {#- Remaining keys of the property are dumped verbatim. -#}
                {{- render_extra_keys(prop_spec, ['name', 'type', 'description']) }}
                {{- '\n</parameter>' }}
            {%- endfor %}
        {%- endif %}
        {#- Keys of `parameters` other than type/properties. -#}
        {{- render_extra_keys(fn.parameters, ['type', 'properties']) }}
        {{- '\n</parameters>' }}
        {#- Keys of the function description that were not rendered above. -#}
        {{- render_extra_keys(fn, ['type', 'name', 'description', 'parameters']) }}
        {{- '\n</function>' }}
    {%- endfor %}
    {{- "\n</tools>" }}
    {{- '\n\nFor each function call, output the function name and arguments in the following format:\n<tool_call>\n<function=example_function_name>\n<parameter=example_parameter_1>value_1</parameter>\n<parameter=example_parameter_2>This is the value for the second parameter\nthat can span\nmultiple lines</parameter>\n</function>\n</tool_call>\n\n<IMPORTANT>\n- Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags\n- DO NOT use function calls inside <think></think> tags.\n- The value enclosed between parameter tags is preserved exactly as-is, including newlines and spaces.\n</IMPORTANT>' }}
{%- endif %}
{#- Close the system turn. -#}
{{- '<|im_end|>' }}
|
| 79 |
+
{#- Render every non-system-prefix message as one chat turn. -#}
{%- for message in loop_messages %}
    {#- `content` is the message text, or '' when the content is not a plain string. -#}
    {%- if message.content is string %}
        {%- set content = message.content %}
    {%- else %}
        {%- set content = '' %}
    {%- endif %}
    {%- if message.role == "assistant" %}
        {#- Reasoning comes from `reasoning_content` when it is a string; otherwise it is
            recovered from an inline <think>...</think> prefix of the content, which is
            then removed from `content`. -#}
        {%- if message.reasoning_content is string %}
            {%- set reasoning_content = message.reasoning_content %}
        {%- else %}
            {%- set reasoning_content = '' %}
            {%- if '</think>' in content %}
                {%- set reasoning_content = content.split('</think>')[0].split('<think>')[-1] %}
                {%- set content = content.split('</think>')[-1] %}
            {%- endif %}
        {%- endif %}
        {#- Reasoning is kept only for assistant turns after the last user turn, unless
            keep_all_reasoning is set; otherwise an empty think block is written. -#}
        {%- if (keep_all_reasoning or loop.index0 > ns.last_user_index) and reasoning_content -%}
            {{- '<|im_start|>' + message.role + '\n<think>' + reasoning_content + '</think>' + content }}
        {%- else %}
            {{- '<|im_start|>' + message.role + '\n<think></think>' + content }}
        {%- endif %}
        {%- if message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}
            {%- for tool_call in message.tool_calls %}
                {#- Accept both a wrapper with a `function` entry and a bare call object. -#}
                {%- if tool_call.function is defined %}
                    {%- set tool_call = tool_call.function %}
                {%- endif %}
                {{- '<tool_call>\n<function=' + tool_call.name + '>\n' }}
                {%- if tool_call.arguments is defined %}
                    {%- for args_name, args_value in tool_call.arguments|items %}
                        {{- '<parameter=' + args_name + '>' }}
                        {#- Mappings and non-string sequences are written as JSON, everything else as text. -#}
                        {%- set args_value = args_value | tojson | safe if args_value is mapping or (args_value is sequence and args_value is not string) else args_value | string %}
                        {{- args_value }}
                        {{- '</parameter>\n' }}
                    {%- endfor %}
                {%- endif %}
                {{- '</function>\n</tool_call>' }}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>' }}
    {%- elif message.role == "tool" %}
        {#- Consecutive tool messages share a single tool turn. The header is opened on the
            first tool message of a run; `loop.first` is tested explicitly because
            `loop.previtem` is undefined on the first iteration, which previously caused a
            run that starts the conversation to be emitted without its header. -#}
        {%- if loop.first or loop.previtem.role != "tool" %}
            {{- '<|im_start|>tool\n' }}
        {%- endif %}
        {{- '<tool_response>\n' }}
        {{- message.content }}
        {{- '\n</tool_response>\n' }}
        {#- Close the turn after the last tool message of the run. -#}
        {%- if loop.last or loop.nextitem.role != "tool" %}
            {{- '<|im_end|>' }}
        {%- endif %}
    {%- else %}
        {#- user, system and any other role are rendered verbatim as a simple turn. -#}
        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' }}
    {%- endif %}
{%- endfor %}
|
| 136 |
+
{#- Open the assistant turn for generation. When thinking is disabled the turn is
    pre-filled with an empty think block; otherwise nothing further is written. -#}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
    {{- '<think></think>' if not enable_thinking else '' -}}
{%- endif %}
|
config.json
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_full_attention_sink_bias": false,
|
| 3 |
+
"add_swa_attention_sink_bias": true,
|
| 4 |
+
"architectures": [
|
| 5 |
+
"MiMoV2FlashForCausalLM"
|
| 6 |
+
],
|
| 7 |
+
"attention_bias": false,
|
| 8 |
+
"attention_chunk_size": 128,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"attention_value_scale": 0.707,
|
| 11 |
+
"auto_map": {
|
| 12 |
+
"AutoConfig": "configuration_mimo_v2_flash.MiMoV2FlashConfig",
|
| 13 |
+
"AutoModel": "modeling_mimo_v2_flash.MiMoV2FlashModel",
|
| 14 |
+
"AutoModelForCausalLM": "modeling_mimo_v2_flash.MiMoV2FlashForCausalLM"
|
| 15 |
+
},
|
| 16 |
+
"head_dim": 192,
|
| 17 |
+
"hidden_act": "silu",
|
| 18 |
+
"hidden_size": 4096,
|
| 19 |
+
"hybrid_layer_pattern": [
|
| 20 |
+
0,
|
| 21 |
+
1,
|
| 22 |
+
1,
|
| 23 |
+
1,
|
| 24 |
+
1,
|
| 25 |
+
0,
|
| 26 |
+
1,
|
| 27 |
+
1,
|
| 28 |
+
1,
|
| 29 |
+
1,
|
| 30 |
+
1,
|
| 31 |
+
0,
|
| 32 |
+
1,
|
| 33 |
+
1,
|
| 34 |
+
1,
|
| 35 |
+
1,
|
| 36 |
+
1,
|
| 37 |
+
0,
|
| 38 |
+
1,
|
| 39 |
+
1,
|
| 40 |
+
1,
|
| 41 |
+
1,
|
| 42 |
+
1,
|
| 43 |
+
0,
|
| 44 |
+
1,
|
| 45 |
+
1,
|
| 46 |
+
1,
|
| 47 |
+
1,
|
| 48 |
+
1,
|
| 49 |
+
0,
|
| 50 |
+
1,
|
| 51 |
+
1,
|
| 52 |
+
1,
|
| 53 |
+
1,
|
| 54 |
+
1,
|
| 55 |
+
0,
|
| 56 |
+
1,
|
| 57 |
+
1,
|
| 58 |
+
1,
|
| 59 |
+
1,
|
| 60 |
+
1,
|
| 61 |
+
0,
|
| 62 |
+
1,
|
| 63 |
+
1,
|
| 64 |
+
1,
|
| 65 |
+
1,
|
| 66 |
+
1,
|
| 67 |
+
0
|
| 68 |
+
],
|
| 69 |
+
"initializer_range": 0.02,
|
| 70 |
+
"intermediate_size": 16384,
|
| 71 |
+
"layernorm_epsilon": 1e-05,
|
| 72 |
+
"max_position_embeddings": 262144,
|
| 73 |
+
"model_type": "mimo_v2_flash",
|
| 74 |
+
"moe_intermediate_size": 2048,
|
| 75 |
+
"moe_layer_freq": [
|
| 76 |
+
0,
|
| 77 |
+
1,
|
| 78 |
+
1,
|
| 79 |
+
1,
|
| 80 |
+
1,
|
| 81 |
+
1,
|
| 82 |
+
1,
|
| 83 |
+
1,
|
| 84 |
+
1,
|
| 85 |
+
1,
|
| 86 |
+
1,
|
| 87 |
+
1,
|
| 88 |
+
1,
|
| 89 |
+
1,
|
| 90 |
+
1,
|
| 91 |
+
1,
|
| 92 |
+
1,
|
| 93 |
+
1,
|
| 94 |
+
1,
|
| 95 |
+
1,
|
| 96 |
+
1,
|
| 97 |
+
1,
|
| 98 |
+
1,
|
| 99 |
+
1,
|
| 100 |
+
1,
|
| 101 |
+
1,
|
| 102 |
+
1,
|
| 103 |
+
1,
|
| 104 |
+
1,
|
| 105 |
+
1,
|
| 106 |
+
1,
|
| 107 |
+
1,
|
| 108 |
+
1,
|
| 109 |
+
1,
|
| 110 |
+
1,
|
| 111 |
+
1,
|
| 112 |
+
1,
|
| 113 |
+
1,
|
| 114 |
+
1,
|
| 115 |
+
1,
|
| 116 |
+
1,
|
| 117 |
+
1,
|
| 118 |
+
1,
|
| 119 |
+
1,
|
| 120 |
+
1,
|
| 121 |
+
1,
|
| 122 |
+
1,
|
| 123 |
+
1
|
| 124 |
+
],
|
| 125 |
+
"n_group": 1,
|
| 126 |
+
"n_routed_experts": 256,
|
| 127 |
+
"n_shared_experts": null,
|
| 128 |
+
"norm_topk_prob": true,
|
| 129 |
+
"num_attention_heads": 64,
|
| 130 |
+
"num_experts_per_tok": 8,
|
| 131 |
+
"num_hidden_layers": 48,
|
| 132 |
+
"num_key_value_heads": 4,
|
| 133 |
+
"partial_rotary_factor": 0.334,
|
| 134 |
+
"quantization": {
|
| 135 |
+
"group_size": 32,
|
| 136 |
+
"bits": 8,
|
| 137 |
+
"mode": "affine"
|
| 138 |
+
},
|
| 139 |
+
"quantization_config": {
|
| 140 |
+
"group_size": 32,
|
| 141 |
+
"bits": 8,
|
| 142 |
+
"mode": "affine"
|
| 143 |
+
},
|
| 144 |
+
"rope_theta": 5000000,
|
| 145 |
+
"routed_scaling_factor": null,
|
| 146 |
+
"scoring_func": "sigmoid",
|
| 147 |
+
"sliding_window": 128,
|
| 148 |
+
"sliding_window_size": 128,
|
| 149 |
+
"swa_head_dim": 192,
|
| 150 |
+
"swa_num_attention_heads": 64,
|
| 151 |
+
"swa_num_key_value_heads": 8,
|
| 152 |
+
"swa_rope_theta": 10000,
|
| 153 |
+
"swa_v_head_dim": 128,
|
| 154 |
+
"tie_word_embeddings": false,
|
| 155 |
+
"topk_group": 1,
|
| 156 |
+
"topk_method": "noaux_tc",
|
| 157 |
+
"torch_dtype": "bfloat16",
|
| 158 |
+
"transformers_version": "4.40.1",
|
| 159 |
+
"use_cache": true,
|
| 160 |
+
"v_head_dim": 128,
|
| 161 |
+
"vocab_size": 152576
|
| 162 |
+
}
|
configuration_mimo_v2_flash.py
ADDED
|
@@ -0,0 +1,109 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# coding=utf-8
|
| 2 |
+
#
|
| 3 |
+
# Copyright 2025 Xiaomi Corporation.
|
| 4 |
+
# Copyright 2025 The HuggingFace Inc. team.
|
| 5 |
+
#
|
| 6 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 7 |
+
# you may not use this file except in compliance with the License.
|
| 8 |
+
# You may obtain a copy of the License at
|
| 9 |
+
#
|
| 10 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 11 |
+
#
|
| 12 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 13 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 14 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 15 |
+
# See the License for the specific language governing permissions and
|
| 16 |
+
# limitations under the License.
|
| 17 |
+
|
| 18 |
+
from transformers.configuration_utils import PretrainedConfig
|
| 19 |
+
from transformers.modeling_rope_utils import rope_config_validation
|
| 20 |
+
from transformers.utils import logging
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
logger = logging.get_logger(__name__)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class MiMoV2FlashConfig(PretrainedConfig):
    """Configuration class for MiMo-V2-Flash checkpoints.

    Unless noted otherwise, every named argument is stored unchanged on the instance
    under the attribute of the same name.

    Args:
        vocab_size, hidden_size, intermediate_size, num_hidden_layers,
        num_attention_heads, hidden_act, max_position_embeddings, initializer_range,
        layernorm_epsilon, use_cache, rope_theta, attention_dropout,
        partial_rotary_factor:
            Stored as given.
        num_key_value_heads:
            Stored as given; ``None`` falls back to ``num_attention_heads``.
        rope_scaling:
            Optional mapping of rotary-embedding scaling options. If it contains a
            legacy ``"type"`` key, a copy with that value mirrored into ``"rope_type"``
            is stored; the mapping passed by the caller is not modified. The stored
            value is checked with ``rope_config_validation``.
        hybrid_block_size:
            Optional block length used to derive ``hybrid_layer_pattern`` when no
            explicit pattern is given.
        hybrid_layer_pattern:
            Optional per-layer list of 0/1 flags. When omitted and
            ``hybrid_block_size`` is set, layer ``i`` gets ``0`` if
            ``(i + 1) % hybrid_block_size == 0`` and ``1`` otherwise.
            NOTE(review): the meaning of 0 vs 1 is defined by the modeling code, which
            is not part of this file -- confirm there.
        tie_word_embeddings:
            Forwarded to ``PretrainedConfig.__init__``.
        **kwargs:
            Forwarded to ``PretrainedConfig.__init__``.
    """

    # Kept in sync with the "model_type" value shipped in this checkpoint's config.json;
    # the class attribute was previously an empty string, which disagreed with it.
    model_type = "mimo_v2_flash"
    keys_to_ignore_at_inference = ["past_key_values"]

    # Default tensor parallel plan for base model `Hybrid`
    base_model_tp_plan = {
        "layers.*.self_attn.q_proj": "colwise",
        "layers.*.self_attn.k_proj": "colwise",
        "layers.*.self_attn.v_proj": "colwise",
        "layers.*.self_attn.o_proj": "rowwise",
        "layers.*.mlp.gate_proj": "colwise",
        "layers.*.mlp.up_proj": "colwise",
        "layers.*.mlp.down_proj": "rowwise",
    }
    base_model_pp_plan = {
        "embed_tokens": (["input_ids"], ["inputs_embeds"]),
        "layers": (["hidden_states", "attention_mask"], ["hidden_states"]),
        "norm": (["hidden_states"], ["hidden_states"]),
    }

    # Alias: reading/writing `num_local_experts` is redirected to `n_routed_experts`.
    attribute_map = {
        "num_local_experts": "n_routed_experts",
    }

    def __init__(
        self,
        vocab_size=151936,
        hidden_size=4096,
        intermediate_size=22016,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=32,
        hidden_act="silu",
        max_position_embeddings=32768,
        initializer_range=0.02,
        layernorm_epsilon=1e-6,
        use_cache=True,
        tie_word_embeddings=False,
        rope_theta=10000.0,
        rope_scaling=None,
        attention_dropout=0.0,
        hybrid_block_size=None,
        hybrid_layer_pattern=None,
        partial_rotary_factor=1.0,
        **kwargs,
    ):
        self.vocab_size = vocab_size
        self.max_position_embeddings = max_position_embeddings
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # for backward compatibility
        if num_key_value_heads is None:
            num_key_value_heads = num_attention_heads

        self.num_key_value_heads = num_key_value_heads
        self.hidden_act = hidden_act
        self.initializer_range = initializer_range
        self.layernorm_epsilon = layernorm_epsilon
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.rope_scaling = rope_scaling
        self.attention_dropout = attention_dropout

        # Derive the per-layer pattern from the block size only when the caller did not
        # provide an explicit pattern.
        if hybrid_block_size is not None and hybrid_layer_pattern is None:
            hybrid_layer_pattern = [0 if ((i + 1) % hybrid_block_size == 0) else 1 for i in range(num_hidden_layers)]
        self.hybrid_block_size = hybrid_block_size
        self.hybrid_layer_pattern = hybrid_layer_pattern

        self.partial_rotary_factor = partial_rotary_factor

        # Validate the correctness of rotary position embeddings parameters
        # BC: if there is a 'type' field, mirror it into 'rope_type'. A copy is stored so
        # that the mapping handed in by the caller is not mutated as a side effect.
        if self.rope_scaling is not None and "type" in self.rope_scaling:
            self.rope_scaling = {**self.rope_scaling, "rope_type": self.rope_scaling["type"]}
        rope_config_validation(self)

        super().__init__(
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
|
model-00001-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3564f9fb2fa153684f59d170902f870a4398def669ed3b329c84dc7c36acfc59
|
| 3 |
+
size 3551941449
|
model-00002-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c70868b4f68fb7483385bc1172e891420b868f4143be6fcd284023be2df33791
|
| 3 |
+
size 4940123906
|
model-00003-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:51ef8b2a9c3b05549c434d9bfdcdabb3131f01142c225ee456993267ac6d387f
|
| 3 |
+
size 4831839000
|
model-00004-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f3d03e2757f13268285c0723cd617837a7d02dd1b150369c9590a6e23254fd14
|
| 3 |
+
size 4940123964
|
model-00005-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22d04914ea8fe70e072a2d24c19bd231f6d8694c33c11591928a872732192c75
|
| 3 |
+
size 4940123918
|
model-00006-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ae2cfa90e1ef163725c757bffedc0ac57f8d3f67adf8db42eb54e0125ad5f6a
|
| 3 |
+
size 4831839000
|
model-00007-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27651a81025612c476f40d5bbfe0717cf683f94a4150f3c7aa9d281338e2efcd
|
| 3 |
+
size 4934225474
|
model-00008-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:297afe818c9101461e956e41c6ffcea64c6e4c269eb186bd89a1adb6df134e24
|
| 3 |
+
size 4940123922
|
model-00009-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2778d597af0aacb7d7452bc07b1a558cddc728526de67c4aa5333c165a476bbc
|
| 3 |
+
size 4831838996
|
model-00010-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:167546337c5ab9986933f64167ee4dc210a2d76c89bea71b1953d6bfcb93683b
|
| 3 |
+
size 4940123950
|
model-00011-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae596f3609aac53475670b8e8bdede26fe2cd8e07419d274803838471ae319e9
|
| 3 |
+
size 4940123922
|
model-00012-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8fca6f0353431e1699ddc176393464d6fb3a4e372f04c82adebb6c699447f1f9
|
| 3 |
+
size 4831839000
|
model-00013-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0b02f14977b040f491a622141940e197466a63c58d1270211971374cbbe950aa
|
| 3 |
+
size 4940123982
|
model-00014-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60bd23bf51198ce852ccd4f9e0faacf5aa6e327a888395b6f35ee09da1c84f1e
|
| 3 |
+
size 4940123921
|
model-00015-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e43e7806b443749a05ab2131006b8fa74afb37a00ddb3a062479e95c282c58d9
|
| 3 |
+
size 4831839006
|
model-00016-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:927bafbce8c7106c322f0f039010ba2e9a79a169ee6390c9edf04de9ea1a05b4
|
| 3 |
+
size 4934225512
|
model-00017-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8549fed8b5ec21b6f2de38c4b9483104086f13d95f8ac2e7d8ac5e72427198eb
|
| 3 |
+
size 4940123949
|
model-00018-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d5403b03d56858389edb00a7402d65d9c4bbb18bb1db26d83bb01b1a9432cdc
|
| 3 |
+
size 4831839006
|
model-00019-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d69c2e22e17f5af0af37ebccfdf7a064ec1c501c7335868c038686de7f575967
|
| 3 |
+
size 4940123989
|
model-00020-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b41151e4269f6c6abd7810fb4832afb7c31c80f08bc4c1c4fb4e8d11370f986e
|
| 3 |
+
size 4940123939
|
model-00021-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e4badd5fbeb9d18231a57f5c562f94da456aed4056962c93daaa9a4d232fefb
|
| 3 |
+
size 4831839002
|
model-00022-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3fc2c4b22396b24beef8e4bad859065aeb423f532a6bdc8c138996ba117b1c6
|
| 3 |
+
size 4940124005
|
model-00023-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c986b086f7ac18916db4c6686292dccc47f57814ef8958d812d2b3762f7a5966
|
| 3 |
+
size 4940123949
|
model-00024-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5178afc1af5fc10c4da438276c92cb02165c2e1cb268d537ac47a1f4c899e2a
|
| 3 |
+
size 4831839006
|
model-00025-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c052c3d7e91a91f0b2dab2bc4cef8b40c8864279efdd96b2ef4091c56a6e4a4c
|
| 3 |
+
size 4934225508
|
model-00026-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cb1f2063520ab25f301a6e5f878478775b2af879e1bcb926955350fedd84f02e
|
| 3 |
+
size 4940123957
|
model-00027-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:304b3010a5577bea4407a50159ff91ffc80de7b4ae6f189f7104b0490887945a
|
| 3 |
+
size 4831839008
|
model-00028-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:80b3efd35db14d53020a41e9534a8216c2c8259186e021c3e7cd6796b9287a9b
|
| 3 |
+
size 4940124003
|
model-00029-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bab7d8b396c84f586c5d38f7d9be903ff80f3f392bafcd7ee99b214cf455397d
|
| 3 |
+
size 4940123947
|
model-00030-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e25e2c2041443a60156b160c75670bfabb982e7514f6a45679539342647ef02b
|
| 3 |
+
size 4831839002
|
model-00031-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6121c44ab3509c7787f33601ddef916c217d6fedd238b7c13f6d8c3f96094954
|
| 3 |
+
size 4940124003
|
model-00032-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c79b19ad7c4a8d4f136add9fdb1fa2a3e4b56ff2a822f2520bb8ba35f41638f
|
| 3 |
+
size 4940123935
|
model-00033-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:65e739e529e6e3924a4084df384f168b7491640e9615f6e72c707d41558426f0
|
| 3 |
+
size 4831839004
|
model-00034-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:912778eba874a4f2ce33a320a3f224d7c36bd907f7440bb9d51f5e8002e71255
|
| 3 |
+
size 4934225514
|
model-00035-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aa3a6d658e6ae5d863f7331a115af2ff9a4a8263d7efb1675ac245407929d710
|
| 3 |
+
size 4940123937
|
model-00036-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c0a4f7214ae8bea3dd6af704268aa82fab93dd57ffe990e1d86340dae162962a
|
| 3 |
+
size 4831839000
|
model-00037-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7729a68882abf56b6d9ec561f8b32bb6ac713f4f3f63142a0a4e08554cc2f965
|
| 3 |
+
size 4940124001
|
model-00038-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05f8f17a0c58090dda6d28028969a2a22abab57cfd9821aafabeb4d07035355e
|
| 3 |
+
size 4940123941
|
model-00039-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d9cc35fd4ec45e2e6fb6d2b78d4646b653323e49ecda4cb375ac7d0691213ef
|
| 3 |
+
size 4831839006
|
model-00040-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3e30e6a71b5abeff02db6b924685fb0e7d777c4cacc0d53994aa0f9735936ea
|
| 3 |
+
size 4940124003
|
model-00041-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:226592038f40014d8f58d3738315f7f0dfd4381ffa36c2b31307161ce1eea94a
|
| 3 |
+
size 4940123941
|
model-00042-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:53c3adf5c3dbee1bd61e31a7d61c593be2ab8290c387d7dd73548db4687804d0
|
| 3 |
+
size 4831839002
|
model-00043-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aed27333b8c5713e963d1f25d2f0c0c5894de02737d000bcabec21875744594f
|
| 3 |
+
size 4934225510
|
model-00044-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24a262a7a7e2f7316ff73d94e520ed7c67c597c031842e546a26b952501e629c
|
| 3 |
+
size 4940123947
|
model-00045-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74cfc7925b332a5d57e92805b8263d7211735c25e3f21dd19c5072602bb9f12c
|
| 3 |
+
size 4831839006
|