Add files using upload-large-folder tool
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +1 -0
- .ipynb_checkpoints/model.safetensors.index-checkpoint.json +0 -0
- chat_template.jinja +103 -0
- config.json +89 -0
- generation_config.json +10 -0
- model-00001-of-00072.safetensors +3 -0
- model-00005-of-00072.safetensors +3 -0
- model-00006-of-00072.safetensors +3 -0
- model-00009-of-00072.safetensors +3 -0
- model-00012-of-00072.safetensors +3 -0
- model-00015-of-00072.safetensors +3 -0
- model-00017-of-00072.safetensors +3 -0
- model-00018-of-00072.safetensors +3 -0
- model-00019-of-00072.safetensors +3 -0
- model-00023-of-00072.safetensors +3 -0
- model-00024-of-00072.safetensors +3 -0
- model-00025-of-00072.safetensors +3 -0
- model-00027-of-00072.safetensors +3 -0
- model-00028-of-00072.safetensors +3 -0
- model-00029-of-00072.safetensors +3 -0
- model-00032-of-00072.safetensors +3 -0
- model-00033-of-00072.safetensors +3 -0
- model-00037-of-00072.safetensors +3 -0
- model-00038-of-00072.safetensors +3 -0
- model-00039-of-00072.safetensors +3 -0
- model-00040-of-00072.safetensors +3 -0
- model-00041-of-00072.safetensors +3 -0
- model-00042-of-00072.safetensors +3 -0
- model-00043-of-00072.safetensors +3 -0
- model-00046-of-00072.safetensors +3 -0
- model-00047-of-00072.safetensors +3 -0
- model-00048-of-00072.safetensors +3 -0
- model-00050-of-00072.safetensors +3 -0
- model-00052-of-00072.safetensors +3 -0
- model-00053-of-00072.safetensors +3 -0
- model-00054-of-00072.safetensors +3 -0
- model-00056-of-00072.safetensors +3 -0
- model-00058-of-00072.safetensors +3 -0
- model-00059-of-00072.safetensors +3 -0
- model-00060-of-00072.safetensors +3 -0
- model-00062-of-00072.safetensors +3 -0
- model-00066-of-00072.safetensors +3 -0
- model-00067-of-00072.safetensors +3 -0
- model-00068-of-00072.safetensors +3 -0
- model-00071-of-00072.safetensors +3 -0
- model-00072-of-00072.safetensors +3 -0
- model.safetensors.index.json +0 -0
- recipe.yaml +6 -0
- special_tokens_map.json +40 -0
- tokenizer.json +3 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
.ipynb_checkpoints/model.safetensors.index-checkpoint.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[gMASK]<sop>
|
| 2 |
+
{%- if tools -%}
|
| 3 |
+
<|system|>
|
| 4 |
+
# Tools
|
| 5 |
+
|
| 6 |
+
You may call one or more functions to assist with the user query.
|
| 7 |
+
|
| 8 |
+
You are provided with function signatures within <tools></tools> XML tags:
|
| 9 |
+
<tools>
|
| 10 |
+
{% for tool in tools %}
|
| 11 |
+
{{ tool | tojson(ensure_ascii=False) }}
|
| 12 |
+
{% endfor %}
|
| 13 |
+
</tools>
|
| 14 |
+
|
| 15 |
+
For each function call, output the function name and arguments within the following XML format:
|
| 16 |
+
<tool_call>{function-name}
|
| 17 |
+
<arg_key>{arg-key-1}</arg_key>
|
| 18 |
+
<arg_value>{arg-value-1}</arg_value>
|
| 19 |
+
<arg_key>{arg-key-2}</arg_key>
|
| 20 |
+
<arg_value>{arg-value-2}</arg_value>
|
| 21 |
+
...
|
| 22 |
+
</tool_call>{%- endif -%}
|
| 23 |
+
{%- macro visible_text(content) -%}
|
| 24 |
+
{%- if content is string -%}
|
| 25 |
+
{{- content }}
|
| 26 |
+
{%- elif content is iterable and content is not mapping -%}
|
| 27 |
+
{%- for item in content -%}
|
| 28 |
+
{%- if item is mapping and item.type == 'text' -%}
|
| 29 |
+
{{- item.text }}
|
| 30 |
+
{%- elif item is string -%}
|
| 31 |
+
{{- item }}
|
| 32 |
+
{%- endif -%}
|
| 33 |
+
{%- endfor -%}
|
| 34 |
+
{%- else -%}
|
| 35 |
+
{{- content }}
|
| 36 |
+
{%- endif -%}
|
| 37 |
+
{%- endmacro -%}
|
| 38 |
+
{%- set ns = namespace(last_user_index=-1) %}
|
| 39 |
+
{%- for m in messages %}
|
| 40 |
+
{%- if m.role == 'user' %}
|
| 41 |
+
{% set ns.last_user_index = loop.index0 -%}
|
| 42 |
+
{%- endif %}
|
| 43 |
+
{%- endfor %}
|
| 44 |
+
{% for m in messages %}
|
| 45 |
+
{%- if m.role == 'user' -%}<|user|>
|
| 46 |
+
{{ visible_text(m.content) }}
|
| 47 |
+
{{- '/nothink' if (enable_thinking is defined and not enable_thinking and not visible_text(m.content).endswith("/nothink")) else '' -}}
|
| 48 |
+
{%- elif m.role == 'assistant' -%}
|
| 49 |
+
<|assistant|>
|
| 50 |
+
{%- set reasoning_content = '' %}
|
| 51 |
+
{%- set content = visible_text(m.content) %}
|
| 52 |
+
{%- if m.reasoning_content is string %}
|
| 53 |
+
{%- set reasoning_content = m.reasoning_content %}
|
| 54 |
+
{%- else %}
|
| 55 |
+
{%- if '</think>' in content %}
|
| 56 |
+
{%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
|
| 57 |
+
{%- set content = content.split('</think>')[-1].lstrip('\n') %}
|
| 58 |
+
{%- endif %}
|
| 59 |
+
{%- endif %}
|
| 60 |
+
{%- if loop.index0 > ns.last_user_index and reasoning_content -%}
|
| 61 |
+
{{ '\n<think>' + reasoning_content.strip() + '</think>'}}
|
| 62 |
+
{%- else -%}
|
| 63 |
+
{{ '\n<think></think>' }}
|
| 64 |
+
{%- endif -%}
|
| 65 |
+
{%- if content.strip() -%}
|
| 66 |
+
{{ '\n' + content.strip() }}
|
| 67 |
+
{%- endif -%}
|
| 68 |
+
{% if m.tool_calls %}
|
| 69 |
+
{% for tc in m.tool_calls %}
|
| 70 |
+
{%- if tc.function %}
|
| 71 |
+
{%- set tc = tc.function %}
|
| 72 |
+
{%- endif %}
|
| 73 |
+
{{ '\n<tool_call>' + tc.name }}
|
| 74 |
+
{% set _args = tc.arguments %}
|
| 75 |
+
{% for k, v in _args.items() %}
|
| 76 |
+
<arg_key>{{ k }}</arg_key>
|
| 77 |
+
<arg_value>{{ v | tojson(ensure_ascii=False) if v is not string else v }}</arg_value>
|
| 78 |
+
{% endfor %}
|
| 79 |
+
</tool_call>{% endfor %}
|
| 80 |
+
{% endif %}
|
| 81 |
+
{%- elif m.role == 'tool' -%}
|
| 82 |
+
{%- if m.content is string -%}
|
| 83 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 84 |
+
{{- '<|observation|>' }}
|
| 85 |
+
{%- endif %}
|
| 86 |
+
{{- '\n<tool_response>\n' }}
|
| 87 |
+
{{- m.content }}
|
| 88 |
+
{{- '\n</tool_response>' }}
|
| 89 |
+
{%- else -%}
|
| 90 |
+
<|observation|>{% for tr in m.content %}
|
| 91 |
+
|
| 92 |
+
<tool_response>
|
| 93 |
+
{{ tr.output if tr.output is defined else tr }}
|
| 94 |
+
</tool_response>{% endfor -%}
|
| 95 |
+
{% endif -%}
|
| 96 |
+
{%- elif m.role == 'system' -%}
|
| 97 |
+
<|system|>
|
| 98 |
+
{{ visible_text(m.content) }}
|
| 99 |
+
{%- endif -%}
|
| 100 |
+
{%- endfor -%}
|
| 101 |
+
{%- if add_generation_prompt -%}
|
| 102 |
+
<|assistant|>{{- '\n<think></think>' if (enable_thinking is defined and not enable_thinking) else '' -}}
|
| 103 |
+
{%- endif -%}
|
config.json
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Glm4MoeForCausalLM"
|
| 4 |
+
],
|
| 5 |
+
"attention_bias": true,
|
| 6 |
+
"attention_dropout": 0.0,
|
| 7 |
+
"dtype": "bfloat16",
|
| 8 |
+
"eos_token_id": [
|
| 9 |
+
151329,
|
| 10 |
+
151336,
|
| 11 |
+
151338
|
| 12 |
+
],
|
| 13 |
+
"first_k_dense_replace": 3,
|
| 14 |
+
"head_dim": 128,
|
| 15 |
+
"hidden_act": "silu",
|
| 16 |
+
"hidden_size": 5120,
|
| 17 |
+
"initializer_range": 0.02,
|
| 18 |
+
"intermediate_size": 12288,
|
| 19 |
+
"max_position_embeddings": 202752,
|
| 20 |
+
"model_type": "glm4_moe",
|
| 21 |
+
"moe_intermediate_size": 1536,
|
| 22 |
+
"n_group": 1,
|
| 23 |
+
"n_routed_experts": 160,
|
| 24 |
+
"n_shared_experts": 1,
|
| 25 |
+
"norm_topk_prob": true,
|
| 26 |
+
"num_attention_heads": 96,
|
| 27 |
+
"num_experts_per_tok": 8,
|
| 28 |
+
"num_hidden_layers": 93,
|
| 29 |
+
"num_key_value_heads": 8,
|
| 30 |
+
"num_nextn_predict_layers": 1,
|
| 31 |
+
"pad_token_id": 151329,
|
| 32 |
+
"partial_rotary_factor": 0.5,
|
| 33 |
+
"quantization_config": {
|
| 34 |
+
"config_groups": {
|
| 35 |
+
"group_0": {
|
| 36 |
+
"format": "float-quantized",
|
| 37 |
+
"input_activations": {
|
| 38 |
+
"actorder": null,
|
| 39 |
+
"block_structure": null,
|
| 40 |
+
"dynamic": true,
|
| 41 |
+
"group_size": null,
|
| 42 |
+
"num_bits": 8,
|
| 43 |
+
"observer": null,
|
| 44 |
+
"observer_kwargs": {},
|
| 45 |
+
"strategy": "token",
|
| 46 |
+
"symmetric": true,
|
| 47 |
+
"type": "float"
|
| 48 |
+
},
|
| 49 |
+
"output_activations": null,
|
| 50 |
+
"targets": [
|
| 51 |
+
"Linear"
|
| 52 |
+
],
|
| 53 |
+
"weights": {
|
| 54 |
+
"actorder": null,
|
| 55 |
+
"block_structure": null,
|
| 56 |
+
"dynamic": false,
|
| 57 |
+
"group_size": null,
|
| 58 |
+
"num_bits": 8,
|
| 59 |
+
"observer": "minmax",
|
| 60 |
+
"observer_kwargs": {},
|
| 61 |
+
"strategy": "channel",
|
| 62 |
+
"symmetric": true,
|
| 63 |
+
"type": "float"
|
| 64 |
+
}
|
| 65 |
+
}
|
| 66 |
+
},
|
| 67 |
+
"format": "float-quantized",
|
| 68 |
+
"global_compression_ratio": null,
|
| 69 |
+
"ignore": [
|
| 70 |
+
"lm_head"
|
| 71 |
+
],
|
| 72 |
+
"kv_cache_scheme": null,
|
| 73 |
+
"quant_method": "compressed-tensors",
|
| 74 |
+
"quantization_status": "compressed",
|
| 75 |
+
"sparsity_config": {},
|
| 76 |
+
"transform_config": {},
|
| 77 |
+
"version": "0.12.2"
|
| 78 |
+
},
|
| 79 |
+
"rms_norm_eps": 1e-05,
|
| 80 |
+
"rope_scaling": null,
|
| 81 |
+
"rope_theta": 1000000,
|
| 82 |
+
"routed_scaling_factor": 2.5,
|
| 83 |
+
"tie_word_embeddings": false,
|
| 84 |
+
"topk_group": 1,
|
| 85 |
+
"transformers_version": "4.57.0",
|
| 86 |
+
"use_cache": true,
|
| 87 |
+
"use_qk_norm": true,
|
| 88 |
+
"vocab_size": 151552
|
| 89 |
+
}
|
generation_config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"eos_token_id": [
|
| 4 |
+
151329,
|
| 5 |
+
151336,
|
| 6 |
+
151338
|
| 7 |
+
],
|
| 8 |
+
"pad_token_id": 151329,
|
| 9 |
+
"transformers_version": "4.57.0"
|
| 10 |
+
}
|
model-00001-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:25721b828db5edfbbc929be8ad7e920890baaf3ffbf9a7fadf9fbc47b677cbbc
|
| 3 |
+
size 4993429864
|
model-00005-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d23a8569e612d7e32425666d3e6119bec2e4490a5c1e7fbf46213346058d4f3
|
| 3 |
+
size 4993847864
|
model-00006-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c769475e0161315befa9be4fb9c5aa60126a0eeaf57a1dd36dd8590fab371e5a
|
| 3 |
+
size 4993855304
|
model-00009-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b74c726a5f1f8ad1f30a984063580e18eace50c0f265ae205dcd5c1175a657d1
|
| 3 |
+
size 4993849192
|
model-00012-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3bc2233f5a96aaa91eef34f50afa724f91e76e5f2e526b4933a5d71b4a27c762
|
| 3 |
+
size 4993856224
|
model-00015-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52266f98495c7e5f9ec51d66ccf5e84069752299102847655b94ad14820752c3
|
| 3 |
+
size 4993856224
|
model-00017-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:daa33b6de08e05e91c4a96d19d57c3010c59b72208cb9941685ce27df9ed8c73
|
| 3 |
+
size 4993849336
|
model-00018-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:863eee2ef11043f24ac6ed5bb0d6f9fff20675018ff03acb9875b9ae44e335d0
|
| 3 |
+
size 4998108024
|
model-00019-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0eb05642de8fd12329f383ec78dfa75314aa3f64ca2d9c8f366abc39a15ccc8b
|
| 3 |
+
size 4993856224
|
model-00023-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3db37fc8d4eaa7c210a8b74a0a9399a02f83d1413451eace8b19034fdbea0aa3
|
| 3 |
+
size 4993856224
|
model-00024-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fef7e9fdadfae6b70af8410044bb400dc16eede2b72d8a19a764c1dba4a72e5e
|
| 3 |
+
size 4993856376
|
model-00025-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7fe7669e82e648a2f5230ab3fb8b2a9a2ff2f0784e23fdf9d2d4e1384df99b08
|
| 3 |
+
size 4998108056
|
model-00027-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6355fc0e8d56ba65c1ac5c70ccd501d5c51722d7dd36565bbcfb1959b0331960
|
| 3 |
+
size 4993856224
|
model-00028-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17538028a610180938cff98ea064597a50aa78392c13222358bad8243c90c49a
|
| 3 |
+
size 4993856448
|
model-00029-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8ca4312a56abf0c0827dfa8d75ac382cf8b5ff0cc5cc841555f6006a5aa1d641
|
| 3 |
+
size 4998108016
|
model-00032-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ff0373e06cbc467541dc203a6dc098e33652b3632329a4058ebdd242a79281f8
|
| 3 |
+
size 4993856520
|
model-00033-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d88243f4966ca7188cd939528815b9ab8946f34d3a902398fe9126cb04a3b383
|
| 3 |
+
size 4998108016
|
model-00037-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3cabc6ab4a3ff425f63800a0ba36e22f723e9c76787f68e83160c5f677b85e04
|
| 3 |
+
size 4996447680
|
model-00038-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5ccc3606bfc95f4f5580a56d8aecf407fc329c6c154f1609e3376c3989274659
|
| 3 |
+
size 4993849056
|
model-00039-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4bc1bd7b99e5f933f39b533edf4b2361aec033e85f43b237d8d31861585610eb
|
| 3 |
+
size 4993856392
|
model-00040-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67546927b12db4f69ae35f38ae536bd8be232e2dfdfa4cc904fb9d8f2320488b
|
| 3 |
+
size 4998108040
|
model-00041-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e15f12ee749f7e8efcbb47f7b7d22d1d14665f09832f9819a679b1ec29c1c75a
|
| 3 |
+
size 4993856200
|
model-00042-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:896067cc2422fd8604bbc06c49f668d1cf50844949235f7b5df73776b5ff529e
|
| 3 |
+
size 4993849056
|
model-00043-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b7dc4b5627c8fc35182b2c03e6d34d6ef19a99f07041e80c36a848068cd8443
|
| 3 |
+
size 4993856464
|
model-00046-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e0609d1b3252de2cdc34ac259b0e05d493b3bc267aa95d02aa35777742bd4ef
|
| 3 |
+
size 4993849096
|
model-00047-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:133dba4e5153703d3da553f95a1d2366d95a045bb0d158554e472cba61593d79
|
| 3 |
+
size 4993856536
|
model-00048-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d536c853663efecbfaefd9b252e574efccbdc1ee58abb3f1a03e213fb1c4b515
|
| 3 |
+
size 4998108024
|
model-00050-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:54cdd441373b359266265c2b60150d8842d80a6329b6948daf553f429e8e9308
|
| 3 |
+
size 4993849168
|
model-00052-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92f4b45672af16071446ce075dd116f40a4d399b568d1910473397a0cebb0726
|
| 3 |
+
size 4993819280
|
model-00053-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bb8d6523ff5c771740ebdf7bebfc3abd69a464716255a76e47dea4aab3dd7a3
|
| 3 |
+
size 4993849056
|
model-00054-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3e111f7b3a7ee985636c7b68048bbd96d6e4f304df5a96c45fd04064ab2ce0e9
|
| 3 |
+
size 4993856416
|
model-00056-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2db1c90f6873bc25d5f8be2752c17b7190110a46459570ad228954baf26ef058
|
| 3 |
+
size 4993856216
|
model-00058-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2a6bc828b7e2dab028b48c25bfe0be2eeaa48ae90847a2c5bfdcf2ccaa87f1e7
|
| 3 |
+
size 4993856488
|
model-00059-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bd82008bf14ff2a0604ae74f8b34f8a572492f9cdb173bd3424d7a9a7587f66c
|
| 3 |
+
size 4998108024
|
model-00060-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2c7cb67852cced1215bb7de9e59483cc799cb413743845827a573cc405812bdd
|
| 3 |
+
size 4993856224
|
model-00062-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:22b6afc7f6a634e6ff4f177bdf36f5424994bd45609bbdd802564e877ad5b3f3
|
| 3 |
+
size 4993856560
|
model-00066-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3256edeb5623a5efc8a6e2a13cc4e2b2e95dd5944679abc7dd5c5d7ae818fc4a
|
| 3 |
+
size 4998108080
|
model-00067-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1df59874d305d1f0adc5b795507f8cf5608140f64cc2408a02d67ca19fc77d15
|
| 3 |
+
size 4993856168
|
model-00068-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f74ce6588b8a563ebd7c622c66e721349e40f6569810399e65dcf048934348bd
|
| 3 |
+
size 4993856224
|
model-00071-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a6ed26cd236b457118d6e92bf5852dc6f70737a08c9f4123da69d64d1050748e
|
| 3 |
+
size 4993856224
|
model-00072-of-00072.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a23ca9543bfde43e89940d0dbed3f7c397b9983f7c29cbf06c7e82ef3a4681c8
|
| 3 |
+
size 4001147496
|
model.safetensors.index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
recipe.yaml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
default_stage:
|
| 2 |
+
default_modifiers:
|
| 3 |
+
QuantizationModifier:
|
| 4 |
+
targets: [Linear]
|
| 5 |
+
ignore: [lm_head, 're:.*mlp.gate$', 're:.*mlp.shared_expert_gate$', 're:.*linear_attn.*']
|
| 6 |
+
scheme: FP8_DYNAMIC
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<|endoftext|>",
|
| 4 |
+
"[MASK]",
|
| 5 |
+
"[gMASK]",
|
| 6 |
+
"[sMASK]",
|
| 7 |
+
"<sop>",
|
| 8 |
+
"<eop>",
|
| 9 |
+
"<|system|>",
|
| 10 |
+
"<|user|>",
|
| 11 |
+
"<|assistant|>",
|
| 12 |
+
"<|observation|>",
|
| 13 |
+
"<|begin_of_image|>",
|
| 14 |
+
"<|end_of_image|>",
|
| 15 |
+
"<|begin_of_video|>",
|
| 16 |
+
"<|end_of_video|>",
|
| 17 |
+
"<|begin_of_audio|>",
|
| 18 |
+
"<|end_of_audio|>",
|
| 19 |
+
"<|begin_of_transcription|>",
|
| 20 |
+
"<|end_of_transcription|>",
|
| 21 |
+
"<|code_prefix|>",
|
| 22 |
+
"<|code_middle|>",
|
| 23 |
+
"<|code_suffix|>",
|
| 24 |
+
"/nothink"
|
| 25 |
+
],
|
| 26 |
+
"eos_token": {
|
| 27 |
+
"content": "<|endoftext|>",
|
| 28 |
+
"lstrip": false,
|
| 29 |
+
"normalized": false,
|
| 30 |
+
"rstrip": false,
|
| 31 |
+
"single_word": false
|
| 32 |
+
},
|
| 33 |
+
"pad_token": {
|
| 34 |
+
"content": "<|endoftext|>",
|
| 35 |
+
"lstrip": false,
|
| 36 |
+
"normalized": false,
|
| 37 |
+
"rstrip": false,
|
| 38 |
+
"single_word": false
|
| 39 |
+
}
|
| 40 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bda8e2146c3bb7b7e0fc96dcc4f0aeff041c6c27952e3ace0665663ebff346ba
|
| 3 |
+
size 19970700
|