lmmy committed on
Commit
273d9c0
·
verified ·
1 Parent(s): ce347cb

Add files using upload-large-folder tool

Browse files
README.md ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: other
4
+ license_name: lfm1.0
5
+ license_link: LICENSE
6
+ language:
7
+ - en
8
+ - ar
9
+ - zh
10
+ - fr
11
+ - de
12
+ - ja
13
+ - ko
14
+ - es
15
+ pipeline_tag: text-generation
16
+ tags:
17
+ - liquid
18
+ - lfm2.5
19
+ - edge
20
+ - mlx
21
+ base_model: LiquidAI/LFM2.5-1.2B-Instruct
22
+ ---
23
+ ## 💫 Community Model> LFM2.5-1.2B-Instruct by LiquidAI
24
+
25
+ _👾 [LM Studio](https://lmstudio.ai) Community models highlights program. Highlighting new & noteworthy models by the community. Join the conversation on [Discord](https://discord.gg/aPQfnNkxGC)_.
26
+
27
+ **Model creator**: [LiquidAI](https://huggingface.co/LiquidAI)<br>
28
+ **Original model**: [LFM2.5-1.2B-Instruct](https://huggingface.co/LiquidAI/LFM2.5-1.2B-Instruct)<br>
29
+ **MLX quantization**: provided by [LM Studio team](https://x.com/lmstudio) using [mlx_lm](https://github.com/ml-explore/mlx-lm)<br>
30
+
31
+ ## Technical Details
32
+
33
+ 4-bit quantized version of LFM2.5-1.2B-Instruct using MLX, optimized for Apple Silicon.
34
+
35
+ ## Special thanks
36
+
37
+ 🙏 Special thanks to the [Apple Machine Learning Research](https://github.com/ml-explore) team for creating [MLX](https://github.com/ml-explore/mlx).
38
+
39
+ ## Disclaimers
40
+
41
+ LM Studio is not the creator, originator, or owner of any Model featured in the Community Model Program. Each Community Model is created and provided by third parties. LM Studio does not endorse, support, represent or guarantee the completeness, truthfulness, accuracy, or reliability of any Community Model. You understand that Community Models can produce content that might be offensive, harmful, inaccurate or otherwise inappropriate, or deceptive. Each Community Model is the sole responsibility of the person or entity who originated such Model. LM Studio may not monitor or control the Community Models and cannot, and does not, take responsibility for any such Model. LM Studio disclaims all warranties or guarantees about the accuracy, reliability or benefits of the Community Models. LM Studio further disclaims any warranty that the Community Model will meet your requirements, be secure, uninterrupted or available at any time or location, or error-free, virus-free, or that any errors will be corrected, or otherwise. You will be solely responsible for any damage resulting from your use of or access to the Community Models, your downloading of any Community Model, or use of any other Community Model provided by or through LM Studio.
chat_template.jinja ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{- bos_token -}}
2
+ {%- set keep_past_thinking = keep_past_thinking | default(false) -%}
3
+ {%- set ns = namespace(system_prompt="") -%}
4
+ {%- if messages[0]["role"] == "system" -%}
5
+ {%- set ns.system_prompt = messages[0]["content"] -%}
6
+ {%- set messages = messages[1:] -%}
7
+ {%- endif -%}
8
+ {%- if tools -%}
9
+ {%- set ns.system_prompt = ns.system_prompt + ("\n" if ns.system_prompt else "") + "List of tools: [" -%}
10
+ {%- for tool in tools -%}
11
+ {%- if tool is not string -%}
12
+ {%- set tool = tool | tojson -%}
13
+ {%- endif -%}
14
+ {%- set ns.system_prompt = ns.system_prompt + tool -%}
15
+ {%- if not loop.last -%}
16
+ {%- set ns.system_prompt = ns.system_prompt + ", " -%}
17
+ {%- endif -%}
18
+ {%- endfor -%}
19
+ {%- set ns.system_prompt = ns.system_prompt + "]" -%}
20
+ {%- endif -%}
21
+ {%- if ns.system_prompt -%}
22
+ {{- "<|im_start|>system\n" + ns.system_prompt + "<|im_end|>\n" -}}
23
+ {%- endif -%}
24
+ {%- set ns.last_assistant_index = -1 -%}
25
+ {%- for message in messages -%}
26
+ {%- if message["role"] == "assistant" -%}
27
+ {%- set ns.last_assistant_index = loop.index0 -%}
28
+ {%- endif -%}
29
+ {%- endfor -%}
30
+ {%- for message in messages -%}
31
+ {{- "<|im_start|>" + message["role"] + "\n" -}}
32
+ {%- if message.get('tool_calls') %}
33
+ {# ───── create a list to append tool calls to ───── #}
34
+ {%- set tool_calls_ns = namespace(tool_calls=[])%}
35
+ {%- for tool_call in message['tool_calls'] %}
36
+ {%- set func_name = tool_call['function']['name'] %}
37
+ {%- set func_args = tool_call['function']['arguments'] %}
38
+ {# ───── create a list of func_arg strings to accumulate for each tool call ───── #}
39
+ {%- set args_ns = namespace(arg_strings=[])%}
40
+ {%- for arg_name, arg_value in func_args.items() %}
41
+ {%- if arg_value is none %}
42
+ {%- set formatted_arg_value = 'null' %}
43
+ {%- elif arg_value is boolean %}
44
+ {%- set formatted_arg_value = 'True' if arg_value else 'False' %}
45
+ {%- elif arg_value is string %}
46
+ {%- set formatted_arg_value = '"' ~ arg_value ~ '"' %}
47
+ {%- elif arg_value is mapping or arg_value is iterable %}
48
+ {%- set formatted_arg_value = arg_value | tojson %}
49
+ {%- else %}
50
+ {%- set formatted_arg_value = arg_value | string %}
51
+ {%- endif %}
52
+ {# ───── format each argument key,value pair ───── #}
53
+ {%- set args_ns.arg_strings = args_ns.arg_strings + [arg_name ~ '=' ~ formatted_arg_value] %}
54
+ {%- endfor %}
55
+ {# ───── append each formatted tool call ───── #}
56
+ {%- set tool_calls_ns.tool_calls = tool_calls_ns.tool_calls + [(func_name + '(' + (args_ns.arg_strings | join(", ")) + ')' )]%}
57
+ {%- endfor %}
58
+ {# ───── format the final tool calls ───── #}
59
+ {{-'<|tool_call_start|>[' + (tool_calls_ns.tool_calls | join(", ")) + ']<|tool_call_end|>'}}
60
+ {%- endif %}
61
+ {%- set content = message["content"] -%}
62
+ {%- if content is not string -%}
63
+ {%- set content = content | tojson -%}
64
+ {%- endif -%}
65
+ {%- if message["role"] == "assistant" and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}
66
+ {%- if "</think>" in content -%}
67
+ {%- set content = content.split("</think>")[-1] | trim -%}
68
+ {%- endif -%}
69
+ {%- endif -%}
70
+ {{- content + "<|im_end|>\n" -}}
71
+ {%- endfor -%}
72
+ {%- if add_generation_prompt -%}
73
+ {{- "<|im_start|>assistant\n" -}}
74
+ {%- endif -%}
config.json ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Lfm2ForCausalLM"
4
+ ],
5
+ "block_auto_adjust_ff_dim": true,
6
+ "block_dim": 2048,
7
+ "block_ff_dim": 12288,
8
+ "block_ffn_dim_multiplier": 1.0,
9
+ "block_mlp_init_scale": 1.0,
10
+ "block_multiple_of": 256,
11
+ "block_norm_eps": 1e-05,
12
+ "block_out_init_scale": 1.0,
13
+ "block_use_swiglu": true,
14
+ "block_use_xavier_init": true,
15
+ "bos_token_id": 1,
16
+ "conv_L_cache": 3,
17
+ "conv_bias": false,
18
+ "conv_dim": 2048,
19
+ "conv_use_xavier_init": true,
20
+ "dtype": "bfloat16",
21
+ "eos_token_id": 7,
22
+ "hidden_size": 2048,
23
+ "initializer_range": 0.02,
24
+ "intermediate_size": 12288,
25
+ "layer_types": [
26
+ "conv",
27
+ "conv",
28
+ "full_attention",
29
+ "conv",
30
+ "conv",
31
+ "full_attention",
32
+ "conv",
33
+ "conv",
34
+ "full_attention",
35
+ "conv",
36
+ "full_attention",
37
+ "conv",
38
+ "full_attention",
39
+ "conv",
40
+ "full_attention",
41
+ "conv"
42
+ ],
43
+ "max_position_embeddings": 128000,
44
+ "model_type": "lfm2",
45
+ "norm_eps": 1e-05,
46
+ "num_attention_heads": 32,
47
+ "num_heads": 32,
48
+ "num_hidden_layers": 16,
49
+ "num_key_value_heads": 8,
50
+ "pad_token_id": 0,
51
+ "quantization": {
52
+ "group_size": 64,
53
+ "bits": 4,
54
+ "mode": "affine"
55
+ },
56
+ "quantization_config": {
57
+ "group_size": 64,
58
+ "bits": 4,
59
+ "mode": "affine"
60
+ },
61
+ "rope_theta": 1000000.0,
62
+ "tie_embedding": true,
63
+ "transformers_version": "4.57.2",
64
+ "use_cache": true,
65
+ "use_pos_enc": true,
66
+ "vocab_size": 65536
67
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 7,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.57.2"
7
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9dc31e3598f06a4152cee41ee377f9e56a9c719f2deb7441f4d267930082361
3
+ size 658540248
model.safetensors.index.json ADDED
@@ -0,0 +1,342 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 658503168,
4
+ "total_parameters": 1170340608
5
+ },
6
+ "weight_map": {
7
+ "model.embed_tokens.biases": "model.safetensors",
8
+ "model.embed_tokens.scales": "model.safetensors",
9
+ "model.embed_tokens.weight": "model.safetensors",
10
+ "model.embedding_norm.weight": "model.safetensors",
11
+ "model.layers.0.conv.conv.weight": "model.safetensors",
12
+ "model.layers.0.conv.in_proj.biases": "model.safetensors",
13
+ "model.layers.0.conv.in_proj.scales": "model.safetensors",
14
+ "model.layers.0.conv.in_proj.weight": "model.safetensors",
15
+ "model.layers.0.conv.out_proj.biases": "model.safetensors",
16
+ "model.layers.0.conv.out_proj.scales": "model.safetensors",
17
+ "model.layers.0.conv.out_proj.weight": "model.safetensors",
18
+ "model.layers.0.feed_forward.w1.biases": "model.safetensors",
19
+ "model.layers.0.feed_forward.w1.scales": "model.safetensors",
20
+ "model.layers.0.feed_forward.w1.weight": "model.safetensors",
21
+ "model.layers.0.feed_forward.w2.biases": "model.safetensors",
22
+ "model.layers.0.feed_forward.w2.scales": "model.safetensors",
23
+ "model.layers.0.feed_forward.w2.weight": "model.safetensors",
24
+ "model.layers.0.feed_forward.w3.biases": "model.safetensors",
25
+ "model.layers.0.feed_forward.w3.scales": "model.safetensors",
26
+ "model.layers.0.feed_forward.w3.weight": "model.safetensors",
27
+ "model.layers.0.ffn_norm.weight": "model.safetensors",
28
+ "model.layers.0.operator_norm.weight": "model.safetensors",
29
+ "model.layers.1.conv.conv.weight": "model.safetensors",
30
+ "model.layers.1.conv.in_proj.biases": "model.safetensors",
31
+ "model.layers.1.conv.in_proj.scales": "model.safetensors",
32
+ "model.layers.1.conv.in_proj.weight": "model.safetensors",
33
+ "model.layers.1.conv.out_proj.biases": "model.safetensors",
34
+ "model.layers.1.conv.out_proj.scales": "model.safetensors",
35
+ "model.layers.1.conv.out_proj.weight": "model.safetensors",
36
+ "model.layers.1.feed_forward.w1.biases": "model.safetensors",
37
+ "model.layers.1.feed_forward.w1.scales": "model.safetensors",
38
+ "model.layers.1.feed_forward.w1.weight": "model.safetensors",
39
+ "model.layers.1.feed_forward.w2.biases": "model.safetensors",
40
+ "model.layers.1.feed_forward.w2.scales": "model.safetensors",
41
+ "model.layers.1.feed_forward.w2.weight": "model.safetensors",
42
+ "model.layers.1.feed_forward.w3.biases": "model.safetensors",
43
+ "model.layers.1.feed_forward.w3.scales": "model.safetensors",
44
+ "model.layers.1.feed_forward.w3.weight": "model.safetensors",
45
+ "model.layers.1.ffn_norm.weight": "model.safetensors",
46
+ "model.layers.1.operator_norm.weight": "model.safetensors",
47
+ "model.layers.10.feed_forward.w1.biases": "model.safetensors",
48
+ "model.layers.10.feed_forward.w1.scales": "model.safetensors",
49
+ "model.layers.10.feed_forward.w1.weight": "model.safetensors",
50
+ "model.layers.10.feed_forward.w2.biases": "model.safetensors",
51
+ "model.layers.10.feed_forward.w2.scales": "model.safetensors",
52
+ "model.layers.10.feed_forward.w2.weight": "model.safetensors",
53
+ "model.layers.10.feed_forward.w3.biases": "model.safetensors",
54
+ "model.layers.10.feed_forward.w3.scales": "model.safetensors",
55
+ "model.layers.10.feed_forward.w3.weight": "model.safetensors",
56
+ "model.layers.10.ffn_norm.weight": "model.safetensors",
57
+ "model.layers.10.operator_norm.weight": "model.safetensors",
58
+ "model.layers.10.self_attn.k_layernorm.weight": "model.safetensors",
59
+ "model.layers.10.self_attn.k_proj.biases": "model.safetensors",
60
+ "model.layers.10.self_attn.k_proj.scales": "model.safetensors",
61
+ "model.layers.10.self_attn.k_proj.weight": "model.safetensors",
62
+ "model.layers.10.self_attn.out_proj.biases": "model.safetensors",
63
+ "model.layers.10.self_attn.out_proj.scales": "model.safetensors",
64
+ "model.layers.10.self_attn.out_proj.weight": "model.safetensors",
65
+ "model.layers.10.self_attn.q_layernorm.weight": "model.safetensors",
66
+ "model.layers.10.self_attn.q_proj.biases": "model.safetensors",
67
+ "model.layers.10.self_attn.q_proj.scales": "model.safetensors",
68
+ "model.layers.10.self_attn.q_proj.weight": "model.safetensors",
69
+ "model.layers.10.self_attn.v_proj.biases": "model.safetensors",
70
+ "model.layers.10.self_attn.v_proj.scales": "model.safetensors",
71
+ "model.layers.10.self_attn.v_proj.weight": "model.safetensors",
72
+ "model.layers.11.conv.conv.weight": "model.safetensors",
73
+ "model.layers.11.conv.in_proj.biases": "model.safetensors",
74
+ "model.layers.11.conv.in_proj.scales": "model.safetensors",
75
+ "model.layers.11.conv.in_proj.weight": "model.safetensors",
76
+ "model.layers.11.conv.out_proj.biases": "model.safetensors",
77
+ "model.layers.11.conv.out_proj.scales": "model.safetensors",
78
+ "model.layers.11.conv.out_proj.weight": "model.safetensors",
79
+ "model.layers.11.feed_forward.w1.biases": "model.safetensors",
80
+ "model.layers.11.feed_forward.w1.scales": "model.safetensors",
81
+ "model.layers.11.feed_forward.w1.weight": "model.safetensors",
82
+ "model.layers.11.feed_forward.w2.biases": "model.safetensors",
83
+ "model.layers.11.feed_forward.w2.scales": "model.safetensors",
84
+ "model.layers.11.feed_forward.w2.weight": "model.safetensors",
85
+ "model.layers.11.feed_forward.w3.biases": "model.safetensors",
86
+ "model.layers.11.feed_forward.w3.scales": "model.safetensors",
87
+ "model.layers.11.feed_forward.w3.weight": "model.safetensors",
88
+ "model.layers.11.ffn_norm.weight": "model.safetensors",
89
+ "model.layers.11.operator_norm.weight": "model.safetensors",
90
+ "model.layers.12.feed_forward.w1.biases": "model.safetensors",
91
+ "model.layers.12.feed_forward.w1.scales": "model.safetensors",
92
+ "model.layers.12.feed_forward.w1.weight": "model.safetensors",
93
+ "model.layers.12.feed_forward.w2.biases": "model.safetensors",
94
+ "model.layers.12.feed_forward.w2.scales": "model.safetensors",
95
+ "model.layers.12.feed_forward.w2.weight": "model.safetensors",
96
+ "model.layers.12.feed_forward.w3.biases": "model.safetensors",
97
+ "model.layers.12.feed_forward.w3.scales": "model.safetensors",
98
+ "model.layers.12.feed_forward.w3.weight": "model.safetensors",
99
+ "model.layers.12.ffn_norm.weight": "model.safetensors",
100
+ "model.layers.12.operator_norm.weight": "model.safetensors",
101
+ "model.layers.12.self_attn.k_layernorm.weight": "model.safetensors",
102
+ "model.layers.12.self_attn.k_proj.biases": "model.safetensors",
103
+ "model.layers.12.self_attn.k_proj.scales": "model.safetensors",
104
+ "model.layers.12.self_attn.k_proj.weight": "model.safetensors",
105
+ "model.layers.12.self_attn.out_proj.biases": "model.safetensors",
106
+ "model.layers.12.self_attn.out_proj.scales": "model.safetensors",
107
+ "model.layers.12.self_attn.out_proj.weight": "model.safetensors",
108
+ "model.layers.12.self_attn.q_layernorm.weight": "model.safetensors",
109
+ "model.layers.12.self_attn.q_proj.biases": "model.safetensors",
110
+ "model.layers.12.self_attn.q_proj.scales": "model.safetensors",
111
+ "model.layers.12.self_attn.q_proj.weight": "model.safetensors",
112
+ "model.layers.12.self_attn.v_proj.biases": "model.safetensors",
113
+ "model.layers.12.self_attn.v_proj.scales": "model.safetensors",
114
+ "model.layers.12.self_attn.v_proj.weight": "model.safetensors",
115
+ "model.layers.13.conv.conv.weight": "model.safetensors",
116
+ "model.layers.13.conv.in_proj.biases": "model.safetensors",
117
+ "model.layers.13.conv.in_proj.scales": "model.safetensors",
118
+ "model.layers.13.conv.in_proj.weight": "model.safetensors",
119
+ "model.layers.13.conv.out_proj.biases": "model.safetensors",
120
+ "model.layers.13.conv.out_proj.scales": "model.safetensors",
121
+ "model.layers.13.conv.out_proj.weight": "model.safetensors",
122
+ "model.layers.13.feed_forward.w1.biases": "model.safetensors",
123
+ "model.layers.13.feed_forward.w1.scales": "model.safetensors",
124
+ "model.layers.13.feed_forward.w1.weight": "model.safetensors",
125
+ "model.layers.13.feed_forward.w2.biases": "model.safetensors",
126
+ "model.layers.13.feed_forward.w2.scales": "model.safetensors",
127
+ "model.layers.13.feed_forward.w2.weight": "model.safetensors",
128
+ "model.layers.13.feed_forward.w3.biases": "model.safetensors",
129
+ "model.layers.13.feed_forward.w3.scales": "model.safetensors",
130
+ "model.layers.13.feed_forward.w3.weight": "model.safetensors",
131
+ "model.layers.13.ffn_norm.weight": "model.safetensors",
132
+ "model.layers.13.operator_norm.weight": "model.safetensors",
133
+ "model.layers.14.feed_forward.w1.biases": "model.safetensors",
134
+ "model.layers.14.feed_forward.w1.scales": "model.safetensors",
135
+ "model.layers.14.feed_forward.w1.weight": "model.safetensors",
136
+ "model.layers.14.feed_forward.w2.biases": "model.safetensors",
137
+ "model.layers.14.feed_forward.w2.scales": "model.safetensors",
138
+ "model.layers.14.feed_forward.w2.weight": "model.safetensors",
139
+ "model.layers.14.feed_forward.w3.biases": "model.safetensors",
140
+ "model.layers.14.feed_forward.w3.scales": "model.safetensors",
141
+ "model.layers.14.feed_forward.w3.weight": "model.safetensors",
142
+ "model.layers.14.ffn_norm.weight": "model.safetensors",
143
+ "model.layers.14.operator_norm.weight": "model.safetensors",
144
+ "model.layers.14.self_attn.k_layernorm.weight": "model.safetensors",
145
+ "model.layers.14.self_attn.k_proj.biases": "model.safetensors",
146
+ "model.layers.14.self_attn.k_proj.scales": "model.safetensors",
147
+ "model.layers.14.self_attn.k_proj.weight": "model.safetensors",
148
+ "model.layers.14.self_attn.out_proj.biases": "model.safetensors",
149
+ "model.layers.14.self_attn.out_proj.scales": "model.safetensors",
150
+ "model.layers.14.self_attn.out_proj.weight": "model.safetensors",
151
+ "model.layers.14.self_attn.q_layernorm.weight": "model.safetensors",
152
+ "model.layers.14.self_attn.q_proj.biases": "model.safetensors",
153
+ "model.layers.14.self_attn.q_proj.scales": "model.safetensors",
154
+ "model.layers.14.self_attn.q_proj.weight": "model.safetensors",
155
+ "model.layers.14.self_attn.v_proj.biases": "model.safetensors",
156
+ "model.layers.14.self_attn.v_proj.scales": "model.safetensors",
157
+ "model.layers.14.self_attn.v_proj.weight": "model.safetensors",
158
+ "model.layers.15.conv.conv.weight": "model.safetensors",
159
+ "model.layers.15.conv.in_proj.biases": "model.safetensors",
160
+ "model.layers.15.conv.in_proj.scales": "model.safetensors",
161
+ "model.layers.15.conv.in_proj.weight": "model.safetensors",
162
+ "model.layers.15.conv.out_proj.biases": "model.safetensors",
163
+ "model.layers.15.conv.out_proj.scales": "model.safetensors",
164
+ "model.layers.15.conv.out_proj.weight": "model.safetensors",
165
+ "model.layers.15.feed_forward.w1.biases": "model.safetensors",
166
+ "model.layers.15.feed_forward.w1.scales": "model.safetensors",
167
+ "model.layers.15.feed_forward.w1.weight": "model.safetensors",
168
+ "model.layers.15.feed_forward.w2.biases": "model.safetensors",
169
+ "model.layers.15.feed_forward.w2.scales": "model.safetensors",
170
+ "model.layers.15.feed_forward.w2.weight": "model.safetensors",
171
+ "model.layers.15.feed_forward.w3.biases": "model.safetensors",
172
+ "model.layers.15.feed_forward.w3.scales": "model.safetensors",
173
+ "model.layers.15.feed_forward.w3.weight": "model.safetensors",
174
+ "model.layers.15.ffn_norm.weight": "model.safetensors",
175
+ "model.layers.15.operator_norm.weight": "model.safetensors",
176
+ "model.layers.2.feed_forward.w1.biases": "model.safetensors",
177
+ "model.layers.2.feed_forward.w1.scales": "model.safetensors",
178
+ "model.layers.2.feed_forward.w1.weight": "model.safetensors",
179
+ "model.layers.2.feed_forward.w2.biases": "model.safetensors",
180
+ "model.layers.2.feed_forward.w2.scales": "model.safetensors",
181
+ "model.layers.2.feed_forward.w2.weight": "model.safetensors",
182
+ "model.layers.2.feed_forward.w3.biases": "model.safetensors",
183
+ "model.layers.2.feed_forward.w3.scales": "model.safetensors",
184
+ "model.layers.2.feed_forward.w3.weight": "model.safetensors",
185
+ "model.layers.2.ffn_norm.weight": "model.safetensors",
186
+ "model.layers.2.operator_norm.weight": "model.safetensors",
187
+ "model.layers.2.self_attn.k_layernorm.weight": "model.safetensors",
188
+ "model.layers.2.self_attn.k_proj.biases": "model.safetensors",
189
+ "model.layers.2.self_attn.k_proj.scales": "model.safetensors",
190
+ "model.layers.2.self_attn.k_proj.weight": "model.safetensors",
191
+ "model.layers.2.self_attn.out_proj.biases": "model.safetensors",
192
+ "model.layers.2.self_attn.out_proj.scales": "model.safetensors",
193
+ "model.layers.2.self_attn.out_proj.weight": "model.safetensors",
194
+ "model.layers.2.self_attn.q_layernorm.weight": "model.safetensors",
195
+ "model.layers.2.self_attn.q_proj.biases": "model.safetensors",
196
+ "model.layers.2.self_attn.q_proj.scales": "model.safetensors",
197
+ "model.layers.2.self_attn.q_proj.weight": "model.safetensors",
198
+ "model.layers.2.self_attn.v_proj.biases": "model.safetensors",
199
+ "model.layers.2.self_attn.v_proj.scales": "model.safetensors",
200
+ "model.layers.2.self_attn.v_proj.weight": "model.safetensors",
201
+ "model.layers.3.conv.conv.weight": "model.safetensors",
202
+ "model.layers.3.conv.in_proj.biases": "model.safetensors",
203
+ "model.layers.3.conv.in_proj.scales": "model.safetensors",
204
+ "model.layers.3.conv.in_proj.weight": "model.safetensors",
205
+ "model.layers.3.conv.out_proj.biases": "model.safetensors",
206
+ "model.layers.3.conv.out_proj.scales": "model.safetensors",
207
+ "model.layers.3.conv.out_proj.weight": "model.safetensors",
208
+ "model.layers.3.feed_forward.w1.biases": "model.safetensors",
209
+ "model.layers.3.feed_forward.w1.scales": "model.safetensors",
210
+ "model.layers.3.feed_forward.w1.weight": "model.safetensors",
211
+ "model.layers.3.feed_forward.w2.biases": "model.safetensors",
212
+ "model.layers.3.feed_forward.w2.scales": "model.safetensors",
213
+ "model.layers.3.feed_forward.w2.weight": "model.safetensors",
214
+ "model.layers.3.feed_forward.w3.biases": "model.safetensors",
215
+ "model.layers.3.feed_forward.w3.scales": "model.safetensors",
216
+ "model.layers.3.feed_forward.w3.weight": "model.safetensors",
217
+ "model.layers.3.ffn_norm.weight": "model.safetensors",
218
+ "model.layers.3.operator_norm.weight": "model.safetensors",
219
+ "model.layers.4.conv.conv.weight": "model.safetensors",
220
+ "model.layers.4.conv.in_proj.biases": "model.safetensors",
221
+ "model.layers.4.conv.in_proj.scales": "model.safetensors",
222
+ "model.layers.4.conv.in_proj.weight": "model.safetensors",
223
+ "model.layers.4.conv.out_proj.biases": "model.safetensors",
224
+ "model.layers.4.conv.out_proj.scales": "model.safetensors",
225
+ "model.layers.4.conv.out_proj.weight": "model.safetensors",
226
+ "model.layers.4.feed_forward.w1.biases": "model.safetensors",
227
+ "model.layers.4.feed_forward.w1.scales": "model.safetensors",
228
+ "model.layers.4.feed_forward.w1.weight": "model.safetensors",
229
+ "model.layers.4.feed_forward.w2.biases": "model.safetensors",
230
+ "model.layers.4.feed_forward.w2.scales": "model.safetensors",
231
+ "model.layers.4.feed_forward.w2.weight": "model.safetensors",
232
+ "model.layers.4.feed_forward.w3.biases": "model.safetensors",
233
+ "model.layers.4.feed_forward.w3.scales": "model.safetensors",
234
+ "model.layers.4.feed_forward.w3.weight": "model.safetensors",
235
+ "model.layers.4.ffn_norm.weight": "model.safetensors",
236
+ "model.layers.4.operator_norm.weight": "model.safetensors",
237
+ "model.layers.5.feed_forward.w1.biases": "model.safetensors",
238
+ "model.layers.5.feed_forward.w1.scales": "model.safetensors",
239
+ "model.layers.5.feed_forward.w1.weight": "model.safetensors",
240
+ "model.layers.5.feed_forward.w2.biases": "model.safetensors",
241
+ "model.layers.5.feed_forward.w2.scales": "model.safetensors",
242
+ "model.layers.5.feed_forward.w2.weight": "model.safetensors",
243
+ "model.layers.5.feed_forward.w3.biases": "model.safetensors",
244
+ "model.layers.5.feed_forward.w3.scales": "model.safetensors",
245
+ "model.layers.5.feed_forward.w3.weight": "model.safetensors",
246
+ "model.layers.5.ffn_norm.weight": "model.safetensors",
247
+ "model.layers.5.operator_norm.weight": "model.safetensors",
248
+ "model.layers.5.self_attn.k_layernorm.weight": "model.safetensors",
249
+ "model.layers.5.self_attn.k_proj.biases": "model.safetensors",
250
+ "model.layers.5.self_attn.k_proj.scales": "model.safetensors",
251
+ "model.layers.5.self_attn.k_proj.weight": "model.safetensors",
252
+ "model.layers.5.self_attn.out_proj.biases": "model.safetensors",
253
+ "model.layers.5.self_attn.out_proj.scales": "model.safetensors",
254
+ "model.layers.5.self_attn.out_proj.weight": "model.safetensors",
255
+ "model.layers.5.self_attn.q_layernorm.weight": "model.safetensors",
256
+ "model.layers.5.self_attn.q_proj.biases": "model.safetensors",
257
+ "model.layers.5.self_attn.q_proj.scales": "model.safetensors",
258
+ "model.layers.5.self_attn.q_proj.weight": "model.safetensors",
259
+ "model.layers.5.self_attn.v_proj.biases": "model.safetensors",
260
+ "model.layers.5.self_attn.v_proj.scales": "model.safetensors",
261
+ "model.layers.5.self_attn.v_proj.weight": "model.safetensors",
262
+ "model.layers.6.conv.conv.weight": "model.safetensors",
263
+ "model.layers.6.conv.in_proj.biases": "model.safetensors",
264
+ "model.layers.6.conv.in_proj.scales": "model.safetensors",
265
+ "model.layers.6.conv.in_proj.weight": "model.safetensors",
266
+ "model.layers.6.conv.out_proj.biases": "model.safetensors",
267
+ "model.layers.6.conv.out_proj.scales": "model.safetensors",
268
+ "model.layers.6.conv.out_proj.weight": "model.safetensors",
269
+ "model.layers.6.feed_forward.w1.biases": "model.safetensors",
270
+ "model.layers.6.feed_forward.w1.scales": "model.safetensors",
271
+ "model.layers.6.feed_forward.w1.weight": "model.safetensors",
272
+ "model.layers.6.feed_forward.w2.biases": "model.safetensors",
273
+ "model.layers.6.feed_forward.w2.scales": "model.safetensors",
274
+ "model.layers.6.feed_forward.w2.weight": "model.safetensors",
275
+ "model.layers.6.feed_forward.w3.biases": "model.safetensors",
276
+ "model.layers.6.feed_forward.w3.scales": "model.safetensors",
277
+ "model.layers.6.feed_forward.w3.weight": "model.safetensors",
278
+ "model.layers.6.ffn_norm.weight": "model.safetensors",
279
+ "model.layers.6.operator_norm.weight": "model.safetensors",
280
+ "model.layers.7.conv.conv.weight": "model.safetensors",
281
+ "model.layers.7.conv.in_proj.biases": "model.safetensors",
282
+ "model.layers.7.conv.in_proj.scales": "model.safetensors",
283
+ "model.layers.7.conv.in_proj.weight": "model.safetensors",
284
+ "model.layers.7.conv.out_proj.biases": "model.safetensors",
285
+ "model.layers.7.conv.out_proj.scales": "model.safetensors",
286
+ "model.layers.7.conv.out_proj.weight": "model.safetensors",
287
+ "model.layers.7.feed_forward.w1.biases": "model.safetensors",
288
+ "model.layers.7.feed_forward.w1.scales": "model.safetensors",
289
+ "model.layers.7.feed_forward.w1.weight": "model.safetensors",
290
+ "model.layers.7.feed_forward.w2.biases": "model.safetensors",
291
+ "model.layers.7.feed_forward.w2.scales": "model.safetensors",
292
+ "model.layers.7.feed_forward.w2.weight": "model.safetensors",
293
+ "model.layers.7.feed_forward.w3.biases": "model.safetensors",
294
+ "model.layers.7.feed_forward.w3.scales": "model.safetensors",
295
+ "model.layers.7.feed_forward.w3.weight": "model.safetensors",
296
+ "model.layers.7.ffn_norm.weight": "model.safetensors",
297
+ "model.layers.7.operator_norm.weight": "model.safetensors",
298
+ "model.layers.8.feed_forward.w1.biases": "model.safetensors",
299
+ "model.layers.8.feed_forward.w1.scales": "model.safetensors",
300
+ "model.layers.8.feed_forward.w1.weight": "model.safetensors",
301
+ "model.layers.8.feed_forward.w2.biases": "model.safetensors",
302
+ "model.layers.8.feed_forward.w2.scales": "model.safetensors",
303
+ "model.layers.8.feed_forward.w2.weight": "model.safetensors",
304
+ "model.layers.8.feed_forward.w3.biases": "model.safetensors",
305
+ "model.layers.8.feed_forward.w3.scales": "model.safetensors",
306
+ "model.layers.8.feed_forward.w3.weight": "model.safetensors",
307
+ "model.layers.8.ffn_norm.weight": "model.safetensors",
308
+ "model.layers.8.operator_norm.weight": "model.safetensors",
309
+ "model.layers.8.self_attn.k_layernorm.weight": "model.safetensors",
310
+ "model.layers.8.self_attn.k_proj.biases": "model.safetensors",
311
+ "model.layers.8.self_attn.k_proj.scales": "model.safetensors",
312
+ "model.layers.8.self_attn.k_proj.weight": "model.safetensors",
313
+ "model.layers.8.self_attn.out_proj.biases": "model.safetensors",
314
+ "model.layers.8.self_attn.out_proj.scales": "model.safetensors",
315
+ "model.layers.8.self_attn.out_proj.weight": "model.safetensors",
316
+ "model.layers.8.self_attn.q_layernorm.weight": "model.safetensors",
317
+ "model.layers.8.self_attn.q_proj.biases": "model.safetensors",
318
+ "model.layers.8.self_attn.q_proj.scales": "model.safetensors",
319
+ "model.layers.8.self_attn.q_proj.weight": "model.safetensors",
320
+ "model.layers.8.self_attn.v_proj.biases": "model.safetensors",
321
+ "model.layers.8.self_attn.v_proj.scales": "model.safetensors",
322
+ "model.layers.8.self_attn.v_proj.weight": "model.safetensors",
323
+ "model.layers.9.conv.conv.weight": "model.safetensors",
324
+ "model.layers.9.conv.in_proj.biases": "model.safetensors",
325
+ "model.layers.9.conv.in_proj.scales": "model.safetensors",
326
+ "model.layers.9.conv.in_proj.weight": "model.safetensors",
327
+ "model.layers.9.conv.out_proj.biases": "model.safetensors",
328
+ "model.layers.9.conv.out_proj.scales": "model.safetensors",
329
+ "model.layers.9.conv.out_proj.weight": "model.safetensors",
330
+ "model.layers.9.feed_forward.w1.biases": "model.safetensors",
331
+ "model.layers.9.feed_forward.w1.scales": "model.safetensors",
332
+ "model.layers.9.feed_forward.w1.weight": "model.safetensors",
333
+ "model.layers.9.feed_forward.w2.biases": "model.safetensors",
334
+ "model.layers.9.feed_forward.w2.scales": "model.safetensors",
335
+ "model.layers.9.feed_forward.w2.weight": "model.safetensors",
336
+ "model.layers.9.feed_forward.w3.biases": "model.safetensors",
337
+ "model.layers.9.feed_forward.w3.scales": "model.safetensors",
338
+ "model.layers.9.feed_forward.w3.weight": "model.safetensors",
339
+ "model.layers.9.ffn_norm.weight": "model.safetensors",
340
+ "model.layers.9.operator_norm.weight": "model.safetensors"
341
+ }
342
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": null,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<|startoftext|>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "is_local": true,
8
+ "legacy": false,
9
+ "model_input_names": [
10
+ "input_ids",
11
+ "attention_mask"
12
+ ],
13
+ "model_max_length": 1000000000000000019884624838656,
14
+ "model_specific_special_tokens": {},
15
+ "pad_token": "<|pad|>",
16
+ "sp_model_kwargs": {},
17
+ "spaces_between_special_tokens": false,
18
+ "tokenizer_class": "TokenizersBackend",
19
+ "use_default_system_prompt": false,
20
+ "use_fast": true,
21
+ "chat_template": "{{- bos_token -}}\n{%- set keep_past_thinking = keep_past_thinking | default(false) -%}\n{%- set ns = namespace(system_prompt=\"\") -%}\n{%- if messages[0][\"role\"] == \"system\" -%}\n {%- set ns.system_prompt = messages[0][\"content\"] -%}\n {%- set messages = messages[1:] -%}\n{%- endif -%}\n{%- if tools -%}\n {%- set ns.system_prompt = ns.system_prompt + (\"\\n\" if ns.system_prompt else \"\") + \"List of tools: [\" -%}\n {%- for tool in tools -%}\n {%- if tool is not string -%}\n {%- set tool = tool | tojson -%}\n {%- endif -%}\n {%- set ns.system_prompt = ns.system_prompt + tool -%}\n {%- if not loop.last -%}\n {%- set ns.system_prompt = ns.system_prompt + \", \" -%}\n {%- endif -%}\n {%- endfor -%}\n {%- set ns.system_prompt = ns.system_prompt + \"]\" -%}\n{%- endif -%}\n{%- if ns.system_prompt -%}\n {{- \"<|im_start|>system\\n\" + ns.system_prompt + \"<|im_end|>\\n\" -}}\n{%- endif -%}\n{%- set ns.last_assistant_index = -1 -%}\n{%- for message in messages -%}\n {%- if message[\"role\"] == \"assistant\" -%}\n {%- set ns.last_assistant_index = loop.index0 -%}\n {%- endif -%}\n{%- endfor -%}\n{%- for message in messages -%}\n {{- \"<|im_start|>\" + message[\"role\"] + \"\\n\" -}}\n {%- if message.get('tool_calls') %}\n {# \u2500\u2500\u2500\u2500\u2500 create a list to append tool calls to \u2500\u2500\u2500\u2500\u2500 #}\n {%- set tool_calls_ns = namespace(tool_calls=[])%}\n {%- for tool_call in message['tool_calls'] %}\n {%- set func_name = tool_call['function']['name'] %}\n {%- set func_args = tool_call['function']['arguments'] %}\n {# \u2500\u2500\u2500\u2500\u2500 create a list of func_arg strings to accumulate for each tool call \u2500\u2500\u2500\u2500\u2500 #}\n {%- set args_ns = namespace(arg_strings=[])%}\n {%- for arg_name, arg_value in func_args.items() %}\n {%- if arg_value is none %}\n {%- set formatted_arg_value = 'null' %}\n {%- elif arg_value is boolean %}\n {%- set formatted_arg_value = 'True' if arg_value else 'False' 
%}\n {%- elif arg_value is string %}\n {%- set formatted_arg_value = '\"' ~ arg_value ~ '\"' %}\n {%- elif arg_value is mapping or arg_value is iterable %}\n {%- set formatted_arg_value = arg_value | tojson %}\n {%- else %}\n {%- set formatted_arg_value = arg_value | string %}\n {%- endif %}\n {# \u2500\u2500\u2500\u2500\u2500 format each argument key,value pair \u2500\u2500\u2500\u2500\u2500 #}\n {%- set args_ns.arg_strings = args_ns.arg_strings + [arg_name ~ '=' ~ formatted_arg_value] %}\n {%- endfor %}\n {# \u2500\u2500\u2500\u2500\u2500 append each formatted tool call \u2500\u2500\u2500\u2500\u2500 #}\n {%- set tool_calls_ns.tool_calls = tool_calls_ns.tool_calls + [(func_name + '(' + (args_ns.arg_strings | join(\", \")) + ')' )]%}\n {%- endfor %}\n {# \u2500\u2500\u2500\u2500\u2500 format the final tool calls \u2500\u2500\u2500\u2500\u2500 #}\n {{-'<|tool_call_start|>[' + (tool_calls_ns.tool_calls | join(\", \")) + ']<|tool_call_end|>'}}\n {%- endif %}\n {%- set content = message[\"content\"] -%}\n {%- if content is not string -%}\n {%- set content = content | tojson -%}\n {%- endif -%}\n {%- if message[\"role\"] == \"assistant\" and not keep_past_thinking and loop.index0 != ns.last_assistant_index -%}\n {%- if \"</think>\" in content -%}\n {%- set content = content.split(\"</think>\")[-1] | trim -%}\n {%- endif -%}\n {%- endif -%}\n {{- content + \"<|im_end|>\\n\" -}}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{- \"<|im_start|>assistant\\n\" -}}\n{%- endif -%}\n"
22
+ }