cpatonn commited on
Commit
59d9464
·
verified ·
1 Parent(s): ea526ce

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -19,7 +19,7 @@ base_model: upstage/Solar-Open-100B
19
 
20
  # **Solar Open**
21
 
22
- **Solar Open** is Upstage's flagship **102B-parameter** large language model, trained **entirely from scratch** and released under the **Solar-Apache License 2.0** (see [LICENSE](./LICENSE)). As a **Mixture-of-Experts (MoE)** architecture, it delivers enterprise-grade performance in reasoning, instruction-following, and agentic capabilities—all while prioritizing transparency and customization for the open-source community.
23
 
24
  ## Highlights
25
 
@@ -41,6 +41,18 @@ base_model: upstage/Solar-Open-100B
41
  * **Hardware Requirements:**
42
  * **Minimum:** 4x NVIDIA A100 (80GB)
43
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  ## Performance
45
 
46
  TBA
@@ -169,7 +181,7 @@ If you use Solar Open in your research, please cite:
169
 
170
  ```bibtex
171
  @misc{solar-open-2025,
172
- title={Solar Open 100B},
173
  author={Upstage AI},
174
  year={2025},
175
  url={https://huggingface.co/Upstage/Solar-Open-100B}
 
19
 
20
  # **Solar Open**
21
 
22
+ **Solar Open** is Upstage's flagship **102B-parameter** large language model, trained **entirely from scratch** and released under the **Solar-Apache License 2.0** (see [LICENSE](./LICENSE) for details). As a **Mixture-of-Experts (MoE)** architecture, it delivers enterprise-grade performance in reasoning, instruction-following, and agentic capabilities—all while prioritizing transparency and customization for the open-source community.
23
 
24
  ## Highlights
25
 
 
41
  * **Hardware Requirements:**
42
  * **Minimum:** 4x NVIDIA A100 (80GB)
43
 
44
+ ## License
45
+ This repository contains both model weights and code,
46
+ which are licensed under different terms:
47
+
48
+ 1. MODEL WEIGHTS (*.safetensors)
49
+ Licensed under **Solar-Apache License 2.0**
50
+ See: https://huggingface.co/upstage/Solar-Open-100B/blob/main/LICENSE
51
+
52
+ 2. CODE (*.py, *.json, *.jinja files)
53
+ Licensed under **Apache License 2.0**
54
+ See: https://www.apache.org/licenses/LICENSE-2.0
55
+
56
  ## Performance
57
 
58
  TBA
 
181
 
182
  ```bibtex
183
  @misc{solar-open-2025,
184
+ title={Solar Open: Scaling Upstage's LLM Capabilities with MoE},
185
  author={Upstage AI},
186
  year={2025},
187
  url={https://huggingface.co/Upstage/Solar-Open-100B}
configuration_solar_open.py CHANGED
@@ -1,17 +1,24 @@
1
  # coding=utf-8
2
- # Copyright 2025 Upstage AI. All rights reserved.
 
3
  #
4
- # Licensed under the Solar-Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
6
  # You may obtain a copy of the License at
7
  #
8
- # https://huggingface.co/upstage/Solar-Open-100B/blob/main/LICENSE
9
  #
10
  # Unless required by applicable law or agreed to in writing, software
11
  # distributed under the License is distributed on an "AS IS" BASIS,
12
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
 
 
 
 
 
 
15
 
16
  from transformers.configuration_utils import PretrainedConfig
17
  from transformers.modeling_rope_utils import rope_config_validation
 
1
  # coding=utf-8
2
+ # Copyright 2025 Upstage AI.
3
+ # Copyright 2025 The ZhipuAI Inc. team and HuggingFace Inc. team.
4
  #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
  # you may not use this file except in compliance with the License.
7
  # You may obtain a copy of the License at
8
  #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
  #
11
  # Unless required by applicable law or agreed to in writing, software
12
  # distributed under the License is distributed on an "AS IS" BASIS,
13
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
  # See the License for the specific language governing permissions and
15
  # limitations under the License.
16
+ #
17
+ # This file has been modified by Upstage AI including:
18
+ # - Hyperparameter Adjustments: Modified the model architecture by increasing vocab_size and num_hidden_layers, while decreasing num_attention_heads, intermediate_size, and moe_intermediate_size.
19
+ # - RoPE Configuration: Replaced the generic rope_parameters argument with explicit rope_theta and rope_scaling parameters to define Rotary Positional Embeddings settings.
20
+ #
21
+ # Based on code from: https://github.com/huggingface/transformers/blob/main/src/transformers/models/glm4_moe/configuration_glm4_moe.py
22
 
23
  from transformers.configuration_utils import PretrainedConfig
24
  from transformers.modeling_rope_utils import rope_config_validation
generation_config.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
 
4
  "eos_token_id": [
5
  2,
6
  24,
7
  25
8
  ],
9
  "pad_token_id": 2,
10
- "transformers_version": "4.57.3",
11
- "do_sample": true,
12
  "temperature": 0.8,
13
- "top_p": 0.95
14
- }
 
 
1
  {
2
  "_from_model_config": true,
3
  "bos_token_id": 1,
4
+ "do_sample": true,
5
  "eos_token_id": [
6
  2,
7
  24,
8
  25
9
  ],
10
  "pad_token_id": 2,
 
 
11
  "temperature": 0.8,
12
+ "top_p": 0.95,
13
+ "transformers_version": "4.57.3"
14
+ }
model-00001-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:150e9ab222ca9289a3bd5aecae07cd27b66ca7eec8b5a21d2c802c13602efc38
3
  size 5000328728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9260bbc8e6e1425492b6dd96306f607add675907b05383e6de3b44acaa8d8929
3
  size 5000328728
model-00002-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1e1abe541bbd9b4c30bcf9614a9d7c05b6c78ca839f2a2b129a2aa70ab85c48
3
  size 4998530632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:835d751936c061113ab8583cef82751df39147fcf14e2b0faf67b2d945807044
3
  size 4998530632
model-00003-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a90f1d90c44d63fe368f471307c6670b01e01e4adb09d95d562933471b8c341
3
  size 4999786408
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d8f5e4a8d07e2e988f351009b0adeee7bf8674267e30ca332551990d8050c61
3
  size 4999786408
model-00004-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cad0c14c7daca53028b97e253e1695789587a9de0dbc6a3cc5a562c3716fa487
3
  size 4998535288
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84a4661748c1515a45699d38f28d0884d1c4b24af63e7aca18d41caa41c075cb
3
  size 4998535288
model-00005-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dec14a7e564366db35f1c27dd2beaea6038498d13e204d899d3dc41901530f87
3
  size 4998535320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ab023431712224e1600f109be0eab7e8ab406728d16d13f5341b00a047065f0
3
  size 4998535320
model-00006-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:74a8329165658e72761caeabd50ac33257e9187faf7b5db4812599af00e7d996
3
  size 4998535320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d5928333173e3b057656a2196c6dcf3ce49ca04d3506fb5ee2815b4bba611f8
3
  size 4998535320
model-00007-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83db4ea3a9d055c80e37e841955835f238f5ccddde0a8ee244febe63d4cd4781
3
  size 4998535320
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5a683a150b4343a9693a9c5fba24d30395b6950da65e5d04cc9f3c3be75c9bc
3
  size 4998535320
model-00008-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a21b173a46c1181df522e558d097512343614c901ccb84d6de93167f93359b1
3
  size 4998535336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6757548ed17bdc777b3aaa8d3f24d1c6591ef21fe1da109c025febbe890ddff7
3
  size 4998535336
model-00009-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aef3cf4745e8607e1ad00ffa402de44384847482261ab4b5dc95acdb7b48fe6a
3
  size 4998535488
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a1ce865921c31a474a44f4698f89a35976e60f28d8e3bf173bf0775e8fc9e7b
3
  size 4998535488
model-00010-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1d3ba0f7d427997a6bb4af69dc6a2ab2fc1990921ee2c5287e337528ac7543d
3
  size 4999790016
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d41300d6e6fe3eb50aee95e48b19a43f3fdf6613e72fbf0e95233b064004242
3
  size 4999790016
model-00011-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7649d0e3eb6474fc3845f3351ef6aea75ee993c6c14086a2614759666e8b1e3e
3
  size 4998535264
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2751a64b426a9795c7d04a19b9ece9bb2495afb96ca6d9093546a109a39e8599
3
  size 4998535264
model-00012-of-00013.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae0186b1d258e8801538c8b3c0d9e4c3eeda1fd20d8da32bca6d765c9b8ba353
3
  size 4587386184
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1099e321bad6a5235e5de4a2961fecede247f4f0f7d1e5d88db9f3e3b45e1878
3
  size 4587386184
modeling_solar_open.py CHANGED
@@ -1,17 +1,25 @@
1
  # coding=utf-8
2
- # Copyright 2025 Upstage AI. All rights reserved.
 
3
  #
4
- # Licensed under the Solar-Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
6
  # You may obtain a copy of the License at
7
  #
8
- # https://huggingface.co/upstage/Solar-Open-100B/blob/main/LICENSE
9
  #
10
  # Unless required by applicable law or agreed to in writing, software
11
  # distributed under the License is distributed on an "AS IS" BASIS,
12
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
  # See the License for the specific language governing permissions and
14
  # limitations under the License.
 
 
 
 
 
 
 
15
 
16
  from typing import Callable, Optional, Union
17
 
 
1
  # coding=utf-8
2
+ # Copyright 2025 Upstage AI.
3
+ # Copyright 2025 The GLM4 & ZhipuAI team and HuggingFace Inc. team.
4
  #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
  # you may not use this file except in compliance with the License.
7
  # You may obtain a copy of the License at
8
  #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
  #
11
  # Unless required by applicable law or agreed to in writing, software
12
  # distributed under the License is distributed on an "AS IS" BASIS,
13
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
  # See the License for the specific language governing permissions and
15
  # limitations under the License.
16
+ #
17
+ # This file has been modified by Upstage AI including:
18
+ # - Hybrid MoE Architecture: Replaced the standard dense structure with a depth-dependent Hybrid MoE, adding `SolarOpenMoE` and `SolarOpenTopkRouter` classes.
19
+ # - RoPE Strategy: Changed the rotary position embedding strategy from GLM4's interleaved rotation to Llama-style block rotation (via modified `rotate_half`).
20
+ # - Normalization Logic: Simplified the layer normalization structure by removing GLM4's extra post-operation norms and adding optional Query-Key Normalization (`use_qk_norm`).
21
+ #
22
+ # Based on code from: https://github.com/huggingface/transformers/blob/main/src/transformers/models/glm4/modeling_glm4.py
23
 
24
  from typing import Callable, Optional, Union
25
 
parallel_tool_call_logits_processor.py CHANGED
@@ -1,11 +1,11 @@
1
  # coding=utf-8
2
- # Copyright 2025 Upstage AI. All rights reserved.
3
  #
4
- # Licensed under the Solar-Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
6
  # You may obtain a copy of the License at
7
  #
8
- # https://huggingface.co/upstage/Solar-Open-100B/blob/main/LICENSE
9
  #
10
  # Unless required by applicable law or agreed to in writing, software
11
  # distributed under the License is distributed on an "AS IS" BASIS,
 
1
  # coding=utf-8
2
+ # Copyright 2025 Upstage AI.
3
  #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
6
  # You may obtain a copy of the License at
7
  #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
  #
10
  # Unless required by applicable law or agreed to in writing, software
11
  # distributed under the License is distributed on an "AS IS" BASIS,
solar_open_logits_processor.py CHANGED
@@ -1,11 +1,11 @@
1
  # coding=utf-8
2
- # Copyright 2025 Upstage AI. All rights reserved.
3
  #
4
- # Licensed under the Solar-Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
6
  # You may obtain a copy of the License at
7
  #
8
- # https://huggingface.co/upstage/Solar-Open-100B/blob/main/LICENSE
9
  #
10
  # Unless required by applicable law or agreed to in writing, software
11
  # distributed under the License is distributed on an "AS IS" BASIS,
 
1
  # coding=utf-8
2
+ # Copyright 2025 Upstage AI.
3
  #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
6
  # You may obtain a copy of the License at
7
  #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
  #
10
  # Unless required by applicable law or agreed to in writing, software
11
  # distributed under the License is distributed on an "AS IS" BASIS,
solar_open_reasoning_parser.py CHANGED
@@ -1,11 +1,11 @@
1
  # coding=utf-8
2
- # Copyright 2025 Upstage AI. All rights reserved.
3
  #
4
- # Licensed under the Solar-Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
6
  # You may obtain a copy of the License at
7
  #
8
- # https://huggingface.co/upstage/Solar-Open-100B/blob/main/LICENSE
9
  #
10
  # Unless required by applicable law or agreed to in writing, software
11
  # distributed under the License is distributed on an "AS IS" BASIS,
 
1
  # coding=utf-8
2
+ # Copyright 2025 Upstage AI.
3
  #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
6
  # You may obtain a copy of the License at
7
  #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
  #
10
  # Unless required by applicable law or agreed to in writing, software
11
  # distributed under the License is distributed on an "AS IS" BASIS,
solar_open_tool_parser.py CHANGED
@@ -1,11 +1,11 @@
1
  # coding=utf-8
2
- # Copyright 2025 Upstage AI. All rights reserved.
3
  #
4
- # Licensed under the Solar-Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
6
  # You may obtain a copy of the License at
7
  #
8
- # https://huggingface.co/upstage/Solar-Open-100B/blob/main/LICENSE
9
  #
10
  # Unless required by applicable law or agreed to in writing, software
11
  # distributed under the License is distributed on an "AS IS" BASIS,
 
1
  # coding=utf-8
2
+ # Copyright 2025 Upstage AI.
3
  #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
  # you may not use this file except in compliance with the License.
6
  # You may obtain a copy of the License at
7
  #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
  #
10
  # Unless required by applicable law or agreed to in writing, software
11
  # distributed under the License is distributed on an "AS IS" BASIS,
tokenizer_config.json CHANGED
@@ -36751,12 +36751,14 @@
36751
  "clean_up_tokenization_spaces": false,
36752
  "eos_token": "<|endoftext|>",
36753
  "extra_special_tokens": {},
36754
- "model_input_names": ["input_ids", "attention_mask"],
 
 
 
36755
  "model_max_length": 1000000000000000019884624838656,
36756
  "pad_token": "<|endoftext|>",
36757
  "padding_side": "left",
36758
  "split_special_tokens": false,
36759
  "tokenizer_class": "PreTrainedTokenizerFast",
36760
- "unk_token": "<unk>",
36761
- "chat_template": "{#- ======== Template Parameters ======== #}\r\n{%- set add_generation_prompt = add_generation_prompt if add_generation_prompt is defined else true %}\r\n{%- set default_system_prompt = default_system_prompt if default_system_prompt is defined else true %}\r\n{%- set reasoning_effort = reasoning_effort if reasoning_effort is defined else \"high\" %}\r\n{%- set think_render_option = think_render_option if think_render_option is defined else \"lastthink\" %}\r\n\r\n{#- ======== System Block State ======== #}\r\n{%- set sys_ns = namespace(is_first_block=true) -%}\r\n\r\n{#- ======== Find last user message index ======== #}\r\n{%- set last_user_idx = namespace(value=-1) -%}\r\n{%- for message in messages -%}\r\n {%- if message.role == 'user' -%}\r\n {%- set last_user_idx.value = loop.index0 -%}\r\n {%- endif -%}\r\n{%- endfor -%}\r\n\r\n{#- ======== System messages renderers ======== #}\r\n{%- macro render_system_message(user_system_messages) %}\r\n {%- if default_system_prompt %}\r\n {%- if not sys_ns.is_first_block %}{{- \"\\n\\n\" }}{%- endif %}\r\n {%- set sys_ns.is_first_block = false %}\r\n {{- \"## Provider System Prompt\\n\\nYou are Solar Open 100B, a large language model trained by Upstage AI, a Korean startup. Your knowledge cutoff is 2025-07. 
The current date is \" + strftime_now(\"%Y-%m-%d\") + \".\" }}\r\n {%- endif -%}\r\n {%- if user_system_messages %}\r\n {%- if not sys_ns.is_first_block %}{{- \"\\n\\n\" }}{%- endif %}\r\n {%- set sys_ns.is_first_block = false %}\r\n {{- \"## System Prompt\" }}\r\n {%- for system_message in user_system_messages %}\r\n {{- \"\\n\\n\" }}\r\n {{- system_message }}\r\n {%- endfor %}\r\n {%- endif -%}\r\n{%- endmacro %}\r\n\r\n{%- macro render_tool_instruction(tools) %}\r\n {%- if not sys_ns.is_first_block %}{{- \"\\n\\n\" }}{%- endif %}\r\n {%- set sys_ns.is_first_block = false %}\r\n {{- \"## Tools\\n\\n### Tool Call Instruction\" }}\r\n {{- \"\\nYou may invoke one or more tools to assist with the user's query. Available tools are provided in JSON Schema format: <|tools:begin|><|tool:begin|><tools-json-object><|tool:end|>...<|tools:end|>\\n\" }}\r\n {{- \"\\n### Available Tools\\n\" }}\r\n {{- \"<|tools:begin|>\" }}\r\n {%- for tool in tools %}\r\n {{- \"<|tool:begin|>\" }}\r\n {{- tool.function | tojson }}\r\n {{- \"<|tool:end|>\" }}\r\n {%- endfor %}\r\n {{- \"<|tools:end|>\\n\" }}\r\n {{- \"\\n### Tool Call Format\\n\" }}\r\n {{- \"For each tool call, return a JSON object with the following structure, enclosed within <|tool_call:begin|> and <|tool_call:end|> tags: \\n<|tool_call:begin|><tool-call-id><|tool_call:name|><tool-name><|tool_call:args|><args-json-object><|tool_call:end|>\\n\" }}\r\n {{- \"- The <tool-call-id> must be a randomly generated string consisting of 10 lowercase letters (a-z) and\/or digits (0-9) (e.g., a1b2c3d4e5)\\n\" }}\r\n {{- \"\\n### Tool Response Format\\n\" }}\r\n {{- \"Each tool is responded by `tool` with the following structure:\\n<|tool_response:id|><tool-call-id><|tool_response:name|><tool-name><|tool_response:result|><results><|tool_response:end|>\\n\" }}\r\n {{- \"- Ensure the <tool-call-id> matches the corresponding tool call\" -}}\r\n{%- endmacro %}\r\n\r\n{%- macro render_json_response_format_instruction(response_format) %}\r\n 
{%- if not sys_ns.is_first_block %}{{- \"\\n\\n\" }}{%- endif %}\r\n {%- set sys_ns.is_first_block = false %}\r\n {{- \"## Output Format Constraint\" }}\r\n {{- \"\\n\\nYour final response should follow the JSON schema: \\n[Start of schema]\" }}\r\n {{- response_format }}\r\n {{- \"\\n[End of schema]\\nPlease ensure your answers adhere to this format and do not contain any unnecessary text.\" }}\r\n{%- endmacro %}\r\n\r\n{%- macro get_tool_name(messages, tool_call_id) %}\r\n {%- for msg in messages -%}\r\n {%- if msg.role == 'assistant' and msg.tool_calls -%}\r\n {%- for tool_call in msg.tool_calls -%}\r\n {%- if tool_call.id == tool_call_id -%}\r\n {{- tool_call.function.name }}\r\n {%- endif -%}\r\n {%- endfor -%}\r\n {%- endif -%}\r\n {%- endfor -%}\r\n{%- endmacro %}\r\n\r\n{%- macro render_tool_arguments(tool_arguments) %}\r\n {%- if tool_arguments is mapping -%}\r\n {{- tool_arguments | tojson }}\r\n {%- else -%}\r\n {{- tool_arguments }}\r\n {%- endif -%}\r\n{%- endmacro %}\r\n\r\n{#- ======== Render system message ======== #}\r\n{%- set ns = namespace(system_messages=[]) -%}\r\n{%- for message in messages -%}\r\n {%- if message.role == 'system' -%}\r\n {%- set ns.system_messages = ns.system_messages + [message.content] -%}\r\n {%- endif -%}\r\n{%- endfor -%}\r\n\r\n{%- if ns.system_messages or default_system_prompt or tools or response_format -%}\r\n {{- \"<|begin|>system<|content|>\" }}\r\n {{- render_system_message(ns.system_messages) }}\r\n {%- if tools -%}\r\n {{- render_tool_instruction(tools) }}\r\n {%- endif %}\r\n {%- if response_format -%}\r\n {{- render_json_response_format_instruction(response_format) }}\r\n {%- endif %}\r\n {{- \"<|end|>\" }}\r\n{%- endif -%}\r\n\r\n{#- ======== Render main messages ======== #}\r\n{%- for message in messages -%}\r\n {%- if message.role == 'user' -%}\r\n {{- \"<|begin|>user<|content|>\" + message.content + \"<|end|>\" }}\r\n {%- elif message.role == 'tool' -%}\r\n {%- set prev_is_tool = loop.index0 > 0 and 
messages[loop.index0 - 1].role == 'tool' -%}\r\n {%- set next_is_tool = loop.index0 < (messages | length - 1) and messages[loop.index0 + 1].role == 'tool' -%}\r\n {%- if not prev_is_tool -%}\r\n {{- \"<|begin|>tool<|tool_response|>\" }}\r\n {%- endif -%}\r\n {{- \"<|tool_response:begin|>\" + message.tool_call_id + \"<|tool_response:name|>\" }}\r\n {{- get_tool_name(messages, message.tool_call_id) }}\r\n {{- \"<|tool_response:result|>\" }}\r\n {{- message.content }}\r\n {{- \"<|tool_response:end|>\" }}\r\n {%- if not next_is_tool -%}\r\n {{- \"<|end|>\" }}\r\n {%- endif -%}\r\n {%- elif message.role == 'assistant' -%}\r\n {#- ======== Assistant Thinking ======== #}\r\n {%- if think_render_option == \"all\" -%}\r\n {%- if message.reasoning -%}\r\n {{- \"<|begin|>assistant<|think|>\" + message.reasoning + \"<|end|>\" }}\r\n {%- endif -%}\r\n {%- elif think_render_option == \"lastthink\" -%}\r\n {%- if message.reasoning and loop.index0 > last_user_idx.value -%}\r\n {{- \"<|begin|>assistant<|think|>\" + message.reasoning + \"<|end|>\" }}\r\n {%- endif -%}\r\n {%- endif -%}\r\n\r\n {#- ======== Assistant Messages ======== #}\r\n {%- if message.tool_calls -%}\r\n {{- \"<|begin|>assistant<|tool_calls|>\" }}\r\n {%- for tool_call in message.tool_calls -%}\r\n {{- \"<|tool_call:begin|>\" + tool_call.id +\"<|tool_call:name|>\" + tool_call.function.name + \"<|tool_call:args|>\" }}\r\n {{- render_tool_arguments(tool_call.function.arguments) }}\r\n {{- \"<|tool_call:end|>\" }}\r\n {%- endfor -%}\r\n {{- \"<|calls|>\" }}\r\n {%- else -%}\r\n {{- \"<|begin|>assistant<|content|>\" + message.content + \"<|end|>\" }}\r\n {%- endif -%}\r\n {%- endif -%}\r\n{%- endfor -%}\r\n\r\n{%- if add_generation_prompt -%}\r\n {%- if reasoning_effort in [\"low\", \"minimal\"] -%}\r\n {{- \"<|begin|>assistant<|think|><|end|>\" }}\r\n {%- endif -%}\r\n {{- \"<|begin|>assistant\" }}\r\n{%- endif -%}"
36762
  }
 
36751
  "clean_up_tokenization_spaces": false,
36752
  "eos_token": "<|endoftext|>",
36753
  "extra_special_tokens": {},
36754
+ "model_input_names": [
36755
+ "input_ids",
36756
+ "attention_mask"
36757
+ ],
36758
  "model_max_length": 1000000000000000019884624838656,
36759
  "pad_token": "<|endoftext|>",
36760
  "padding_side": "left",
36761
  "split_special_tokens": false,
36762
  "tokenizer_class": "PreTrainedTokenizerFast",
36763
+ "unk_token": "<unk>"
 
36764
  }