Upload folder using huggingface_hub
Browse files- README.md +14 -2
- configuration_solar_open.py +10 -3
- generation_config.json +4 -4
- model-00001-of-00013.safetensors +1 -1
- model-00002-of-00013.safetensors +1 -1
- model-00003-of-00013.safetensors +1 -1
- model-00004-of-00013.safetensors +1 -1
- model-00005-of-00013.safetensors +1 -1
- model-00006-of-00013.safetensors +1 -1
- model-00007-of-00013.safetensors +1 -1
- model-00008-of-00013.safetensors +1 -1
- model-00009-of-00013.safetensors +1 -1
- model-00010-of-00013.safetensors +1 -1
- model-00011-of-00013.safetensors +1 -1
- model-00012-of-00013.safetensors +1 -1
- modeling_solar_open.py +11 -3
- parallel_tool_call_logits_processor.py +3 -3
- solar_open_logits_processor.py +3 -3
- solar_open_reasoning_parser.py +3 -3
- solar_open_tool_parser.py +3 -3
- tokenizer_config.json +5 -3
README.md
CHANGED
|
@@ -19,7 +19,7 @@ base_model: upstage/Solar-Open-100B
|
|
| 19 |
|
| 20 |
# **Solar Open**
|
| 21 |
|
| 22 |
-
**Solar Open** is Upstage's flagship **102B-parameter** large language model, trained **entirely from scratch** and released under the **Solar-Apache License 2.0** (see [LICENSE](
|
| 23 |
|
| 24 |
## Highlights
|
| 25 |
|
|
@@ -41,6 +41,18 @@ base_model: upstage/Solar-Open-100B
|
|
| 41 |
* **Hardware Requirements:**
|
| 42 |
* **Minimum:** 4x NVIDIA A100 (80GB)
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
## Performance
|
| 45 |
|
| 46 |
TBA
|
|
@@ -169,7 +181,7 @@ If you use Solar Open in your research, please cite:
|
|
| 169 |
|
| 170 |
```bibtex
|
| 171 |
@misc{solar-open-2025,
|
| 172 |
-
title={Solar Open
|
| 173 |
author={Upstage AI},
|
| 174 |
year={2025},
|
| 175 |
url={https://huggingface.co/Upstage/Solar-Open-100B}
|
|
|
|
| 19 |
|
| 20 |
# **Solar Open**
|
| 21 |
|
| 22 |
+
**Solar Open** is Upstage's flagship **102B-parameter** large language model, trained **entirely from scratch** and released under the **Solar-Apache License 2.0** (see [License](#license) for details). As a **Mixture-of-Experts (MoE)** architecture, it delivers enterprise-grade performance in reasoning, instruction-following, and agentic capabilities—all while prioritizing transparency and customization for the open-source community.
|
| 23 |
|
| 24 |
## Highlights
|
| 25 |
|
|
|
|
| 41 |
* **Hardware Requirements:**
|
| 42 |
* **Minimum:** 4x NVIDIA A100 (80GB)
|
| 43 |
|
| 44 |
+
## License
|
| 45 |
+
This repository contains both model weights and code,
|
| 46 |
+
which are licensed under different terms:
|
| 47 |
+
|
| 48 |
+
1. MODEL WEIGHTS (*.safetensors)
|
| 49 |
+
Licensed under **Solar-Apache License 2.0**
|
| 50 |
+
See: https://huggingface.co/upstage/Solar-Open-100B/blob/main/LICENSE
|
| 51 |
+
|
| 52 |
+
2. CODE (*.py, *.json, *.jinja files)
|
| 53 |
+
Licensed under **Apache License 2.0**
|
| 54 |
+
See: https://www.apache.org/licenses/LICENSE-2.0
|
| 55 |
+
|
| 56 |
## Performance
|
| 57 |
|
| 58 |
TBA
|
|
|
|
| 181 |
|
| 182 |
```bibtex
|
| 183 |
@misc{solar-open-2025,
|
| 184 |
+
title={Solar Open: Scaling Upstage's LLM Capabilities with MoE},
|
| 185 |
author={Upstage AI},
|
| 186 |
year={2025},
|
| 187 |
url={https://huggingface.co/Upstage/Solar-Open-100B}
|
configuration_solar_open.py
CHANGED
|
@@ -1,17 +1,24 @@
|
|
| 1 |
# coding=utf-8
|
| 2 |
-
# Copyright 2025 Upstage AI.
|
|
|
|
| 3 |
#
|
| 4 |
-
# Licensed under the
|
| 5 |
# you may not use this file except in compliance with the License.
|
| 6 |
# You may obtain a copy of the License at
|
| 7 |
#
|
| 8 |
-
#
|
| 9 |
#
|
| 10 |
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 12 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
from transformers.configuration_utils import PretrainedConfig
|
| 17 |
from transformers.modeling_rope_utils import rope_config_validation
|
|
|
|
| 1 |
# coding=utf-8
|
| 2 |
+
# Copyright 2025 Upstage AI.
|
| 3 |
+
# Copyright 2025 The ZhipuAI Inc. team and HuggingFace Inc. team.
|
| 4 |
#
|
| 5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 6 |
# you may not use this file except in compliance with the License.
|
| 7 |
# You may obtain a copy of the License at
|
| 8 |
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
#
|
| 11 |
# Unless required by applicable law or agreed to in writing, software
|
| 12 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 13 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 14 |
# See the License for the specific language governing permissions and
|
| 15 |
# limitations under the License.
|
| 16 |
+
#
|
| 17 |
+
# This file has been modified by Upstage AI including:
|
| 18 |
+
# - Hyperparameter Adjustments: Modified the model architecture by increasing vocab_size and num_hidden_layers, while decreasing num_attention_heads, intermediate_size, and moe_intermediate_size.
|
| 19 |
+
# - RoPE Configuration: Replaced the generic rope_parameters argument with explicit rope_theta and rope_scaling parameters to define Rotary Positional Embeddings settings.
|
| 20 |
+
#
|
| 21 |
+
# Based on code from: https://github.com/huggingface/transformers/blob/main/src/transformers/models/glm4_moe/configuration_glm4_moe.py
|
| 22 |
|
| 23 |
from transformers.configuration_utils import PretrainedConfig
|
| 24 |
from transformers.modeling_rope_utils import rope_config_validation
|
generation_config.json
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
{
|
| 2 |
"_from_model_config": true,
|
| 3 |
"bos_token_id": 1,
|
|
|
|
| 4 |
"eos_token_id": [
|
| 5 |
2,
|
| 6 |
24,
|
| 7 |
25
|
| 8 |
],
|
| 9 |
"pad_token_id": 2,
|
| 10 |
-
"transformers_version": "4.57.3",
|
| 11 |
-
"do_sample": true,
|
| 12 |
"temperature": 0.8,
|
| 13 |
-
"top_p": 0.95
|
| 14 |
-
|
|
|
|
|
|
| 1 |
{
|
| 2 |
"_from_model_config": true,
|
| 3 |
"bos_token_id": 1,
|
| 4 |
+
"do_sample": true,
|
| 5 |
"eos_token_id": [
|
| 6 |
2,
|
| 7 |
24,
|
| 8 |
25
|
| 9 |
],
|
| 10 |
"pad_token_id": 2,
|
|
|
|
|
|
|
| 11 |
"temperature": 0.8,
|
| 12 |
+
"top_p": 0.95,
|
| 13 |
+
"transformers_version": "4.57.3"
|
| 14 |
+
}
|
model-00001-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5000328728
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9260bbc8e6e1425492b6dd96306f607add675907b05383e6de3b44acaa8d8929
|
| 3 |
size 5000328728
|
model-00002-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4998530632
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:835d751936c061113ab8583cef82751df39147fcf14e2b0faf67b2d945807044
|
| 3 |
size 4998530632
|
model-00003-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999786408
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8d8f5e4a8d07e2e988f351009b0adeee7bf8674267e30ca332551990d8050c61
|
| 3 |
size 4999786408
|
model-00004-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4998535288
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84a4661748c1515a45699d38f28d0884d1c4b24af63e7aca18d41caa41c075cb
|
| 3 |
size 4998535288
|
model-00005-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4998535320
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4ab023431712224e1600f109be0eab7e8ab406728d16d13f5341b00a047065f0
|
| 3 |
size 4998535320
|
model-00006-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4998535320
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d5928333173e3b057656a2196c6dcf3ce49ca04d3506fb5ee2815b4bba611f8
|
| 3 |
size 4998535320
|
model-00007-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4998535320
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5a683a150b4343a9693a9c5fba24d30395b6950da65e5d04cc9f3c3be75c9bc
|
| 3 |
size 4998535320
|
model-00008-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4998535336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6757548ed17bdc777b3aaa8d3f24d1c6591ef21fe1da109c025febbe890ddff7
|
| 3 |
size 4998535336
|
model-00009-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4998535488
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5a1ce865921c31a474a44f4698f89a35976e60f28d8e3bf173bf0775e8fc9e7b
|
| 3 |
size 4998535488
|
model-00010-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999790016
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9d41300d6e6fe3eb50aee95e48b19a43f3fdf6613e72fbf0e95233b064004242
|
| 3 |
size 4999790016
|
model-00011-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4998535264
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2751a64b426a9795c7d04a19b9ece9bb2495afb96ca6d9093546a109a39e8599
|
| 3 |
size 4998535264
|
model-00012-of-00013.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4587386184
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1099e321bad6a5235e5de4a2961fecede247f4f0f7d1e5d88db9f3e3b45e1878
|
| 3 |
size 4587386184
|
modeling_solar_open.py
CHANGED
|
@@ -1,17 +1,25 @@
|
|
| 1 |
# coding=utf-8
|
| 2 |
-
# Copyright 2025 Upstage AI.
|
|
|
|
| 3 |
#
|
| 4 |
-
# Licensed under the
|
| 5 |
# you may not use this file except in compliance with the License.
|
| 6 |
# You may obtain a copy of the License at
|
| 7 |
#
|
| 8 |
-
#
|
| 9 |
#
|
| 10 |
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 12 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 13 |
# See the License for the specific language governing permissions and
|
| 14 |
# limitations under the License.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
from typing import Callable, Optional, Union
|
| 17 |
|
|
|
|
| 1 |
# coding=utf-8
|
| 2 |
+
# Copyright 2025 Upstage AI.
|
| 3 |
+
# Copyright 2025 The GLM4 & ZhipuAI team and HuggingFace Inc. team.
|
| 4 |
#
|
| 5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 6 |
# you may not use this file except in compliance with the License.
|
| 7 |
# You may obtain a copy of the License at
|
| 8 |
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
#
|
| 11 |
# Unless required by applicable law or agreed to in writing, software
|
| 12 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 13 |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 14 |
# See the License for the specific language governing permissions and
|
| 15 |
# limitations under the License.
|
| 16 |
+
#
|
| 17 |
+
# This file has been modified by Upstage AI including:
|
| 18 |
+
# - Hybrid MoE Architecture: Replaced the standard dense structure with a depth-dependent Hybrid MoE, adding `SolarOpenMoE` and `SolarOpenTopkRouter` classes.
|
| 19 |
+
# - RoPE Strategy: Changed the rotary position embedding strategy from GLM4's interleaved rotation to Llama-style block rotation (via modified `rotate_half`).
|
| 20 |
+
# - Normalization Logic: Simplified the layer normalization structure by removing GLM4's extra post-operation norms and adding optional Query-Key Normalization (`use_qk_norm`).
|
| 21 |
+
#
|
| 22 |
+
# Based on code from: https://github.com/huggingface/transformers/blob/main/src/transformers/models/glm4/modeling_glm4.py
|
| 23 |
|
| 24 |
from typing import Callable, Optional, Union
|
| 25 |
|
parallel_tool_call_logits_processor.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
# coding=utf-8
|
| 2 |
-
# Copyright 2025 Upstage AI.
|
| 3 |
#
|
| 4 |
-
# Licensed under the
|
| 5 |
# you may not use this file except in compliance with the License.
|
| 6 |
# You may obtain a copy of the License at
|
| 7 |
#
|
| 8 |
-
#
|
| 9 |
#
|
| 10 |
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
| 1 |
# coding=utf-8
|
| 2 |
+
# Copyright 2025 Upstage AI.
|
| 3 |
#
|
| 4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
| 6 |
# You may obtain a copy of the License at
|
| 7 |
#
|
| 8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 9 |
#
|
| 10 |
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
solar_open_logits_processor.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
# coding=utf-8
|
| 2 |
-
# Copyright 2025 Upstage AI.
|
| 3 |
#
|
| 4 |
-
# Licensed under the
|
| 5 |
# you may not use this file except in compliance with the License.
|
| 6 |
# You may obtain a copy of the License at
|
| 7 |
#
|
| 8 |
-
#
|
| 9 |
#
|
| 10 |
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
| 1 |
# coding=utf-8
|
| 2 |
+
# Copyright 2025 Upstage AI.
|
| 3 |
#
|
| 4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
| 6 |
# You may obtain a copy of the License at
|
| 7 |
#
|
| 8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 9 |
#
|
| 10 |
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
solar_open_reasoning_parser.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
# coding=utf-8
|
| 2 |
-
# Copyright 2025 Upstage AI.
|
| 3 |
#
|
| 4 |
-
# Licensed under the
|
| 5 |
# you may not use this file except in compliance with the License.
|
| 6 |
# You may obtain a copy of the License at
|
| 7 |
#
|
| 8 |
-
#
|
| 9 |
#
|
| 10 |
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
| 1 |
# coding=utf-8
|
| 2 |
+
# Copyright 2025 Upstage AI.
|
| 3 |
#
|
| 4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
| 6 |
# You may obtain a copy of the License at
|
| 7 |
#
|
| 8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 9 |
#
|
| 10 |
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
solar_open_tool_parser.py
CHANGED
|
@@ -1,11 +1,11 @@
|
|
| 1 |
# coding=utf-8
|
| 2 |
-
# Copyright 2025 Upstage AI.
|
| 3 |
#
|
| 4 |
-
# Licensed under the
|
| 5 |
# you may not use this file except in compliance with the License.
|
| 6 |
# You may obtain a copy of the License at
|
| 7 |
#
|
| 8 |
-
#
|
| 9 |
#
|
| 10 |
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
| 1 |
# coding=utf-8
|
| 2 |
+
# Copyright 2025 Upstage AI.
|
| 3 |
#
|
| 4 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 5 |
# you may not use this file except in compliance with the License.
|
| 6 |
# You may obtain a copy of the License at
|
| 7 |
#
|
| 8 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 9 |
#
|
| 10 |
# Unless required by applicable law or agreed to in writing, software
|
| 11 |
# distributed under the License is distributed on an "AS IS" BASIS,
|
tokenizer_config.json
CHANGED
|
@@ -36751,12 +36751,14 @@
|
|
| 36751 |
"clean_up_tokenization_spaces": false,
|
| 36752 |
"eos_token": "<|endoftext|>",
|
| 36753 |
"extra_special_tokens": {},
|
| 36754 |
-
"model_input_names": [
|
|
|
|
|
|
|
|
|
|
| 36755 |
"model_max_length": 1000000000000000019884624838656,
|
| 36756 |
"pad_token": "<|endoftext|>",
|
| 36757 |
"padding_side": "left",
|
| 36758 |
"split_special_tokens": false,
|
| 36759 |
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 36760 |
-
"unk_token": "<unk>"
|
| 36761 |
-
"chat_template": "{#- ======== Template Parameters ======== #}\r\n{%- set add_generation_prompt = add_generation_prompt if add_generation_prompt is defined else true %}\r\n{%- set default_system_prompt = default_system_prompt if default_system_prompt is defined else true %}\r\n{%- set reasoning_effort = reasoning_effort if reasoning_effort is defined else \"high\" %}\r\n{%- set think_render_option = think_render_option if think_render_option is defined else \"lastthink\" %}\r\n\r\n{#- ======== System Block State ======== #}\r\n{%- set sys_ns = namespace(is_first_block=true) -%}\r\n\r\n{#- ======== Find last user message index ======== #}\r\n{%- set last_user_idx = namespace(value=-1) -%}\r\n{%- for message in messages -%}\r\n {%- if message.role == 'user' -%}\r\n {%- set last_user_idx.value = loop.index0 -%}\r\n {%- endif -%}\r\n{%- endfor -%}\r\n\r\n{#- ======== System messages renderers ======== #}\r\n{%- macro render_system_message(user_system_messages) %}\r\n {%- if default_system_prompt %}\r\n {%- if not sys_ns.is_first_block %}{{- \"\\n\\n\" }}{%- endif %}\r\n {%- set sys_ns.is_first_block = false %}\r\n {{- \"## Provider System Prompt\\n\\nYou are Solar Open 100B, a large language model trained by Upstage AI, a Korean startup. Your knowledge cutoff is 2025-07. 
The current date is \" + strftime_now(\"%Y-%m-%d\") + \".\" }}\r\n {%- endif -%}\r\n {%- if user_system_messages %}\r\n {%- if not sys_ns.is_first_block %}{{- \"\\n\\n\" }}{%- endif %}\r\n {%- set sys_ns.is_first_block = false %}\r\n {{- \"## System Prompt\" }}\r\n {%- for system_message in user_system_messages %}\r\n {{- \"\\n\\n\" }}\r\n {{- system_message }}\r\n {%- endfor %}\r\n {%- endif -%}\r\n{%- endmacro %}\r\n\r\n{%- macro render_tool_instruction(tools) %}\r\n {%- if not sys_ns.is_first_block %}{{- \"\\n\\n\" }}{%- endif %}\r\n {%- set sys_ns.is_first_block = false %}\r\n {{- \"## Tools\\n\\n### Tool Call Instruction\" }}\r\n {{- \"\\nYou may invoke one or more tools to assist with the user's query. Available tools are provided in JSON Schema format: <|tools:begin|><|tool:begin|><tools-json-object><|tool:end|>...<|tools:end|>\\n\" }}\r\n {{- \"\\n### Available Tools\\n\" }}\r\n {{- \"<|tools:begin|>\" }}\r\n {%- for tool in tools %}\r\n {{- \"<|tool:begin|>\" }}\r\n {{- tool.function | tojson }}\r\n {{- \"<|tool:end|>\" }}\r\n {%- endfor %}\r\n {{- \"<|tools:end|>\\n\" }}\r\n {{- \"\\n### Tool Call Format\\n\" }}\r\n {{- \"For each tool call, return a JSON object with the following structure, enclosed within <|tool_call:begin|> and <|tool_call:end|> tags: \\n<|tool_call:begin|><tool-call-id><|tool_call:name|><tool-name><|tool_call:args|><args-json-object><|tool_call:end|>\\n\" }}\r\n {{- \"- The <tool-call-id> must be a randomly generated string consisting of 10 lowercase letters (a-z) and\/or digits (0-9) (e.g., a1b2c3d4e5)\\n\" }}\r\n {{- \"\\n### Tool Response Format\\n\" }}\r\n {{- \"Each tool is responded by `tool` with the following structure:\\n<|tool_response:id|><tool-call-id><|tool_response:name|><tool-name><|tool_response:result|><results><|tool_response:end|>\\n\" }}\r\n {{- \"- Ensure the <tool-call-id> matches the corresponding tool call\" -}}\r\n{%- endmacro %}\r\n\r\n{%- macro render_json_response_format_instruction(response_format) %}\r\n 
{%- if not sys_ns.is_first_block %}{{- \"\\n\\n\" }}{%- endif %}\r\n {%- set sys_ns.is_first_block = false %}\r\n {{- \"## Output Format Constraint\" }}\r\n {{- \"\\n\\nYour final response should follow the JSON schema: \\n[Start of schema]\" }}\r\n {{- response_format }}\r\n {{- \"\\n[End of schema]\\nPlease ensure your answers adhere to this format and do not contain any unnecessary text.\" }}\r\n{%- endmacro %}\r\n\r\n{%- macro get_tool_name(messages, tool_call_id) %}\r\n {%- for msg in messages -%}\r\n {%- if msg.role == 'assistant' and msg.tool_calls -%}\r\n {%- for tool_call in msg.tool_calls -%}\r\n {%- if tool_call.id == tool_call_id -%}\r\n {{- tool_call.function.name }}\r\n {%- endif -%}\r\n {%- endfor -%}\r\n {%- endif -%}\r\n {%- endfor -%}\r\n{%- endmacro %}\r\n\r\n{%- macro render_tool_arguments(tool_arguments) %}\r\n {%- if tool_arguments is mapping -%}\r\n {{- tool_arguments | tojson }}\r\n {%- else -%}\r\n {{- tool_arguments }}\r\n {%- endif -%}\r\n{%- endmacro %}\r\n\r\n{#- ======== Render system message ======== #}\r\n{%- set ns = namespace(system_messages=[]) -%}\r\n{%- for message in messages -%}\r\n {%- if message.role == 'system' -%}\r\n {%- set ns.system_messages = ns.system_messages + [message.content] -%}\r\n {%- endif -%}\r\n{%- endfor -%}\r\n\r\n{%- if ns.system_messages or default_system_prompt or tools or response_format -%}\r\n {{- \"<|begin|>system<|content|>\" }}\r\n {{- render_system_message(ns.system_messages) }}\r\n {%- if tools -%}\r\n {{- render_tool_instruction(tools) }}\r\n {%- endif %}\r\n {%- if response_format -%}\r\n {{- render_json_response_format_instruction(response_format) }}\r\n {%- endif %}\r\n {{- \"<|end|>\" }}\r\n{%- endif -%}\r\n\r\n{#- ======== Render main messages ======== #}\r\n{%- for message in messages -%}\r\n {%- if message.role == 'user' -%}\r\n {{- \"<|begin|>user<|content|>\" + message.content + \"<|end|>\" }}\r\n {%- elif message.role == 'tool' -%}\r\n {%- set prev_is_tool = loop.index0 > 0 and 
messages[loop.index0 - 1].role == 'tool' -%}\r\n {%- set next_is_tool = loop.index0 < (messages | length - 1) and messages[loop.index0 + 1].role == 'tool' -%}\r\n {%- if not prev_is_tool -%}\r\n {{- \"<|begin|>tool<|tool_response|>\" }}\r\n {%- endif -%}\r\n {{- \"<|tool_response:begin|>\" + message.tool_call_id + \"<|tool_response:name|>\" }}\r\n {{- get_tool_name(messages, message.tool_call_id) }}\r\n {{- \"<|tool_response:result|>\" }}\r\n {{- message.content }}\r\n {{- \"<|tool_response:end|>\" }}\r\n {%- if not next_is_tool -%}\r\n {{- \"<|end|>\" }}\r\n {%- endif -%}\r\n {%- elif message.role == 'assistant' -%}\r\n {#- ======== Assistant Thinking ======== #}\r\n {%- if think_render_option == \"all\" -%}\r\n {%- if message.reasoning -%}\r\n {{- \"<|begin|>assistant<|think|>\" + message.reasoning + \"<|end|>\" }}\r\n {%- endif -%}\r\n {%- elif think_render_option == \"lastthink\" -%}\r\n {%- if message.reasoning and loop.index0 > last_user_idx.value -%}\r\n {{- \"<|begin|>assistant<|think|>\" + message.reasoning + \"<|end|>\" }}\r\n {%- endif -%}\r\n {%- endif -%}\r\n\r\n {#- ======== Assistant Messages ======== #}\r\n {%- if message.tool_calls -%}\r\n {{- \"<|begin|>assistant<|tool_calls|>\" }}\r\n {%- for tool_call in message.tool_calls -%}\r\n {{- \"<|tool_call:begin|>\" + tool_call.id +\"<|tool_call:name|>\" + tool_call.function.name + \"<|tool_call:args|>\" }}\r\n {{- render_tool_arguments(tool_call.function.arguments) }}\r\n {{- \"<|tool_call:end|>\" }}\r\n {%- endfor -%}\r\n {{- \"<|calls|>\" }}\r\n {%- else -%}\r\n {{- \"<|begin|>assistant<|content|>\" + message.content + \"<|end|>\" }}\r\n {%- endif -%}\r\n {%- endif -%}\r\n{%- endfor -%}\r\n\r\n{%- if add_generation_prompt -%}\r\n {%- if reasoning_effort in [\"low\", \"minimal\"] -%}\r\n {{- \"<|begin|>assistant<|think|><|end|>\" }}\r\n {%- endif -%}\r\n {{- \"<|begin|>assistant\" }}\r\n{%- endif -%}"
|
| 36762 |
}
|
|
|
|
| 36751 |
"clean_up_tokenization_spaces": false,
|
| 36752 |
"eos_token": "<|endoftext|>",
|
| 36753 |
"extra_special_tokens": {},
|
| 36754 |
+
"model_input_names": [
|
| 36755 |
+
"input_ids",
|
| 36756 |
+
"attention_mask"
|
| 36757 |
+
],
|
| 36758 |
"model_max_length": 1000000000000000019884624838656,
|
| 36759 |
"pad_token": "<|endoftext|>",
|
| 36760 |
"padding_side": "left",
|
| 36761 |
"split_special_tokens": false,
|
| 36762 |
"tokenizer_class": "PreTrainedTokenizerFast",
|
| 36763 |
+
"unk_token": "<unk>"
|
|
|
|
| 36764 |
}
|