Add model card, inference code, license, configs

Browse files

Files changed (7) hide show

INFERE~1.PY +217 -0
LICENSE +29 -0
README.md +238 -0
REQUIR~1.TXT +4 -0
adapter_config.json +45 -0
chat_template.jinja +54 -0
tokenizer_config.json +30 -0

INFERE~1.PY ADDED Viewed

	@@ -0,0 +1,217 @@

+"""
+tool_call_validator_zh - Inference Reference Implementation
+提供給 HF Hub 使用者的最小可用推論程式碼。
+包含 Filter 1 (Schema) + Filter 2 (Provenance) 雙層保險。
+使用範例（quickstart）：
+  from inference import Detector
+  detector = Detector("Qwen/Qwen2.5-3B-Instruct", "GOSHUNCLE/tool_call_validator_zh")
+  result = detector.detect(
+      user_prompt="請幫我查一下今天台北的 PM2.5 空氣品質指數。",
+      tools=[
+          {"name": "web_search", "description": "透過搜尋引擎即時取得網路上最新資訊"},
+          {"name": "calendar_view", "description": "查看使用者的行事曆"},
+      ],
+  )
+  print(result)
+"""
+from __future__ import annotations
+import json
+from typing import Optional
+import torch
+from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer
+# System prompt sent as the `system` chat message on every request (see
+# Detector.generate_raw). It is runtime text that the model reads, so it is
+# kept in the original language and must not be edited casually. It describes
+# the JSON output contract (reasoning / selected_tool / signal / confidence)
+# that Detector._filter_schema validates.
+SYSTEM_PROMPT = """你是工具選擇守門員（Tool Selection Guardrail）。
+你的職責是分析使用者請求，從候選工具清單中選出最適合的工具，或在無合適工具時拒絕匹配。
+任務：
+1. 閱讀使用者的請求（user_prompt）與候選工具清單（tools，含 name 與 description）。
+2. 判斷哪一個 tool 最符合使用者意圖，或所有候選皆不適用。
+3. 輸出嚴格 JSON 結果。
+輸出格式：
+{
+  "reasoning": {
+    "intent_summary": "<30-60字：辨識使用者意圖>",
+    "key_signals": "<20-40字：抓出使用者請求中的關鍵詞與語意訊號>",
+    "conclusion": "<30-60字：說明為什麼選 X 或為什麼拒絕匹配>"
+  },
+  "selected_tool": "<候選工具名稱，或在拒絕匹配時為 null>",
+  "signal": "commit" 或 "abstain",
+  "confidence": "high" 、 "medium" 或 "low"
+}
+判斷原則：
+1. selected_tool 必須是候選清單中的 tool name 之一（commit 時）或 null（abstain 時）。
+2. signal = "commit"：候選中至少有 1 個明確相關工具，能涵蓋使用者意圖。
+3. signal = "abstain"：候選清單中沒有任何工具能涵蓋使用者核心意圖；即使部分功能沾邊也應拒答。
+4. confidence 等級：
+   - high：候選中僅 1 個明確相關（或全部明確不相關），無語意混淆。
+   - medium：候選中有 1~2 個邊緣相關（混淆 pair），需轉一個彎才能對應。
+   - low：多個候選都可能適用，理由勉強選 1 個（或極邊緣拒答）。
+規則：
+1. selected_tool 必須逐字符合候選清單中的 name（含大小寫與底線）。
+2. 不要為了避免 abstain 而強選不適用的工具——abstain 是有效輸出。
+3. reasoning 用繁體中文，不直接抄 tool description 全文，要重組為意圖陳述。
+4. 只回傳 JSON，無其他說明文字。
+"""
+# Allowed values of the "signal" field in the model output.
+VALID_SIGNAL = {"commit", "abstain"}
+# Allowed values of the "confidence" field in the model output.
+VALID_CONFIDENCE = {"high", "medium", "low"}
+# Keys that must all be present inside the "reasoning" object.
+REQUIRED_REASONING = {"intent_summary", "key_signals", "conclusion"}
+class Detector:
+    """LoRA + Filter 1 + Filter 2 完整推論器"""
+    def __init__(
+        self,
+        base_model: str = "Qwen/Qwen2.5-3B-Instruct",
+        adapter: Optional[str] = "GOSHUNCLE/tool_call_validator_zh",
+        max_new_tokens: int = 384,
+        device: Optional[str] = None,
+    ):
+        self.tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
+        if self.tokenizer.pad_token is None:
+            self.tokenizer.pad_token = self.tokenizer.eos_token
+        dtype = torch.float16 if torch.cuda.is_available() else torch.float32
+        kwargs = {"torch_dtype": dtype, "trust_remote_code": True}
+        if torch.cuda.is_available():
+            kwargs["device_map"] = "auto"
+        else:
+            kwargs["low_cpu_mem_usage"] = True
+        self.model = AutoModelForCausalLM.from_pretrained(base_model, **kwargs)
+        if adapter:
+            self.model = PeftModel.from_pretrained(self.model, adapter)
+        self.model.eval()
+        self.max_new_tokens = max_new_tokens
+    @staticmethod
+    def _format_user_message(user_prompt: str, tools: list) -> str:
+        tools_block = "\n".join(
+            f"{i+1}. {t['name']}: {t['description']}" for i, t in enumerate(tools)
+        )
+        return f"使用者請求：\n{user_prompt}\n\n候選工具：\n{tools_block}"
+    @torch.inference_mode()
+    def generate_raw(self, user_prompt: str, tools: list) -> str:
+        messages = [
+            {"role": "system", "content": SYSTEM_PROMPT},
+            {"role": "user", "content": self._format_user_message(user_prompt, tools)},
+        ]
+        prompt = self.tokenizer.apply_chat_template(
+            messages, tokenize=False, add_generation_prompt=True
+        )
+        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
+        outputs = self.model.generate(
+            **inputs,
+            max_new_tokens=self.max_new_tokens,
+            do_sample=False,
+            pad_token_id=self.tokenizer.pad_token_id,
+        )
+        gen = outputs[0][inputs.input_ids.shape[1]:]
+        return self.tokenizer.decode(gen, skip_special_tokens=True).strip()
+    # ------------------------------------------------------------------
+    # Filter 1: Schema validation
+    # ------------------------------------------------------------------
+    @staticmethod
+    def _parse_json_lenient(text: str) -> Optional[dict]:
+        text = text.strip()
+        start = text.find("{")
+        if start < 0:
+            return None
+        depth = 0
+        for i in range(start, len(text)):
+            if text[i] == "{":
+                depth += 1
+            elif text[i] == "}":
+                depth -= 1
+                if depth == 0:
+                    try:
+                        return json.loads(text[start:i+1])
+                    except json.JSONDecodeError:
+                        return None
+        return None
+    @staticmethod
+    def _filter_schema(parsed: Optional[dict]) -> tuple[dict, bool]:
+        fallback = {
+            "reasoning": {
+                "intent_summary": "[Filter fallback]",
+                "key_signals": "[Filter fallback]",
+                "conclusion": "[Filter fallback] 輸出格式錯誤，安全拒答。",
+            },
+            "selected_tool": None,
+            "signal": "abstain",
+            "confidence": "low",
+        }
+        if not isinstance(parsed, dict):
+            return fallback, False
+        if not all(k in parsed for k in ("reasoning", "selected_tool", "signal", "confidence")):
+            return fallback, False
+        if parsed["signal"] not in VALID_SIGNAL:
+            return fallback, False
+        if parsed["confidence"] not in VALID_CONFIDENCE:
+            return fallback, False
+        if not isinstance(parsed["reasoning"], dict):
+            return fallback, False
+        if not REQUIRED_REASONING.issubset(parsed["reasoning"].keys()):
+            return fallback, False
+        if parsed["signal"] == "commit" and parsed["selected_tool"] is None:
+            return fallback, False
+        if parsed["signal"] == "abstain":
+            parsed["selected_tool"] = None
+        return parsed, True
+    # ------------------------------------------------------------------
+    # Filter 2: Provenance check
+    # ------------------------------------------------------------------
+    @staticmethod
+    def _filter_provenance(parsed: dict, tools: list) -> dict:
+        if parsed["signal"] != "commit":
+            return parsed
+        names = {t["name"] for t in tools}
+        if parsed.get("selected_tool") not in names:
+            parsed = dict(parsed)
+            parsed["signal"] = "abstain"
+            parsed["selected_tool"] = None
+            parsed["confidence"] = "low"
+            parsed["reasoning"] = dict(parsed["reasoning"])
+            parsed["reasoning"]["conclusion"] = (
+                "[Filter fallback] 模型輸出的 selected_tool 不在候選清單中，安全拒答。"
+            )
+        return parsed
+    def detect(self, user_prompt: str, tools: list, apply_filters: bool = True) -> dict:
+        raw = self.generate_raw(user_prompt, tools)
+        parsed = self._parse_json_lenient(raw)
+        if not apply_filters:
+            return parsed if parsed else {"_unparseable": True, "_raw": raw}
+        parsed, _ = self._filter_schema(parsed)
+        parsed = self._filter_provenance(parsed, tools)
+        return parsed
+if __name__ == "__main__":
+    # Quick demo
+    detector = Detector()
+    result = detector.detect(
+        user_prompt="請幫我查一下今天台北的 PM2.5 空氣品質指數。",
+        tools=[
+            {"name": "web_search",    "description": "透過搜尋引擎即時取得網路上最新資訊"},
+            {"name": "calendar_view", "description": "查看使用者的行事曆"},
+            {"name": "calculator",    "description": "進行數值與數學運算"},
+        ],
+    )
+    print(json.dumps(result, ensure_ascii=False, indent=2))

LICENSE ADDED Viewed

	@@ -0,0 +1,29 @@

+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+   Copyright 2026 GOSHUNCLE
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+       http://www.apache.org/licenses/LICENSE-2.0
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

README.md CHANGED Viewed

@@ -1,3 +1,241 @@
 ---
 license: apache-2.0
 ---

 ---
 license: apache-2.0
+language:
+- zh
+- en
+base_model: Qwen/Qwen2.5-3B-Instruct
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- lora
+- peft
+- tool-selection
+- tool-call
+- guardrail
+- chinese
+- traditional-chinese
+- fine-tuned
+- qwen2
 ---
+# tool_call_validator_zh
+> 中文 (繁體) Tool Call 驗證 / Guardrail 模型 · LoRA fine-tune of Qwen2.5-3B-Instruct
+> Traditional Chinese tool-call validator (guardrail) — LoRA fine-tune of Qwen2.5-3B-Instruct
+---
+## 中文說明
+本模型是針對 **Tool Call Validation / Guardrail** 場景微調的繁體中文模型。基於 [Qwen/Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct) 用 LoRA 訓練，能夠：
+1. 讀取使用者請求（user prompt）與多個候選工具的 description
+2. 透過語意比對選出最適合的工具，或在無合適工具時拒絕匹配
+3. 同時輸出結構化的 reasoning（含意圖識別、關鍵詞訊號、結論）
+設計用途為**與服務模型並行運行的獨立驗證器**：當服務模型做出 tool call 決策時，本 guardrail 同步給出獨立判斷，提供下游決策機制（人工或仲裁邏輯）參考。
+### 任務輸出格式
+```json
+{
+  "reasoning": {
+    "intent_summary": "<30-60字：辨識使用者意圖>",
+    "key_signals": "<20-40字：抓出使用者請求中的關鍵詞與語意訊號>",
+    "conclusion": "<30-60字：說明為什麼選 X 或為什麼拒絕匹配>"
+  },
+  "selected_tool": "<候選工具名稱，或在拒絕匹配時為 null>",
+  "signal": "commit | abstain",
+  "confidence": "high | medium | low"
+}
+```
+| 欄位 | 說明 |
+|---|---|
+| `selected_tool` | commit 時必為候選清單之一，abstain 時為 `null` |
+| `signal` | `commit`（明確選定工具）/ `abstain`（候選清單無合適工具）|
+| `confidence` | `high` / `medium` / `low`，反映模型自我評估強度 |
+| `reasoning.intent_summary` | 使用者意圖的精煉描述 |
+| `reasoning.key_signals` | 觸發決策的關鍵詞 / 語意訊號 |
+| `reasoning.conclusion` | 為何選定（或拒絕）的具體理由 |
+### Performance（三層次評估）
+對齊 [memory_2 IC Firewall](https://huggingface.co/GOSHUNCLE/ic_content_firewall_zh) 的三層次評估設計：
+| Metric | L1 base | L2 adapter | L3 +Filter |
+|---|---:|---:|---:|
+| Format Validity | 100.0% | 100.0% | 100.0% |
+| **Tool Accuracy** | 57.0% | **100.0%** | **100.0%** |
+| **Signal Accuracy** | 73.0% | **100.0%** | **100.0%** |
+| **Confidence Accuracy** | 48.0% | **99.0%** | **99.0%** |
+| False Alarm Rate | 0.0% | 0.0% | 0.0% |
+| Miss Rate | 40.9% | 0.0% | 0.0% |
+- **L1 base**：base Qwen2.5-3B-Instruct（無微調，無 Filter）
+- **L2 adapter**：套用 LoRA adapter，無 Filter
+- **L3 adapter + Filter**：套用 LoRA adapter + Schema validation + Provenance check
+#### 三個關鍵發現
+1. **微調貢獻 +27 ~ +51 個百分點**（L1 → L2）：base model 偏向過度保守（miss rate 40.9% — 該 commit 卻 abstain），confidence 級別接近瞎猜（48.0%）。微調後 Tool / Signal Accuracy 達 100%，Confidence Accuracy 達 99%。
+2. **Filter 貢獻 = 0**（L2 ≡ L3）：與 memory_2 IC Firewall 相同現象。微調後輸出已無格式錯誤、selected_tool 必在候選中。Filter 仍保留作為 OOD 保險網。
+3. **Confidence 是微調貢獻最大維度**（+51 個百分點，48.0% → 99.0%）：base 對 high/medium/low 幾乎沒有 calibration 能力，微調後準確率達 99%。
+### Quick Start
+```python
+import json
+import torch
+from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer
+base_model = "Qwen/Qwen2.5-3B-Instruct"
+adapter = "GOSHUNCLE/tool_call_validator_zh"
+tokenizer = AutoTokenizer.from_pretrained(base_model)
+model = AutoModelForCausalLM.from_pretrained(
+    base_model, torch_dtype=torch.float16, device_map="auto"
+)
+model = PeftModel.from_pretrained(model, adapter)
+model.eval()
+SYSTEM_PROMPT = """你是工具選擇守門員（Tool Selection Guardrail）。
+（完整 system prompt 見 inference.py）"""
+def detect(user_prompt: str, tools: list) -> dict:
+    tools_block = "\n".join(f"{i+1}. {t['name']}: {t['description']}"
+                              for i, t in enumerate(tools))
+    user_msg = f"使用者請求：\n{user_prompt}\n\n候選工具：\n{tools_block}"
+    messages = [
+        {"role": "system", "content": SYSTEM_PROMPT},
+        {"role": "user", "content": user_msg},
+    ]
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+    with torch.inference_mode():
+        outputs = model.generate(**inputs, max_new_tokens=384, do_sample=False,
+                                  pad_token_id=tokenizer.pad_token_id)
+    text = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+    start = text.find("{")
+    end = text.rfind("}")
+    return json.loads(text[start:end+1])
+# 範例
+result = detect(
+    user_prompt="請幫我查一下今天台北的 PM2.5 空氣品質指數。",
+    tools=[
+        {"name": "web_search", "description": "透過搜尋引擎即時取得網路上最新資訊"},
+        {"name": "calendar_view", "description": "查看使用者的行事曆"},
+        {"name": "calculator", "description": "進行數值與數學運算"},
+    ],
+)
+print(json.dumps(result, ensure_ascii=False, indent=2))
+```
+### Inference Safeguards
+雖然 L2 ≡ L3 顯示 Filter 在 in-distribution 上未激活，但建議**在 production 部署仍保留以下安全層**：
+#### Filter 1: Schema Validation
+驗證模型輸出 JSON 是否符合預期結構：
+- `signal` 必為 `commit` 或 `abstain`
+- `confidence` 必為 `high` / `medium` / `low`
+- `reasoning` 必含三段（intent_summary, key_signals, conclusion）
+- commit 時 `selected_tool` 不可為 null
+Invalid 時 fallback：`{signal: "abstain", confidence: "low", selected_tool: null}`
+#### Filter 2: Provenance Check
+驗證 commit 時的 `selected_tool` 必逐字出現在輸入候選清單中。若不在 → fallback abstain。這層保護避免模型在 OOD 時幻覺出不存在的 tool 名稱。
+完整實作見 [inference.py](./inference.py)。
+### Limitations
+#### 限制 A：Holdout In-distribution
+訓練資料與 holdout 共用 template + slot pool。100% 命中**僅反映 in-distribution 表現**，真實業界口語（OOD）的泛化能力**未經實測**。實際使用時請以 confidence 訊號 + Filter 作為保險。
+#### 限制 B：8 個工具受限
+模型訓練資料限定於 8 個合成虛構工具（web_search / knowledge_qa / news_lookup / fact_check / translator / calculator / calendar_view / summarizer），對 8 個工具以外的場景未驗證。但設計上模型應該能對任何 tool description 做語意比對，因為訓練時 description 是動態填入 prompt 的。
+#### 限制 C：Reasoning 中文偏正式書面語
+訓練樣本 reasoning 風格偏向「翻譯式書面語」（如 memory_2 IC Firewall），對極口語化的輸入可能略顯生硬。
+### Disclaimer
+訓練資料中的工具名稱（web_search 等 8 個）為**合成虛構**，用於 demonstrate 方法論。所有股票標的、人物、地點等 slot pool 內容皆為公開資訊範例，無暗示任何商業關係。
+---
+## English
+This is a **LoRA fine-tune of Qwen2.5-3B-Instruct** for Traditional Chinese tool-call validation (guardrail). The model:
+1. Reads a user prompt and a list of candidate tools (with descriptions)
+2. Selects the most appropriate tool via semantic matching, or abstains if none is suitable
+3. Outputs structured reasoning (intent summary, key signals, conclusion)
+It is designed to run **as an independent validator in parallel with a serving LLM** that produces actual tool calls. The guardrail's output serves as a reference for downstream arbitration (human review or programmatic logic).
+### Performance Summary
+| Metric | L1 base | L2 adapter | L3 +Filter |
+|---|---:|---:|---:|
+| Format Validity | 100.0% | 100.0% | 100.0% |
+| Tool Accuracy | 57.0% | **100.0%** | 100.0% |
+| Signal Accuracy | 73.0% | **100.0%** | 100.0% |
+| Confidence Accuracy | 48.0% | **99.0%** | 99.0% |
+| False Alarm Rate | 0.0% | 0.0% | 0.0% |
+| Miss Rate | 40.9% | 0.0% | 0.0% |
+The base Qwen2.5-3B-Instruct achieves 57% tool accuracy and 48% confidence accuracy. After LoRA fine-tuning on 600 synthetic samples (Traditional Chinese), the model reaches 100% tool accuracy and 99% confidence accuracy on the in-distribution holdout. The two-layer post-processing filter (Schema + Provenance) is retained as a safety net for out-of-distribution inputs.
+### Training Details
+| Item | Value |
+|---|---|
+| Base model | Qwen/Qwen2.5-3B-Instruct |
+| Method | LoRA (r=16, alpha=32, dropout=0.05) |
+| Target modules | q_proj, k_proj, v_proj, o_proj |
+| Training data | 600 synthetic samples (Traditional Chinese) |
+| Validation data | 100 in-distribution holdout samples |
+| Epochs | 3 |
+| Batch size | 2 × grad_accum 4 (effective 8) |
+| Learning rate | 2e-4 (cosine schedule, warmup 5%) |
+| Max length | 1024 |
+| Hardware | Google Colab T4 (15 GB VRAM, fp16) |
+| Training time | ~4.4 hours |
+| Best eval_loss | 0.0051 |
+### Methodology Inheritance
+This model inherits the methodology from [GOSHUNCLE/ic_content_firewall_zh](https://huggingface.co/GOSHUNCLE/ic_content_firewall_zh) (IC design industry content firewall):
+- Dual-track data synthesis (handwritten seed + template-based expansion)
+- Three-tier evaluation design (base / adapter / adapter+filter)
+- Filter philosophy (Schema validation + Provenance check as healthy minimal set)
+- Open-source minimal disclosure strategy
+### License
+Apache 2.0. See [LICENSE](./LICENSE).
+### Citation
+If this model contributes to your research or product, please cite:
+```bibtex
+@misc{tool_call_validator_zh_2026,
+  author = {GOSHUNCLE},
+  title  = {tool_call_validator_zh: Traditional Chinese Tool Call Validator (LoRA fine-tune of Qwen2.5-3B-Instruct)},
+  year   = {2026},
+  url    = {https://huggingface.co/GOSHUNCLE/tool_call_validator_zh},
+}
+```

REQUIR~1.TXT ADDED Viewed

	@@ -0,0 +1,4 @@

+torch>=2.1
+transformers>=4.45,<5
+peft>=0.13
+accelerate>=0.34

adapter_config.json ADDED Viewed

	@@ -0,0 +1,45 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "Qwen/Qwen2.5-3B-Instruct",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "lora_ga_config": null,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.19.1",
+  "qalora_group_size": 16,
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "v_proj",
+    "q_proj",
+    "o_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_bdlora": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

chat_template.jinja ADDED Viewed

	@@ -0,0 +1,54 @@

+{%- if tools %}
+    {{- '<|im_start|>system\n' }}
+    {%- if messages[0]['role'] == 'system' %}
+        {{- messages[0]['content'] }}
+    {%- else %}
+        {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
+    {%- endif %}
+    {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
+    {%- for tool in tools %}
+        {{- "\n" }}
+        {{- tool | tojson }}
+    {%- endfor %}
+    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
+{%- else %}
+    {%- if messages[0]['role'] == 'system' %}
+        {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
+    {%- else %}
+        {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
+    {%- endif %}
+{%- endif %}
+{%- for message in messages %}
+    {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
+        {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
+    {%- elif message.role == "assistant" %}
+        {{- '<|im_start|>' + message.role }}
+        {%- if message.content %}
+            {{- '\n' + message.content }}
+        {%- endif %}
+        {%- for tool_call in message.tool_calls %}
+            {%- if tool_call.function is defined %}
+                {%- set tool_call = tool_call.function %}
+            {%- endif %}
+            {{- '\n<tool_call>\n{"name": "' }}
+            {{- tool_call.name }}
+            {{- '", "arguments": ' }}
+            {{- tool_call.arguments | tojson }}
+            {{- '}\n</tool_call>' }}
+        {%- endfor %}
+        {{- '<|im_end|>\n' }}
+    {%- elif message.role == "tool" %}
+        {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
+            {{- '<|im_start|>user' }}
+        {%- endif %}
+        {{- '\n<tool_response>\n' }}
+        {{- message.content }}
+        {{- '\n</tool_response>' }}
+        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
+            {{- '<|im_end|>\n' }}
+        {%- endif %}
+    {%- endif %}
+{%- endfor %}
+{%- if add_generation_prompt %}
+    {{- '<|im_start|>assistant\n' }}
+{%- endif %}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": null,
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|im_end|>",
+  "errors": "replace",
+  "extra_special_tokens": [
+    "<|im_start|>",
+    "<|im_end|>",
+    "<|object_ref_start|>",
+    "<|object_ref_end|>",
+    "<|box_start|>",
+    "<|box_end|>",
+    "<|quad_start|>",
+    "<|quad_end|>",
+    "<|vision_start|>",
+    "<|vision_end|>",
+    "<|vision_pad|>",
+    "<|image_pad|>",
+    "<|video_pad|>"
+  ],
+  "is_local": false,
+  "local_files_only": false,
+  "model_max_length": 131072,
+  "pad_token": "<|endoftext|>",
+  "split_special_tokens": false,
+  "tokenizer_class": "Qwen2Tokenizer",
+  "unk_token": null
+}