GOSHUNCLE committed on
Commit
9d3cc69
·
verified ·
1 Parent(s): 2c6aeb2

Add model card, inference code, license, configs

Browse files
Files changed (7) hide show
  1. INFERE~1.PY +217 -0
  2. LICENSE +29 -0
  3. README.md +238 -0
  4. REQUIR~1.TXT +4 -0
  5. adapter_config.json +45 -0
  6. chat_template.jinja +54 -0
  7. tokenizer_config.json +30 -0
INFERE~1.PY ADDED
@@ -0,0 +1,217 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ tool_call_validator_zh - Inference Reference Implementation
3
+
4
+ 提供給 HF Hub 使用者的最小可用推論程式碼。
5
+ 包含 Filter 1 (Schema) + Filter 2 (Provenance) 雙層保險。
6
+
7
+ 使用範例(quickstart):
8
+
9
+ from inference import Detector
10
+ detector = Detector("Qwen/Qwen2.5-3B-Instruct", "GOSHUNCLE/tool_call_validator_zh")
11
+ result = detector.detect(
12
+ user_prompt="請幫我查一下今天台北的 PM2.5 空氣品質指數。",
13
+ tools=[
14
+ {"name": "web_search", "description": "透過搜尋引擎即時取得網路上最新資訊"},
15
+ {"name": "calendar_view", "description": "查看使用者的行事曆"},
16
+ ],
17
+ )
18
+ print(result)
19
+ """
20
+ from __future__ import annotations
21
+
22
+ import json
23
+ from typing import Optional
24
+
25
+ import torch
26
+ from peft import PeftModel
27
+ from transformers import AutoModelForCausalLM, AutoTokenizer
28
+
29
+
30
+ SYSTEM_PROMPT = """你是工具選擇守門員(Tool Selection Guardrail)。
31
+ 你的職責是分析使用者請求,從候選工具清單中選出最適合的工具,或在無合適工具時拒絕匹配。
32
+
33
+ 任務:
34
+ 1. 閱讀使用者的請求(user_prompt)與候選工具清單(tools,含 name 與 description)。
35
+ 2. 判斷哪一個 tool 最符合使用者意圖,或所有候選皆不適用。
36
+ 3. 輸出嚴格 JSON 結果。
37
+
38
+ 輸出格式:
39
+ {
40
+ "reasoning": {
41
+ "intent_summary": "<30-60字:辨識使用者意圖>",
42
+ "key_signals": "<20-40字:抓出使用者請求中的關鍵詞與語意訊號>",
43
+ "conclusion": "<30-60字:說明為什麼選 X 或為什麼拒絕匹配>"
44
+ },
45
+ "selected_tool": "<候選工具名稱,或在拒絕匹配時為 null>",
46
+ "signal": "commit" 或 "abstain",
47
+ "confidence": "high" 、 "medium" 或 "low"
48
+ }
49
+
50
+ 判斷原則:
51
+ 1. selected_tool 必須是候選清單中的 tool name 之一(commit 時)或 null(abstain 時)。
52
+ 2. signal = "commit":候選中至少有 1 個明確相關工具,能涵蓋使用者意圖。
53
+ 3. signal = "abstain":候選清單中沒有任何工具能涵蓋使用者核心意圖;即使部分功能沾邊也應拒答。
54
+ 4. confidence 等級:
55
+ - high:候選中僅 1 個明確相關(或全部明確不相關),無語意混淆。
56
+ - medium:候選中有 1~2 個邊緣相關(混淆 pair),需轉一個彎才能對應。
57
+ - low:多個候選都可能適用,理由勉強選 1 個(或極邊緣拒答)。
58
+
59
+ 規則:
60
+ 1. selected_tool 必須逐字符合候選清單中的 name(含大小寫與底線)。
61
+ 2. 不要為了避免 abstain 而強選不適用的工具——abstain 是有效輸出。
62
+ 3. reasoning 用繁體中文,不直接抄 tool description 全文,要重組為意圖陳述。
63
+ 4. 只回傳 JSON,無其他說明文字。
64
+ """
65
+
66
+
67
+ VALID_SIGNAL = {"commit", "abstain"}
68
+ VALID_CONFIDENCE = {"high", "medium", "low"}
69
+ REQUIRED_REASONING = {"intent_summary", "key_signals", "conclusion"}
70
+
71
+
72
class Detector:
    """Complete inference wrapper: base model + LoRA adapter + two output filters.

    The pipeline implemented by :meth:`detect` is:

    1. :meth:`generate_raw` - build the chat prompt and greedily decode text.
    2. :meth:`_parse_json_lenient` - pull the first JSON object out of the text.
    3. :meth:`_filter_schema` (Filter 1) - check the object against the
       expected output structure; fall back to a safe "abstain" on failure.
    4. :meth:`_filter_provenance` (Filter 2) - on "commit", require that
       ``selected_tool`` is one of the candidate tool names; otherwise abstain.
    """

    def __init__(
        self,
        base_model: str = "Qwen/Qwen2.5-3B-Instruct",
        adapter: Optional[str] = "GOSHUNCLE/tool_call_validator_zh",
        max_new_tokens: int = 384,
        device: Optional[str] = None,
    ):
        """Load tokenizer, base model and (optionally) the LoRA adapter.

        Args:
            base_model: Hub id or local path of the base causal LM.
            adapter: Hub id or local path of the PEFT adapter; a falsy value
                skips adapter loading and uses the base model alone.
            max_new_tokens: Generation budget passed to ``generate``.
            device: Explicit device string (e.g. ``"cpu"``, ``"cuda:0"``).
                ``None`` keeps automatic placement: ``device_map="auto"``
                with fp16 when CUDA is available, otherwise fp32 on CPU.
        """
        # NOTE(review): trust_remote_code=True is forwarded to both loaders;
        # presumably this permits running code shipped in the hub repo, so
        # only point `base_model` at repositories you trust - confirm whether
        # it is still required for the default base model.
        self.tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
        if self.tokenizer.pad_token is None:
            self.tokenizer.pad_token = self.tokenizer.eos_token

        cuda_available = torch.cuda.is_available()
        # Half precision only when the model will actually live on a GPU.
        if device is None:
            on_gpu = cuda_available
        else:
            on_gpu = str(device).startswith("cuda")
        dtype = torch.float16 if on_gpu else torch.float32

        kwargs = {"torch_dtype": dtype, "trust_remote_code": True}
        if device is None and cuda_available:
            kwargs["device_map"] = "auto"
        else:
            kwargs["low_cpu_mem_usage"] = True

        self.model = AutoModelForCausalLM.from_pretrained(base_model, **kwargs)
        if adapter:
            self.model = PeftModel.from_pretrained(self.model, adapter)
        if device is not None:
            # Honour the caller's explicit placement (previously ignored).
            self.model.to(device)
        self.model.eval()
        self.max_new_tokens = max_new_tokens

    @staticmethod
    def _format_user_message(user_prompt: str, tools: list) -> str:
        """Render the user turn: the request followed by a numbered tool list.

        Each entry of ``tools`` must provide ``"name"`` and ``"description"``.
        """
        tools_block = "\n".join(
            f"{i+1}. {t['name']}: {t['description']}" for i, t in enumerate(tools)
        )
        return f"使用者請求:\n{user_prompt}\n\n候選工具:\n{tools_block}"

    def generate_raw(self, user_prompt: str, tools: list) -> str:
        """Return the model's decoded completion (prompt tokens stripped)."""
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": self._format_user_message(user_prompt, tools)},
        ]
        prompt = self.tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        )
        with torch.inference_mode():
            inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
            outputs = self.model.generate(
                **inputs,
                max_new_tokens=self.max_new_tokens,
                do_sample=False,
                pad_token_id=self.tokenizer.pad_token_id,
            )
            # Keep only the newly generated tokens, not the echoed prompt.
            gen = outputs[0][inputs.input_ids.shape[1]:]
            return self.tokenizer.decode(gen, skip_special_tokens=True).strip()

    # ------------------------------------------------------------------
    # Filter 1: Schema validation
    # ------------------------------------------------------------------
    @staticmethod
    def _parse_json_lenient(text: str) -> Optional[dict]:
        """Extract the JSON object that starts at the first ``{`` in ``text``.

        Text before the object and anything after it are ignored. Decoding is
        delegated to ``json.JSONDecoder.raw_decode`` so that braces occurring
        inside string values do not confuse the object boundary.

        Returns:
            The decoded dict, or ``None`` when there is no ``{`` or the text
            starting there is not a valid JSON object.
        """
        start = text.find("{")
        if start < 0:
            return None
        try:
            obj, _end = json.JSONDecoder().raw_decode(text, start)
        except json.JSONDecodeError:
            return None
        return obj

    @staticmethod
    def _filter_schema(parsed: Optional[dict]) -> tuple[dict, bool]:
        """Validate the parsed output structure (Filter 1).

        Returns:
            ``(result, ok)``. When validation fails, ``result`` is a fixed
            abstain/low-confidence fallback and ``ok`` is ``False``. When it
            passes, ``result`` is the parsed output (with ``selected_tool``
            forced to ``None`` on abstain) and ``ok`` is ``True``. The input
            dict is never modified.
        """
        fallback = {
            "reasoning": {
                "intent_summary": "[Filter fallback]",
                "key_signals": "[Filter fallback]",
                "conclusion": "[Filter fallback] 輸出格式錯誤,安全拒答。",
            },
            "selected_tool": None,
            "signal": "abstain",
            "confidence": "low",
        }
        if not isinstance(parsed, dict):
            return fallback, False
        required = ("reasoning", "selected_tool", "signal", "confidence")
        if any(key not in parsed for key in required):
            return fallback, False

        signal = parsed["signal"]
        confidence = parsed["confidence"]
        reasoning = parsed["reasoning"]
        # The isinstance checks also keep unhashable values (lists, dicts)
        # away from the set-membership tests, which would raise TypeError.
        if not isinstance(signal, str) or signal not in VALID_SIGNAL:
            return fallback, False
        if not isinstance(confidence, str) or confidence not in VALID_CONFIDENCE:
            return fallback, False
        if not isinstance(reasoning, dict):
            return fallback, False
        if not REQUIRED_REASONING.issubset(reasoning.keys()):
            return fallback, False
        if signal == "commit" and parsed["selected_tool"] is None:
            return fallback, False
        if signal == "abstain":
            # Normalise on a copy so the caller's dict stays untouched.
            parsed = {**parsed, "selected_tool": None}
        return parsed, True

    # ------------------------------------------------------------------
    # Filter 2: Provenance check
    # ------------------------------------------------------------------
    @staticmethod
    def _filter_provenance(parsed: dict, tools: list) -> dict:
        """Downgrade a "commit" whose tool is not among the candidates (Filter 2).

        Non-commit outputs and commits naming a candidate tool are returned
        unchanged. Otherwise a modified copy is returned with signal
        ``"abstain"``, ``selected_tool`` ``None``, confidence ``"low"`` and a
        fallback conclusion; the input dict is left as it was.
        """
        if parsed["signal"] != "commit":
            return parsed
        names = {t["name"] for t in tools}
        selected = parsed.get("selected_tool")
        try:
            is_candidate = selected in names
        except TypeError:
            # Unhashable value (e.g. a list) can never be a candidate name.
            is_candidate = False
        if is_candidate:
            return parsed

        rejected = dict(parsed)
        rejected["signal"] = "abstain"
        rejected["selected_tool"] = None
        rejected["confidence"] = "low"
        rejected["reasoning"] = dict(parsed["reasoning"])
        rejected["reasoning"]["conclusion"] = (
            "[Filter fallback] 模型輸出的 selected_tool 不在候選清單中,安全拒答。"
        )
        return rejected

    def detect(self, user_prompt: str, tools: list, apply_filters: bool = True) -> dict:
        """Run generation and, by default, both filters.

        Args:
            user_prompt: The end user's request.
            tools: Candidate tools, each a dict with ``name`` and ``description``.
            apply_filters: When ``False``, return the parsed model output as-is,
                or ``{"_unparseable": True, "_raw": <text>}`` if parsing
                produced nothing.

        Returns:
            The (possibly filter-corrected) verdict dict.
        """
        raw = self.generate_raw(user_prompt, tools)
        parsed = self._parse_json_lenient(raw)
        if not apply_filters:
            return parsed if parsed else {"_unparseable": True, "_raw": raw}
        parsed, _ = self._filter_schema(parsed)
        parsed = self._filter_provenance(parsed, tools)
        return parsed
204
+
205
+
206
if __name__ == "__main__":
    # Demo run: load the default model + adapter, classify one sample
    # request against three candidate tools, and pretty-print the verdict.
    demo_prompt = "請幫我查一下今天台北的 PM2.5 空氣品質指數。"
    demo_tools = [
        {"name": "web_search", "description": "透過搜尋引擎即時取得網路上最新資訊"},
        {"name": "calendar_view", "description": "查看使用者的行事曆"},
        {"name": "calculator", "description": "進行數值與數學運算"},
    ]
    verdict = Detector().detect(user_prompt=demo_prompt, tools=demo_tools)
    print(json.dumps(verdict, ensure_ascii=False, indent=2))
LICENSE ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ http://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+
17
+ Copyright 2026 GOSHUNCLE
18
+
19
+ Licensed under the Apache License, Version 2.0 (the "License");
20
+ you may not use this file except in compliance with the License.
21
+ You may obtain a copy of the License at
22
+
23
+ http://www.apache.org/licenses/LICENSE-2.0
24
+
25
+ Unless required by applicable law or agreed to in writing, software
26
+ distributed under the License is distributed on an "AS IS" BASIS,
27
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
28
+ See the License for the specific language governing permissions and
29
+ limitations under the License.
README.md CHANGED
@@ -1,3 +1,241 @@
1
  ---
2
  license: apache-2.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
  license: apache-2.0
3
+ language:
4
+ - zh
5
+ - en
6
+ base_model: Qwen/Qwen2.5-3B-Instruct
7
+ library_name: peft
8
+ pipeline_tag: text-generation
9
+ tags:
10
+ - lora
11
+ - peft
12
+ - tool-selection
13
+ - tool-call
14
+ - guardrail
15
+ - chinese
16
+ - traditional-chinese
17
+ - fine-tuned
18
+ - qwen2
19
  ---
20
+
21
+ # tool_call_validator_zh
22
+
23
+ > 中文 (繁體) Tool Call 驗證 / Guardrail 模型 · LoRA fine-tune of Qwen2.5-3B-Instruct
24
+ > Traditional Chinese tool-call validator (guardrail) — LoRA fine-tune of Qwen2.5-3B-Instruct
25
+
26
+ ---
27
+
28
+ ## 中文說明
29
+
30
+ 本模型是針對 **Tool Call Validation / Guardrail** 場景微調的繁體中文模型。基於 [Qwen/Qwen2.5-3B-Instruct](https://huggingface.co/Qwen/Qwen2.5-3B-Instruct) 用 LoRA 訓練,能夠:
31
+
32
+ 1. 讀取使用者請求(user prompt)與多個候選工具的 description
33
+ 2. 透過語意比對選出最適合的工具,或在無合適工具時拒絕匹配
34
+ 3. 同時輸出結構化的 reasoning(含意圖識別、關鍵詞訊號、結論)
35
+
36
+ 設計用途為**與服務模型並行運行的獨立驗證器**:當服務模型做出 tool call 決策時,本 guardrail 同步給出獨立判斷,提供下游決策機制(人工或仲裁邏輯)參考。
37
+
38
+ ### 任務輸出格式
39
+
40
+ ```json
41
+ {
42
+ "reasoning": {
43
+ "intent_summary": "<30-60字:辨識使用者意圖>",
44
+ "key_signals": "<20-40字:抓出使用者請求中的關鍵詞與語意訊號>",
45
+ "conclusion": "<30-60字:說明為什麼選 X 或為什麼拒絕匹配>"
46
+ },
47
+ "selected_tool": "<候選工具名稱,或在拒絕匹配時為 null>",
48
+ "signal": "commit | abstain",
49
+ "confidence": "high | medium | low"
50
+ }
51
+ ```
52
+
53
+ | 欄位 | 說明 |
54
+ |---|---|
55
+ | `selected_tool` | commit 時必為候選清單之一,abstain 時為 `null` |
56
+ | `signal` | `commit`(明確選定工具)/ `abstain`(候選清單無合適工具)|
57
+ | `confidence` | `high` / `medium` / `low`,反映模型自我評估強度 |
58
+ | `reasoning.intent_summary` | 使用者意圖的精煉描述 |
59
+ | `reasoning.key_signals` | 觸發決策的關鍵詞 / 語意訊號 |
60
+ | `reasoning.conclusion` | 為何選定(或拒絕)的具體理由 |
61
+
62
+ ### Performance(三層次評估)
63
+
64
+ 對齊 [memory_2 IC Firewall](https://huggingface.co/GOSHUNCLE/ic_content_firewall_zh) 的三層次評估設計:
65
+
66
+ | Metric | L1 base | L2 adapter | L3 +Filter |
67
+ |---|---:|---:|---:|
68
+ | Format Validity | 100.0% | 100.0% | 100.0% |
69
+ | **Tool Accuracy** | 57.0% | **100.0%** | **100.0%** |
70
+ | **Signal Accuracy** | 73.0% | **100.0%** | **100.0%** |
71
+ | **Confidence Accuracy** | 48.0% | **99.0%** | **99.0%** |
72
+ | False Alarm Rate | 0.0% | 0.0% | 0.0% |
73
+ | Miss Rate | 40.9% | 0.0% | 0.0% |
74
+
75
+ - **L1 base**:base Qwen2.5-3B(無微調,無 Filter)
76
+ - **L2 adapter**:套用 LoRA adapter,無 Filter
77
+ - **L3 adapter + Filter**:套用 LoRA adapter + Schema validation + Provenance check
78
+
79
+ #### 三個關鍵發現
80
+
81
+ 1. **微調貢獻 +27% ~ +51%**(L1 → L2):base model 偏向過度保守(miss rate 40.9% — 該 commit 卻 abstain),confidence 級別接近瞎猜(48%)。微調全部修正。
82
+ 2. **Filter 貢獻 = 0**(L2 ≡ L3):與 memory_2 IC Firewall 相同現象。微調後輸出已無格式錯誤、selected_tool 必在候選中。Filter 仍保留作為 OOD 保險網。
83
+ 3. **Confidence 是微調貢獻最大維度**(+51%):base 對 high/medium/low 無 calibration 能力,微調學到 99%。
84
+
85
+ ### Quick Start
86
+
87
+ ```python
88
+ import json
89
+ import torch
90
+ from peft import PeftModel
91
+ from transformers import AutoModelForCausalLM, AutoTokenizer
92
+
93
+ base_model = "Qwen/Qwen2.5-3B-Instruct"
94
+ adapter = "GOSHUNCLE/tool_call_validator_zh"
95
+
96
+ tokenizer = AutoTokenizer.from_pretrained(base_model)
97
+ model = AutoModelForCausalLM.from_pretrained(
98
+ base_model, torch_dtype=torch.float16, device_map="auto"
99
+ )
100
+ model = PeftModel.from_pretrained(model, adapter)
101
+ model.eval()
102
+
103
+ SYSTEM_PROMPT = """你是工具選擇守門員(Tool Selection Guardrail)。
104
+ (完整 system prompt 見 inference.py)"""
105
+
106
+ def detect(user_prompt: str, tools: list) -> dict:
107
+ tools_block = "\n".join(f"{i+1}. {t['name']}: {t['description']}"
108
+ for i, t in enumerate(tools))
109
+ user_msg = f"使用者請求:\n{user_prompt}\n\n候選工具:\n{tools_block}"
110
+ messages = [
111
+ {"role": "system", "content": SYSTEM_PROMPT},
112
+ {"role": "user", "content": user_msg},
113
+ ]
114
+ prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
115
+ inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
116
+ with torch.inference_mode():
117
+ outputs = model.generate(**inputs, max_new_tokens=384, do_sample=False,
118
+ pad_token_id=tokenizer.pad_token_id)
119
+ text = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
120
+ start = text.find("{")
121
+ end = text.rfind("}")
122
+ return json.loads(text[start:end+1])
123
+
124
+ # 範例
125
+ result = detect(
126
+ user_prompt="請幫我查一下今天台北的 PM2.5 空氣品質指數。",
127
+ tools=[
128
+ {"name": "web_search", "description": "透過搜尋引擎即時取得網路上最新資訊"},
129
+ {"name": "calendar_view", "description": "查看使用者的行事曆"},
130
+ {"name": "calculator", "description": "進行數值與數學運算"},
131
+ ],
132
+ )
133
+ print(json.dumps(result, ensure_ascii=False, indent=2))
134
+ ```
135
+
136
+ ### Inference Safeguards
137
+
138
+ 雖然 L2 ≡ L3 顯示 Filter 在 in-distribution 上未激活,但建議**在 production 部署仍保留以下安全層**:
139
+
140
+ #### Filter 1: Schema Validation
141
+
142
+ 驗證模型輸出 JSON 是否符合預期結構:
143
+
144
+ - `signal` 必為 `commit` 或 `abstain`
145
+ - `confidence` 必為 `high` / `medium` / `low`
146
+ - `reasoning` 必含三段(intent_summary, key_signals, conclusion)
147
+ - commit 時 `selected_tool` 不可為 null
148
+
149
+ Invalid 時 fallback:`{signal: "abstain", confidence: "low", selected_tool: null}`
150
+
151
+ #### Filter 2: Provenance Check
152
+
153
+ 驗證 commit 時的 `selected_tool` 必逐字出現在輸入候選清單中。若不在 → fallback abstain。這層保護避免模型在 OOD 時幻覺出不存在的 tool 名稱。
154
+
155
+ 完整實作見 [inference.py](./inference.py)。
156
+
157
+ ### Limitations
158
+
159
+ #### 限制 A:Holdout In-distribution
160
+
161
+ 訓練資料與 holdout 共用 template + slot pool。100% 命中**僅反映 in-distribution 表現**,真實業界口語(OOD)的泛化能力**未經實測**。實際使用時請以 confidence 訊號 + Filter 作為保險。
162
+
163
+ #### 限制 B:8 個工具受限
164
+
165
+ 模型訓練資料限定於 8 個合成虛構工具(web_search / knowledge_qa / news_lookup / fact_check / translator / calculator / calendar_view / summarizer),對 8 個工具以外的場景未驗證。但設計上模型應該能對任何 tool description 做語意比對,因為訓練時 description 是動態填入 prompt 的。
166
+
167
+ #### 限制 C:Reasoning 中文偏正式書面語
168
+
169
+ 訓練樣本 reasoning 風格偏向「翻譯式書面語」(如 memory_2 IC Firewall),對極口語化的輸入可能略顯生硬。
170
+
171
+ ### Disclaimer
172
+
173
+ 訓練資料中的工具名稱(web_search 等 8 個)為**合成虛構**,用於 demonstrate 方法論。所有股票標的、人物、地點等 slot pool 內容皆為公開資訊範例,無暗示任何商業關係。
174
+
175
+ ---
176
+
177
+ ## English
178
+
179
+ This is a **LoRA fine-tune of Qwen2.5-3B-Instruct** for Traditional Chinese tool-call validation (guardrail). The model:
180
+
181
+ 1. Reads a user prompt and a list of candidate tools (with descriptions)
182
+ 2. Selects the most appropriate tool via semantic matching, or abstains if none is suitable
183
+ 3. Outputs structured reasoning (intent summary, key signals, conclusion)
184
+
185
+ It is designed to run **as an independent validator in parallel with a serving LLM** that produces actual tool calls. The guardrail's output serves as a reference for downstream arbitration (human review or programmatic logic).
186
+
187
+ ### Performance Summary
188
+
189
+ | Metric | L1 base | L2 adapter | L3 +Filter |
190
+ |---|---:|---:|---:|
191
+ | Format Validity | 100.0% | 100.0% | 100.0% |
192
+ | Tool Accuracy | 57.0% | **100.0%** | 100.0% |
193
+ | Signal Accuracy | 73.0% | **100.0%** | 100.0% |
194
+ | Confidence Accuracy | 48.0% | **99.0%** | 99.0% |
195
+ | False Alarm Rate | 0.0% | 0.0% | 0.0% |
196
+ | Miss Rate | 40.9% | 0.0% | 0.0% |
197
+
198
+ The base Qwen2.5-3B-Instruct achieves 57% tool accuracy and 48% confidence accuracy. After LoRA fine-tuning on 600 synthetic samples (Traditional Chinese), the model reaches 100% tool accuracy and 99% confidence accuracy on the in-distribution holdout. The two-layer post-processing filter (Schema + Provenance) is retained as a safety net for out-of-distribution inputs.
199
+
200
+ ### Training Details
201
+
202
+ | Item | Value |
203
+ |---|---|
204
+ | Base model | Qwen/Qwen2.5-3B-Instruct |
205
+ | Method | LoRA (r=16, alpha=32, dropout=0.05) |
206
+ | Target modules | q_proj, k_proj, v_proj, o_proj |
207
+ | Training data | 600 synthetic samples (Traditional Chinese) |
208
+ | Validation data | 100 in-distribution holdout samples |
209
+ | Epochs | 3 |
210
+ | Batch size | 2 × grad_accum 4 (effective 8) |
211
+ | Learning rate | 2e-4 (cosine schedule, warmup 5%) |
212
+ | Max length | 1024 |
213
+ | Hardware | Google Colab T4 (15 GB VRAM, fp16) |
214
+ | Training time | ~4.4 hours |
215
+ | Best eval_loss | 0.0051 |
216
+
217
+ ### Methodology Inheritance
218
+
219
+ This model inherits the methodology from [GOSHUNCLE/ic_content_firewall_zh](https://huggingface.co/GOSHUNCLE/ic_content_firewall_zh) (IC design industry content firewall):
220
+
221
+ - Dual-track data synthesis (handwritten seed + template-based expansion)
222
+ - Three-tier evaluation design (base / adapter / adapter+filter)
223
+ - Filter philosophy (Schema validation + Provenance check as healthy minimal set)
224
+ - Open-source minimal disclosure strategy
225
+
226
+ ### License
227
+
228
+ Apache 2.0. See [LICENSE](./LICENSE).
229
+
230
+ ### Citation
231
+
232
+ If this model contributes to your research or product, please cite:
233
+
234
+ ```bibtex
235
+ @misc{tool_call_validator_zh_2026,
236
+ author = {GOSHUNCLE},
237
+ title = {tool_call_validator_zh: Traditional Chinese Tool Call Validator (LoRA fine-tune of Qwen2.5-3B)},
238
+ year = {2026},
239
+ url = {https://huggingface.co/GOSHUNCLE/tool_call_validator_zh},
240
+ }
241
+ ```
REQUIR~1.TXT ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch>=2.1
2
+ transformers>=4.45,<5
3
+ peft>=0.13
4
+ accelerate>=0.34
adapter_config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "Qwen/Qwen2.5-3B-Instruct",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "lora_ga_config": null,
23
+ "megatron_config": null,
24
+ "megatron_core": "megatron.core",
25
+ "modules_to_save": null,
26
+ "peft_type": "LORA",
27
+ "peft_version": "0.19.1",
28
+ "qalora_group_size": 16,
29
+ "r": 16,
30
+ "rank_pattern": {},
31
+ "revision": null,
32
+ "target_modules": [
33
+ "k_proj",
34
+ "v_proj",
35
+ "q_proj",
36
+ "o_proj"
37
+ ],
38
+ "target_parameters": null,
39
+ "task_type": "CAUSAL_LM",
40
+ "trainable_token_indices": null,
41
+ "use_bdlora": null,
42
+ "use_dora": false,
43
+ "use_qalora": false,
44
+ "use_rslora": false
45
+ }
chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
tokenizer_config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": null,
5
+ "clean_up_tokenization_spaces": false,
6
+ "eos_token": "<|im_end|>",
7
+ "errors": "replace",
8
+ "extra_special_tokens": [
9
+ "<|im_start|>",
10
+ "<|im_end|>",
11
+ "<|object_ref_start|>",
12
+ "<|object_ref_end|>",
13
+ "<|box_start|>",
14
+ "<|box_end|>",
15
+ "<|quad_start|>",
16
+ "<|quad_end|>",
17
+ "<|vision_start|>",
18
+ "<|vision_end|>",
19
+ "<|vision_pad|>",
20
+ "<|image_pad|>",
21
+ "<|video_pad|>"
22
+ ],
23
+ "is_local": false,
24
+ "local_files_only": false,
25
+ "model_max_length": 131072,
26
+ "pad_token": "<|endoftext|>",
27
+ "split_special_tokens": false,
28
+ "tokenizer_class": "Qwen2Tokenizer",
29
+ "unk_token": null
30
+ }