ss22345 committed on
Commit
701b56b
·
1 Parent(s): 4f59ab4

test: expand tool calling test suite with assertions and new cases

Browse files

Add automated pass/fail assertions (check/check_not helpers) and new
test cases: -tools model suffix, tool_choice=required, tool_choice with
specific function, and streaming regression. Print summary with pass/fail
counts.

Files changed (1) hide show
  1. scripts/test_tool_call.sh +170 -20
scripts/test_tool_call.sh CHANGED
@@ -8,6 +8,31 @@
8
  TOKEN="${1:-free}"
9
  BASE_URL="${2:-http://localhost:8000}"
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  echo "=== 测试 Tool/Function Calling ==="
12
  echo "BASE_URL: $BASE_URL"
13
  echo "TOKEN: ${TOKEN:0:10}..."
@@ -15,7 +40,7 @@ echo ""
15
 
16
  # ===== 测试 1: 带 tools 的流式请求 =====
17
  echo "--- 测试 1: 流式 tool calling ---"
18
- curl -sS "${BASE_URL}/v1/chat/completions" \
19
  -H "Authorization: Bearer ${TOKEN}" \
20
  -H "Content-Type: application/json" \
21
  -d '{
@@ -42,13 +67,17 @@ curl -sS "${BASE_URL}/v1/chat/completions" \
42
  }
43
  }],
44
  "tool_choice": "auto"
45
- }' 2>&1
46
- echo ""
 
 
 
 
47
  echo ""
48
 
49
  # ===== 测试 2: 带 tools 的非流式请求 =====
50
  echo "--- 测试 2: 非流式 tool calling ---"
51
- curl -sS "${BASE_URL}/v1/chat/completions" \
52
  -H "Authorization: Bearer ${TOKEN}" \
53
  -H "Content-Type: application/json" \
54
  -d '{
@@ -75,13 +104,17 @@ curl -sS "${BASE_URL}/v1/chat/completions" \
75
  }
76
  }],
77
  "tool_choice": "auto"
78
- }' 2>&1 | python3 -m json.tool 2>/dev/null || cat
79
- echo ""
 
 
 
 
80
  echo ""
81
 
82
  # ===== 测试 3: 多工具 =====
83
  echo "--- 测试 3: 多工具非流式 ---"
84
- curl -sS "${BASE_URL}/v1/chat/completions" \
85
  -H "Authorization: Bearer ${TOKEN}" \
86
  -H "Content-Type: application/json" \
87
  -d '{
@@ -109,13 +142,16 @@ curl -sS "${BASE_URL}/v1/chat/completions" \
109
  }
110
  ],
111
  "tool_choice": "auto"
112
- }' 2>&1 | python3 -m json.tool 2>/dev/null || cat
113
- echo ""
 
 
 
114
  echo ""
115
 
116
  # ===== 测试 4: 完整多轮对话(tool result 回传)=====
117
  echo "--- 测试 4: 多轮对话 (tool result 回传) ---"
118
- curl -sS "${BASE_URL}/v1/chat/completions" \
119
  -H "Authorization: Bearer ${TOKEN}" \
120
  -H "Content-Type: application/json" \
121
  -d '{
@@ -146,13 +182,16 @@ curl -sS "${BASE_URL}/v1/chat/completions" \
146
  "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}
147
  }
148
  }]
149
- }' 2>&1 | python3 -m json.tool 2>/dev/null || cat
150
- echo ""
 
 
 
151
  echo ""
152
 
153
  # ===== 测试 5: 不带 tools 的普通请求(回归测试)=====
154
  echo "--- 测试 5: 不带 tools 的普通请求(回归)---"
155
- curl -sS "${BASE_URL}/v1/chat/completions" \
156
  -H "Authorization: Bearer ${TOKEN}" \
157
  -H "Content-Type: application/json" \
158
  -d '{
@@ -161,14 +200,125 @@ curl -sS "${BASE_URL}/v1/chat/completions" \
161
  "messages": [
162
  {"role": "user", "content": "你好,1+1等于几?"}
163
  ]
164
- }' 2>&1 | python3 -m json.tool 2>/dev/null || cat
 
 
 
 
165
  echo ""
166
 
167
- echo "=== 测试完成 ==="
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  echo ""
169
  echo "检查要点:"
170
- echo " 1. 测试 1/2: 查看响应中是否有 tool_calls 字段和 finish_reason=tool_calls"
171
- echo " 2. 测试 3: 是否返回多个 tool_calls"
172
- echo " 3. 测试 4: 模型是否基于 tool result 生成自然语言回复"
173
- echo " 4. 测试 5: 不带 tools 时是否正常返回文本无 tool_calls 字段"
174
- echo " 5. 查看服务端日志中的 [ToolCall] 行,确认上游返回的原始格式"
 
 
 
 
 
 
 
 
8
  TOKEN="${1:-free}"
9
  BASE_URL="${2:-http://localhost:8000}"
10
 
11
+ PASS=0
12
+ FAIL=0
13
+
14
+ check() {
15
+ local desc="$1" output="$2" pattern="$3"
16
+ if echo "$output" | grep -qE "$pattern"; then
17
+ echo " ✓ $desc"
18
+ ((PASS++))
19
+ else
20
+ echo " ✗ $desc (未匹配: $pattern)"
21
+ ((FAIL++))
22
+ fi
23
+ }
24
+
25
+ check_not() {
26
+ local desc="$1" output="$2" pattern="$3"
27
+ if echo "$output" | grep -qE "$pattern"; then
28
+ echo " ✗ $desc (不应包含: $pattern)"
29
+ ((FAIL++))
30
+ else
31
+ echo " ✓ $desc"
32
+ ((PASS++))
33
+ fi
34
+ }
35
+
36
  echo "=== 测试 Tool/Function Calling ==="
37
  echo "BASE_URL: $BASE_URL"
38
  echo "TOKEN: ${TOKEN:0:10}..."
 
40
 
41
  # ===== 测试 1: 带 tools 的流式请求 =====
42
  echo "--- 测试 1: 流式 tool calling ---"
43
+ OUT=$(curl -sS "${BASE_URL}/v1/chat/completions" \
44
  -H "Authorization: Bearer ${TOKEN}" \
45
  -H "Content-Type: application/json" \
46
  -d '{
 
67
  }
68
  }],
69
  "tool_choice": "auto"
70
+ }' 2>&1)
71
+ echo "$OUT" | head -20
72
+ check "包含 tool_calls" "$OUT" '"tool_calls"'
73
+ check "包含函数名 get_weather" "$OUT" '"get_weather"'
74
+ check "finish_reason 为 tool_calls" "$OUT" '"finish_reason"\s*:\s*"tool_calls"'
75
+ check "包含 [DONE]" "$OUT" 'data: \[DONE\]'
76
  echo ""
77
 
78
  # ===== 测试 2: 带 tools 的非流式请求 =====
79
  echo "--- 测试 2: 非流式 tool calling ---"
80
+ OUT=$(curl -sS "${BASE_URL}/v1/chat/completions" \
81
  -H "Authorization: Bearer ${TOKEN}" \
82
  -H "Content-Type: application/json" \
83
  -d '{
 
104
  }
105
  }],
106
  "tool_choice": "auto"
107
+ }' 2>&1)
108
+ echo "$OUT" | python3 -m json.tool 2>/dev/null || echo "$OUT"
109
+ check "包含 tool_calls" "$OUT" '"tool_calls"'
110
+ check "包含函数名 get_weather" "$OUT" '"get_weather"'
111
+ check "finish_reason 为 tool_calls" "$OUT" '"finish_reason"\s*:\s*"tool_calls"'
112
+ check_not "不包含 delta 字段" "$OUT" '"delta"'
113
  echo ""
114
 
115
  # ===== 测试 3: 多工具 =====
116
  echo "--- 测试 3: 多工具非流式 ---"
117
+ OUT=$(curl -sS "${BASE_URL}/v1/chat/completions" \
118
  -H "Authorization: Bearer ${TOKEN}" \
119
  -H "Content-Type: application/json" \
120
  -d '{
 
142
  }
143
  ],
144
  "tool_choice": "auto"
145
+ }' 2>&1)
146
+ echo "$OUT" | python3 -m json.tool 2>/dev/null || echo "$OUT"
147
+ check "包含 tool_calls" "$OUT" '"tool_calls"'
148
+ check "包含 get_weather" "$OUT" '"get_weather"'
149
+ check "包含 get_current_time" "$OUT" '"get_current_time"'
150
  echo ""
151
 
152
  # ===== 测试 4: 完整多轮对话(tool result 回传)=====
153
  echo "--- 测试 4: 多轮对话 (tool result 回传) ---"
154
+ OUT=$(curl -sS "${BASE_URL}/v1/chat/completions" \
155
  -H "Authorization: Bearer ${TOKEN}" \
156
  -H "Content-Type: application/json" \
157
  -d '{
 
182
  "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}
183
  }
184
  }]
185
+ }' 2>&1)
186
+ echo "$OUT" | python3 -m json.tool 2>/dev/null || echo "$OUT"
187
+ check "finish_reason 为 stop" "$OUT" '"finish_reason"\s*:\s*"stop"'
188
+ check "包含 message 字段" "$OUT" '"message"'
189
+ check "包含回复内容 (content 非空)" "$OUT" '"content"'
190
  echo ""
191
 
192
  # ===== 测试 5: 不带 tools 的普通请求(回归测试)=====
193
  echo "--- 测试 5: 不带 tools 的普通请求(回归)---"
194
+ OUT=$(curl -sS "${BASE_URL}/v1/chat/completions" \
195
  -H "Authorization: Bearer ${TOKEN}" \
196
  -H "Content-Type: application/json" \
197
  -d '{
 
200
  "messages": [
201
  {"role": "user", "content": "你好,1+1等于几?"}
202
  ]
203
+ }' 2>&1)
204
+ echo "$OUT" | python3 -m json.tool 2>/dev/null || echo "$OUT"
205
+ check "finish_reason 为 stop" "$OUT" '"finish_reason"\s*:\s*"stop"'
206
+ check_not "不包含 tool_calls" "$OUT" '"tool_calls"'
207
+ check_not "不包含 delta" "$OUT" '"delta"'
208
  echo ""
209
 
210
+ # ===== 测试 6: -tools 模型后缀 =====
211
+ echo "--- 测试 6: -tools 模型后缀 (GLM-4.7-tools) ---"
212
+ OUT=$(curl -sS "${BASE_URL}/v1/chat/completions" \
213
+ -H "Authorization: Bearer ${TOKEN}" \
214
+ -H "Content-Type: application/json" \
215
+ -d '{
216
+ "model": "GLM-4.7-tools",
217
+ "stream": false,
218
+ "messages": [
219
+ {"role": "user", "content": "现在几点了?"}
220
+ ]
221
+ }' 2>&1)
222
+ echo "$OUT" | python3 -m json.tool 2>/dev/null || echo "$OUT"
223
+ check "包含 tool_calls 或正常回复" "$OUT" '"choices"'
224
+ echo "(注意: -tools 模型自动注入内置工具,模型可能调用也可能不调用)"
225
+ echo ""
226
+
227
+ # ===== 测试 7: tool_choice required =====
228
+ echo "--- 测试 7: tool_choice=required ---"
229
+ OUT=$(curl -sS "${BASE_URL}/v1/chat/completions" \
230
+ -H "Authorization: Bearer ${TOKEN}" \
231
+ -H "Content-Type: application/json" \
232
+ -d '{
233
+ "model": "GLM-4.7",
234
+ "stream": false,
235
+ "messages": [
236
+ {"role": "user", "content": "查询北京天气"}
237
+ ],
238
+ "tools": [{
239
+ "type": "function",
240
+ "function": {
241
+ "name": "get_weather",
242
+ "description": "获取指定城市的当前天气信息",
243
+ "parameters": {
244
+ "type": "object",
245
+ "properties": {"location": {"type": "string", "description": "城市名称"}},
246
+ "required": ["location"]
247
+ }
248
+ }
249
+ }],
250
+ "tool_choice": "required"
251
+ }' 2>&1)
252
+ echo "$OUT" | python3 -m json.tool 2>/dev/null || echo "$OUT"
253
+ check "包含 tool_calls" "$OUT" '"tool_calls"'
254
+ check "finish_reason 为 tool_calls" "$OUT" '"finish_reason"\s*:\s*"tool_calls"'
255
+ echo ""
256
+
257
+ # ===== 测试 8: tool_choice 指定具体函数 =====
258
+ echo "--- 测试 8: tool_choice 指定具体函数 ---"
259
+ OUT=$(curl -sS "${BASE_URL}/v1/chat/completions" \
260
+ -H "Authorization: Bearer ${TOKEN}" \
261
+ -H "Content-Type: application/json" \
262
+ -d '{
263
+ "model": "GLM-4.7",
264
+ "stream": false,
265
+ "messages": [
266
+ {"role": "user", "content": "你好"}
267
+ ],
268
+ "tools": [{
269
+ "type": "function",
270
+ "function": {
271
+ "name": "get_weather",
272
+ "description": "获取指定城市的当前天气信息",
273
+ "parameters": {
274
+ "type": "object",
275
+ "properties": {"location": {"type": "string"}},
276
+ "required": ["location"]
277
+ }
278
+ }
279
+ }],
280
+ "tool_choice": {"type": "function", "function": {"name": "get_weather"}}
281
+ }' 2>&1)
282
+ echo "$OUT" | python3 -m json.tool 2>/dev/null || echo "$OUT"
283
+ check "包含 get_weather" "$OUT" '"get_weather"'
284
+ echo ""
285
+
286
+ # ===== 测试 9: 流式普通请求回归 =====
287
+ echo "--- 测试 9: 流式不带 tools(回归)---"
288
+ OUT=$(curl -sS "${BASE_URL}/v1/chat/completions" \
289
+ -H "Authorization: Bearer ${TOKEN}" \
290
+ -H "Content-Type: application/json" \
291
+ -d '{
292
+ "model": "GLM-4.7",
293
+ "stream": true,
294
+ "messages": [
295
+ {"role": "user", "content": "你好"}
296
+ ]
297
+ }' 2>&1)
298
+ echo "$OUT" | head -10
299
+ check "finish_reason 为 stop" "$OUT" '"finish_reason"\s*:\s*"stop"'
300
+ check "包含 [DONE]" "$OUT" 'data: \[DONE\]'
301
+ check_not "不包含 tool_calls" "$OUT" '"tool_calls"'
302
+ echo ""
303
+
304
+ # ===== 汇总 =====
305
+ echo "================================"
306
+ echo "=== 测试汇总 ==="
307
+ echo " 通过: $PASS"
308
+ echo " 失败: $FAIL"
309
+ echo " 总计: $((PASS + FAIL))"
310
+ echo "================================"
311
  echo ""
312
  echo "检查要点:"
313
+ echo " 1. 测试 1/2: 响应中有 tool_calls 字段和 finish_reason=tool_calls"
314
+ echo " 2. 测试 3: 返回多个 tool_calls(get_weather 和 get_current_time)"
315
+ echo " 3. 测试 4: 模型基于 tool result 生成自然语言回复,finish_reason=stop"
316
+ echo " 4. 测试 5/9: 不带 tools 时正常返回文本无 tool_calls 字段"
317
+ echo " 5. 测试 6: -tools 后缀会自动注入内置工具(模型可能触发也可能不触发)"
318
+ echo " 6. 测试 7: tool_choice=required 应强制模型调用工具"
319
+ echo " 7. 测试 8: tool_choice 指定函数名应调用该函数"
320
+ echo " 8. 查看服务端日志中的 [ToolCall] 行,确认上游返回的原始格式"
321
+
322
+ if [ "$FAIL" -gt 0 ]; then
323
+ exit 1
324
+ fi