# vllm-tool-calling-guide / examples / test_tool_calling.py
# Author: Joshua Odmark
# Initial release: VLLM tool calling guide for open source models
# Commit: 634c038
#!/usr/bin/env python3
"""
VLLM Tool Calling Smoke Test
=============================
Quick verification that your VLLM instance supports tool calling.
Tests single tool calls, parallel tool calls, and multi-turn conversations.
Usage:
python test_tool_calling.py --url http://localhost:8000 --model NousResearch/Hermes-3-Llama-3.1-70B-FP8
"""
import argparse
import json
import sys
import requests
def test_single_tool_call(url: str, model: str) -> bool:
    """Test a basic single tool call.

    Sends one user message with a single `get_weather` tool definition and
    passes if the model responds with at least one tool call.

    Args:
        url: Base URL of the VLLM server (no trailing /v1).
        model: Model identifier as served by VLLM.

    Returns:
        True if the model emitted a tool call, False otherwise.
    """
    print("Test 1: Single Tool Call")
    print("-" * 40)
    payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": "What's the weather in San Francisco? Use the get_weather tool."}
        ],
        "tools": [{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {"type": "string", "description": "City name"},
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
                    },
                    "required": ["location"]
                }
            }
        }],
        "tool_choice": "auto",
        # Low temperature keeps the tool-choice decision deterministic.
        "temperature": 0.1,
        "max_tokens": 500
    }
    try:
        response = requests.post(f"{url}/v1/chat/completions", json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()
        message = result["choices"][0]["message"]
        if message.get("tool_calls"):
            tc = message["tool_calls"][0]
            print(f" Tool: {tc['function']['name']}")
            print(f" Arguments: {tc['function']['arguments']}")
            print(" PASSED\n")
            return True
        else:
            # BUG FIX: "content" may be present but explicitly null in
            # OpenAI-style responses, so .get()'s default never applies and
            # slicing None raises TypeError. `or` covers both None and "".
            print(f" Model returned text: {(message.get('content') or '(empty)')[:100]}")
            print(" FAILED\n")
            return False
    # Narrowed from bare Exception: network/HTTP failures, malformed JSON
    # (ValueError covers json.JSONDecodeError), and unexpected response shape.
    except (requests.RequestException, ValueError, KeyError, IndexError) as e:
        print(f" ERROR: {e}")
        print(" FAILED\n")
        return False
def test_parallel_tool_calls(url: str, model: str) -> bool:
    """Test if the model can make multiple tool calls in one turn.

    Asks for the weather in two cities at once; full pass requires 2+ tool
    calls in a single assistant message. A single call still counts as
    working (some models serialize parallel calls into sequential turns).

    Args:
        url: Base URL of the VLLM server (no trailing /v1).
        model: Model identifier as served by VLLM.

    Returns:
        True if at least one tool call was emitted, False otherwise.
    """
    print("Test 2: Parallel Tool Calls")
    print("-" * 40)
    payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": "Get the weather in Tokyo AND London."}
        ],
        "tools": [{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {"type": "string"}
                    },
                    "required": ["location"]
                }
            }
        }],
        "tool_choice": "auto",
        "temperature": 0.1,
        "max_tokens": 500
    }
    try:
        response = requests.post(f"{url}/v1/chat/completions", json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()
        message = result["choices"][0]["message"]
        if message.get("tool_calls") and len(message["tool_calls"]) >= 2:
            for tc in message["tool_calls"]:
                print(f" Tool: {tc['function']['name']}({tc['function']['arguments']})")
            print(" PASSED\n")
            return True
        elif message.get("tool_calls"):
            print(f" Only {len(message['tool_calls'])} tool call(s) — expected 2+")
            print(" PARTIAL (some models serialize parallel calls differently)\n")
            return True  # Still counts as working
        else:
            # BUG FIX: "content" can be present but null, so .get()'s default
            # never applies and slicing None raises TypeError.
            print(f" Model returned text: {(message.get('content') or '(empty)')[:100]}")
            print(" FAILED\n")
            return False
    # Narrowed from bare Exception: network/HTTP failures, malformed JSON,
    # and unexpected response shape.
    except (requests.RequestException, ValueError, KeyError, IndexError) as e:
        print(f" ERROR: {e}")
        print(" FAILED\n")
        return False
def test_multi_turn(url: str, model: str) -> bool:
    """Test the full multi-turn loop: tool call -> tool result -> final answer.

    Turn 1 elicits a tool call, a synthetic weather result is injected as a
    `role: "tool"` message, and turn 2 must produce a natural-language answer.

    Args:
        url: Base URL of the VLLM server (no trailing /v1).
        model: Model identifier as served by VLLM.

    Returns:
        True if both turns succeed, False otherwise.
    """
    print("Test 3: Multi-Turn Conversation")
    print("-" * 40)
    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather for a location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"]
            }
        }
    }]
    messages = [
        {"role": "user", "content": "What's the weather in Paris?"}
    ]
    try:
        # Turn 1: Get tool call
        response = requests.post(
            f"{url}/v1/chat/completions",
            json={"model": model, "messages": messages, "tools": tools, "tool_choice": "auto",
                  "temperature": 0.1, "max_tokens": 500},
            timeout=60
        )
        response.raise_for_status()
        msg1 = response.json()["choices"][0]["message"]
        if not msg1.get("tool_calls"):
            print(" Turn 1: No tool calls")
            print(" FAILED\n")
            return False
        print(f" Turn 1: {msg1['tool_calls'][0]['function']['name']}() called")
        messages.append(msg1)
        # BUG FIX: answer EVERY tool call, not just the first. If the model
        # emits parallel calls in turn 1, a conversation with unanswered
        # tool_call_ids is rejected by strict chat templates/servers.
        for tc in msg1["tool_calls"]:
            messages.append({
                "role": "tool",
                "tool_call_id": tc["id"],
                "content": json.dumps({"temperature": 18, "condition": "Sunny", "unit": "celsius"})
            })
        # Turn 2: Get final answer (no tool_choice — let the model answer in text)
        response = requests.post(
            f"{url}/v1/chat/completions",
            json={"model": model, "messages": messages, "tools": tools,
                  "temperature": 0.1, "max_tokens": 500},
            timeout=60
        )
        response.raise_for_status()
        msg2 = response.json()["choices"][0]["message"]
        if msg2.get("content"):
            print(f" Turn 2: {msg2['content'][:100]}")
            print(" PASSED\n")
            return True
        else:
            print(" Turn 2: Empty response")
            print(" FAILED\n")
            return False
    # Narrowed from bare Exception: network/HTTP failures, malformed JSON,
    # and unexpected response shape.
    except (requests.RequestException, ValueError, KeyError, IndexError) as e:
        print(f" ERROR: {e}")
        print(" FAILED\n")
        return False
def test_connection(url: str) -> bool:
    """Verify the VLLM server is reachable by listing its served models.

    Args:
        url: Base URL of the VLLM server (no trailing /v1).

    Returns:
        True when /v1/models responds successfully, False otherwise.
    """
    print("Test 0: Connection")
    print("-" * 40)
    try:
        resp = requests.get(f"{url}/v1/models", timeout=10)
        resp.raise_for_status()
        available = [entry["id"] for entry in resp.json().get("data", [])]
        print(f" Available models: {', '.join(available)}")
        print(" PASSED\n")
        return True
    except Exception as exc:
        print(f" Cannot connect to {url}: {exc}")
        print(" FAILED\n")
        return False
def main() -> int:
    """Run the full smoke-test suite and return a process exit code.

    Returns:
        0 when every test passed, 1 otherwise (including connection failure).
    """
    parser = argparse.ArgumentParser(description="VLLM tool calling smoke test")
    parser.add_argument("--url", default="http://localhost:8000", help="VLLM server URL")
    parser.add_argument("--model", default="NousResearch/Hermes-3-Llama-3.1-70B-FP8")
    args = parser.parse_args()

    print("=" * 60)
    print("VLLM Tool Calling Smoke Test")
    print(f"Server: {args.url}")
    print(f"Model: {args.model}")
    print("=" * 60 + "\n")

    # Test connectivity first — no point running the suite against a dead server.
    if not test_connection(args.url):
        print("Cannot connect to VLLM server. Is it running?")
        return 1

    # Run tests (dict literals evaluate in insertion order, so tests run in sequence)
    results = {
        "Single tool call": test_single_tool_call(args.url, args.model),
        "Parallel tool calls": test_parallel_tool_calls(args.url, args.model),
        "Multi-turn": test_multi_turn(args.url, args.model),
    }

    # Summary
    print("=" * 60)
    print("SUMMARY")
    print("=" * 60)
    for name, passed in results.items():
        status = "PASSED" if passed else "FAILED"
        print(f" {name}: {status}")

    all_passed = all(results.values())
    print(f"\n{'All tests passed!' if all_passed else 'Some tests failed.'}")
    if not results["Single tool call"]:
        # The single-call test failing usually means VLLM itself is
        # misconfigured, not the model — point at the launch flags.
        print("\nTroubleshooting:")
        print(" 1. Is --enable-auto-tool-choice set in VLLM launch?")
        print(" 2. Is --tool-call-parser set correctly? (hermes/llama3_json/mistral)")
        print(" 3. Is --max-model-len large enough? (128K recommended)")
        print(" 4. Check VLLM server logs for errors")
    return 0 if all_passed else 1


if __name__ == "__main__":
    sys.exit(main())