#!/usr/bin/env python3
"""
VLLM Tool Calling Smoke Test
=============================

Quick verification that your VLLM instance supports tool calling.
Tests single tool calls, parallel tool calls, and multi-turn conversations.

Usage:
    python test_tool_calling.py --url http://localhost:8000 --model NousResearch/Hermes-3-Llama-3.1-70B-FP8
"""

import argparse
import json
import sys

import requests


def _chat(url: str, payload: dict) -> dict:
    """POST a chat-completion request and return the first choice's message.

    Args:
        url: Base server URL (no trailing path), e.g. ``http://localhost:8000``.
        payload: Full JSON body for ``/v1/chat/completions``.

    Returns:
        The ``message`` dict of the first choice.

    Raises:
        requests.HTTPError: On non-2xx responses (via ``raise_for_status``).
        requests.RequestException: On connection/timeout failures.
        KeyError / IndexError: If the response lacks the expected shape.
    """
    response = requests.post(f"{url}/v1/chat/completions", json=payload, timeout=60)
    response.raise_for_status()
    return response.json()["choices"][0]["message"]


def test_single_tool_call(url: str, model: str) -> bool:
    """Test basic single tool call."""
    print("Test 1: Single Tool Call")
    print("-" * 40)
    payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": "What's the weather in San Francisco? Use the get_weather tool."}
        ],
        "tools": [{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {"type": "string", "description": "City name"},
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]},
                    },
                    "required": ["location"],
                },
            },
        }],
        "tool_choice": "auto",
        # Low temperature keeps tool selection deterministic; 500 tokens is
        # enough headroom that the tool-call JSON is never truncated.
        "temperature": 0.1,
        "max_tokens": 500,
    }
    try:
        message = _chat(url, payload)
        if message.get("tool_calls"):
            tc = message["tool_calls"][0]
            print(f" Tool: {tc['function']['name']}")
            print(f" Arguments: {tc['function']['arguments']}")
            print(" PASSED\n")
            return True
        # Model answered in plain text instead of emitting a tool call.
        print(f" Model returned text: {message.get('content', '(empty)')[:100]}")
        print(" FAILED\n")
        return False
    except Exception as e:  # broad by design: any failure means the smoke test fails
        print(f" ERROR: {e}")
        print(" FAILED\n")
        return False


def test_parallel_tool_calls(url: str, model: str) -> bool:
    """Test if model can make multiple tool calls in one turn."""
    print("Test 2: Parallel Tool Calls")
    print("-" * 40)
    payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": "Get the weather in Tokyo AND London."}
        ],
        "tools": [{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {"location": {"type": "string"}},
                    "required": ["location"],
                },
            },
        }],
        "tool_choice": "auto",
        "temperature": 0.1,
        "max_tokens": 500,
    }
    try:
        message = _chat(url, payload)
        if message.get("tool_calls") and len(message["tool_calls"]) >= 2:
            for tc in message["tool_calls"]:
                print(f" Tool: {tc['function']['name']}({tc['function']['arguments']})")
            print(" PASSED\n")
            return True
        elif message.get("tool_calls"):
            # Some backends emit sequential single calls rather than a batch;
            # that still demonstrates working tool support.
            print(f" Only {len(message['tool_calls'])} tool call(s) — expected 2+")
            print(" PARTIAL (some models serialize parallel calls differently)\n")
            return True  # Still counts as working
        else:
            print(f" Model returned text: {message.get('content', '(empty)')[:100]}")
            print(" FAILED\n")
            return False
    except Exception as e:  # broad by design: any failure means the smoke test fails
        print(f" ERROR: {e}")
        print(" FAILED\n")
        return False


def test_multi_turn(url: str, model: str) -> bool:
    """Test multi-turn: tool call -> tool result -> final answer."""
    print("Test 3: Multi-Turn Conversation")
    print("-" * 40)
    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather for a location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"],
            },
        },
    }]
    messages: list[dict] = [
        {"role": "user", "content": "What's the weather in Paris?"}
    ]
    try:
        # Turn 1: Get tool call
        msg1 = _chat(url, {
            "model": model, "messages": messages, "tools": tools,
            "tool_choice": "auto", "temperature": 0.1, "max_tokens": 500,
        })
        if not msg1.get("tool_calls"):
            print(" Turn 1: No tool calls")
            print(" FAILED\n")
            return False
        print(f" Turn 1: {msg1['tool_calls'][0]['function']['name']}() called")

        # Echo the assistant turn back, then add a synthetic tool result
        # keyed to the tool_call_id so the model can ground its final answer.
        messages.append(msg1)
        messages.append({
            "role": "tool",
            "tool_call_id": msg1["tool_calls"][0]["id"],
            "content": json.dumps({"temperature": 18, "condition": "Sunny", "unit": "celsius"}),
        })

        # Turn 2: Get final answer
        msg2 = _chat(url, {
            "model": model, "messages": messages, "tools": tools,
            "temperature": 0.1, "max_tokens": 500,
        })
        if msg2.get("content"):
            print(f" Turn 2: {msg2['content'][:100]}")
            print(" PASSED\n")
            return True
        print(" Turn 2: Empty response")
        print(" FAILED\n")
        return False
    except Exception as e:  # broad by design: any failure means the smoke test fails
        print(f" ERROR: {e}")
        print(" FAILED\n")
        return False


def test_connection(url: str) -> bool:
    """Test basic connectivity to VLLM."""
    print("Test 0: Connection")
    print("-" * 40)
    try:
        response = requests.get(f"{url}/v1/models", timeout=10)
        response.raise_for_status()
        models = response.json()
        model_ids = [m["id"] for m in models.get("data", [])]
        print(f" Available models: {', '.join(model_ids)}")
        print(" PASSED\n")
        return True
    except Exception as e:  # broad by design: any failure means the smoke test fails
        print(f" Cannot connect to {url}: {e}")
        print(" FAILED\n")
        return False


def main() -> None:
    """CLI entry point: run all smoke tests and exit 0 iff every test passed."""
    parser = argparse.ArgumentParser(description="VLLM tool calling smoke test")
    parser.add_argument("--url", default="http://localhost:8000", help="VLLM server URL")
    parser.add_argument("--model", default="NousResearch/Hermes-3-Llama-3.1-70B-FP8")
    args = parser.parse_args()

    print("=" * 60)
    print("VLLM Tool Calling Smoke Test")
    print(f"Server: {args.url}")
    print(f"Model: {args.model}")
    print("=" * 60 + "\n")

    # Test connectivity first — the remaining tests are meaningless offline.
    if not test_connection(args.url):
        print("Cannot connect to VLLM server. Is it running?")
        sys.exit(1)

    # Run tests
    results = {
        "Single tool call": test_single_tool_call(args.url, args.model),
        "Parallel tool calls": test_parallel_tool_calls(args.url, args.model),
        "Multi-turn": test_multi_turn(args.url, args.model),
    }

    # Summary
    print("=" * 60)
    print("SUMMARY")
    print("=" * 60)
    for name, passed in results.items():
        status = "PASSED" if passed else "FAILED"
        print(f" {name}: {status}")

    all_passed = all(results.values())
    print(f"\n{'All tests passed!' if all_passed else 'Some tests failed.'}")

    if not results.get("Single tool call"):
        print("\nTroubleshooting:")
        print(" 1. Is --enable-auto-tool-choice set in VLLM launch?")
        print(" 2. Is --tool-call-parser set correctly? (hermes/llama3_json/mistral)")
        print(" 3. Is --max-model-len large enough? (128K recommended)")
        print(" 4. Check VLLM server logs for errors")

    sys.exit(0 if all_passed else 1)


if __name__ == "__main__":
    main()