| | |
| | """ |
| | VLLM Tool Calling Smoke Test |
| | ============================= |
| | |
| | Quick verification that your VLLM instance supports tool calling. |
| | Tests single tool calls, parallel tool calls, and multi-turn conversations. |
| | |
| | Usage: |
| | python test_tool_calling.py --url http://localhost:8000 --model NousResearch/Hermes-3-Llama-3.1-70B-FP8 |
| | """ |
| |
|
| | import argparse |
| | import json |
| | import sys |
| | import requests |
| |
|
| |
|
def test_single_tool_call(url: str, model: str) -> bool:
    """Verify the model emits a single tool call when explicitly asked to.

    Sends one user message that requests the ``get_weather`` tool and checks
    that the first choice's message contains at least one tool call.

    Args:
        url: Base URL of the vLLM server (no trailing slash expected).
        model: Model identifier to pass in the request payload.

    Returns:
        True if a tool call was returned, False on plain-text reply or error.
    """
    print("Test 1: Single Tool Call")
    print("-" * 40)

    payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": "What's the weather in San Francisco? Use the get_weather tool."}
        ],
        "tools": [{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {"type": "string", "description": "City name"},
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
                    },
                    "required": ["location"]
                }
            }
        }],
        "tool_choice": "auto",
        # Low temperature keeps the smoke test deterministic-ish.
        "temperature": 0.1,
        "max_tokens": 500
    }

    try:
        response = requests.post(f"{url}/v1/chat/completions", json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()
        message = result["choices"][0]["message"]

        if message.get("tool_calls"):
            tc = message["tool_calls"][0]
            print(f" Tool: {tc['function']['name']}")
            print(f" Arguments: {tc['function']['arguments']}")
            print(" PASSED\n")
            return True
        else:
            # OpenAI-style responses may carry "content": null; .get's default
            # only covers a *missing* key, so guard with `or` before slicing.
            print(f" Model returned text: {(message.get('content') or '(empty)')[:100]}")
            print(" FAILED\n")
            return False

    except Exception as e:
        # Broad catch is deliberate: this is a smoke test that must keep
        # running and report FAILED rather than crash on any network/JSON error.
        print(f" ERROR: {e}")
        print(" FAILED\n")
        return False
| |
|
| |
|
def test_parallel_tool_calls(url: str, model: str) -> bool:
    """Verify the model can emit multiple tool calls in a single turn.

    Asks for the weather in two cities at once; a fully passing run returns
    two or more tool calls. A single tool call is treated as a soft pass
    because some models serialize parallel calls across turns.

    Args:
        url: Base URL of the vLLM server.
        model: Model identifier to pass in the request payload.

    Returns:
        True on 1+ tool calls (2+ is a full pass), False on text-only reply
        or any request error.
    """
    print("Test 2: Parallel Tool Calls")
    print("-" * 40)

    payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": "Get the weather in Tokyo AND London."}
        ],
        "tools": [{
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {"type": "string"}
                    },
                    "required": ["location"]
                }
            }
        }],
        "tool_choice": "auto",
        "temperature": 0.1,
        "max_tokens": 500
    }

    try:
        response = requests.post(f"{url}/v1/chat/completions", json=payload, timeout=60)
        response.raise_for_status()
        result = response.json()
        message = result["choices"][0]["message"]

        if message.get("tool_calls") and len(message["tool_calls"]) >= 2:
            for tc in message["tool_calls"]:
                print(f" Tool: {tc['function']['name']}({tc['function']['arguments']})")
            print(" PASSED\n")
            return True
        elif message.get("tool_calls"):
            print(f" Only {len(message['tool_calls'])} tool call(s) — expected 2+")
            print(" PARTIAL (some models serialize parallel calls differently)\n")
            return True
        else:
            # "content" can be present but null; `or` prevents slicing None.
            print(f" Model returned text: {(message.get('content') or '(empty)')[:100]}")
            print(" FAILED\n")
            return False

    except Exception as e:
        # Smoke test: report and continue rather than propagate.
        print(f" ERROR: {e}")
        print(" FAILED\n")
        return False
| |
|
| |
|
def test_multi_turn(url: str, model: str) -> bool:
    """Exercise the full tool-use loop: call -> injected result -> answer.

    Turn 1 must produce a tool call; a fabricated weather result is then fed
    back as a ``tool`` message, and turn 2 must produce non-empty text.

    Args:
        url: Base URL of the vLLM server.
        model: Model identifier for both requests.

    Returns:
        True if both turns behave as expected, False otherwise.
    """
    print("Test 3: Multi-Turn Conversation")
    print("-" * 40)

    tools = [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get weather for a location",
            "parameters": {
                "type": "object",
                "properties": {"location": {"type": "string"}},
                "required": ["location"]
            }
        }
    }]

    conversation = [
        {"role": "user", "content": "What's the weather in Paris?"}
    ]

    try:
        # --- Turn 1: expect the model to request the tool. ---
        first_payload = {
            "model": model,
            "messages": conversation,
            "tools": tools,
            "tool_choice": "auto",
            "temperature": 0.1,
            "max_tokens": 500,
        }
        first = requests.post(
            f"{url}/v1/chat/completions", json=first_payload, timeout=60
        )
        first.raise_for_status()
        assistant_msg = first.json()["choices"][0]["message"]

        calls = assistant_msg.get("tool_calls")
        if not calls:
            print(" Turn 1: No tool calls")
            print(" FAILED\n")
            return False

        print(f" Turn 1: {calls[0]['function']['name']}() called")
        conversation.append(assistant_msg)

        # Feed back a canned tool result keyed to the call the model made.
        fake_result = {"temperature": 18, "condition": "Sunny", "unit": "celsius"}
        conversation.append({
            "role": "tool",
            "tool_call_id": calls[0]["id"],
            "content": json.dumps(fake_result)
        })

        # --- Turn 2: expect a natural-language answer using the result. ---
        second_payload = {
            "model": model,
            "messages": conversation,
            "tools": tools,
            "temperature": 0.1,
            "max_tokens": 500,
        }
        second = requests.post(
            f"{url}/v1/chat/completions", json=second_payload, timeout=60
        )
        second.raise_for_status()
        final_msg = second.json()["choices"][0]["message"]

        if not final_msg.get("content"):
            print(" Turn 2: Empty response")
            print(" FAILED\n")
            return False

        print(f" Turn 2: {final_msg['content'][:100]}")
        print(" PASSED\n")
        return True

    except Exception as e:
        print(f" ERROR: {e}")
        print(" FAILED\n")
        return False
| |
|
| |
|
def test_connection(url: str) -> bool:
    """Check that the vLLM server is reachable and list its models.

    Args:
        url: Base URL of the vLLM server.

    Returns:
        True if ``GET /v1/models`` succeeds, False otherwise.
    """
    print("Test 0: Connection")
    print("-" * 40)
    try:
        resp = requests.get(f"{url}/v1/models", timeout=10)
        resp.raise_for_status()
        listing = resp.json()
        available = [entry["id"] for entry in listing.get("data", [])]
        print(f" Available models: {', '.join(available)}")
        print(" PASSED\n")
        return True
    except Exception as e:
        print(f" Cannot connect to {url}: {e}")
        print(" FAILED\n")
        return False
| |
|
| |
|
if __name__ == "__main__":
    # CLI: point the smoke test at a running vLLM server.
    parser = argparse.ArgumentParser(description="VLLM tool calling smoke test")
    parser.add_argument("--url", default="http://localhost:8000", help="VLLM server URL")
    parser.add_argument("--model", default="NousResearch/Hermes-3-Llama-3.1-70B-FP8")
    args = parser.parse_args()

    banner = "=" * 60
    print(banner)
    print("VLLM Tool Calling Smoke Test")
    print(f"Server: {args.url}")
    print(f"Model: {args.model}")
    print(banner + "\n")

    # Connectivity is a hard prerequisite — bail out before the real tests.
    if not test_connection(args.url):
        print("Cannot connect to VLLM server. Is it running?")
        sys.exit(1)

    results = {
        "Single tool call": test_single_tool_call(args.url, args.model),
        "Parallel tool calls": test_parallel_tool_calls(args.url, args.model),
        "Multi-turn": test_multi_turn(args.url, args.model),
    }

    print(banner)
    print("SUMMARY")
    print(banner)
    for label, ok in results.items():
        print(f" {label}: {'PASSED' if ok else 'FAILED'}")

    all_passed = all(results.values())
    print()
    print("All tests passed!" if all_passed else "Some tests failed.")

    # The single-call test is the canary for server-side misconfiguration.
    if not results.get("Single tool call"):
        print("\nTroubleshooting:")
        print(" 1. Is --enable-auto-tool-choice set in VLLM launch?")
        print(" 2. Is --tool-call-parser set correctly? (hermes/llama3_json/mistral)")
        print(" 3. Is --max-model-len large enough? (128K recommended)")
        print(" 4. Check VLLM server logs for errors")

    # Shell-friendly exit status: 0 = all green, 1 = something failed.
    sys.exit(0 if all_passed else 1)
| |
|