"""
Basic Tool Calling with VLLM
============================

Minimal working example of tool calling via VLLM's OpenAI-compatible API.
Works with any model that supports tool calling (Hermes-3, Llama 3.3, Qwen2, Mistral).

Usage:
    python basic_tool_call.py --url http://localhost:8000 --model NousResearch/Hermes-3-Llama-3.1-70B-FP8
"""
| |
|
import argparse
import json

import requests
| |
|
| |
|
def make_tool_call(vllm_url: str, model: str):
    """Send a tool-enabled chat completion request to VLLM.

    Args:
        vllm_url: Base URL of the VLLM server (e.g. "http://localhost:8000").
            A trailing slash is tolerated.
        model: Model name as served by VLLM.

    Returns:
        The list of tool calls the model produced, or None if the model
        answered with plain text instead.

    Raises:
        requests.HTTPError: If the server returns a non-2xx status.
    """
    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "City name, e.g. 'San Francisco'"
                        },
                        "unit": {
                            "type": "string",
                            "enum": ["celsius", "fahrenheit"],
                            "description": "Temperature unit"
                        }
                    },
                    "required": ["location"]
                }
            }
        }
    ]

    payload = {
        "model": model,
        "messages": [
            {
                "role": "user",
                "content": "What's the weather like in San Francisco?"
            }
        ],
        "tools": tools,
        "tool_choice": "auto",  # let the model decide whether to call the tool
        "temperature": 0.1,     # near-deterministic so the tool call is reproducible
        "max_tokens": 500
    }

    # rstrip('/') tolerates a trailing slash in --url (avoids ".../​/v1/..." URLs).
    response = requests.post(
        f"{vllm_url.rstrip('/')}/v1/chat/completions",
        json=payload,
        timeout=60
    )
    response.raise_for_status()
    result = response.json()

    message = result["choices"][0]["message"]

    if message.get("tool_calls"):
        print("Tool calls received:")
        for tc in message["tool_calls"]:
            print(f"  Function: {tc['function']['name']}")
            print(f"  Arguments: {tc['function']['arguments']}")
        return message["tool_calls"]
    else:
        print("No tool calls — model responded with text:")
        # "content" may be present but None in OpenAI-style responses, so a
        # plain .get(key, default) would still print "None"; `or` covers both
        # the missing-key and null-value cases.
        print(f"  {message.get('content') or '(empty)'}")
        return None
| |
|
| |
|
def _post_chat(base_url: str, payload: dict):
    """POST a chat-completion payload to VLLM and return the first choice's message."""
    response = requests.post(
        f"{base_url}/v1/chat/completions",
        json=payload,
        timeout=60
    )
    response.raise_for_status()
    return response.json()["choices"][0]["message"]


def multi_turn_tool_call(vllm_url: str, model: str):
    """Demonstrate a full tool calling conversation: request -> execute -> respond.

    Turn 1 asks a question the model should answer via get_weather; turn 2
    feeds back fabricated tool results; turn 3 lets the model compose a
    final natural-language answer from those results.

    Args:
        vllm_url: Base URL of the VLLM server. A trailing slash is tolerated.
        model: Model name as served by VLLM.

    Raises:
        requests.HTTPError: If any request returns a non-2xx status.
    """
    base_url = vllm_url.rstrip("/")  # tolerate a trailing slash in --url

    tools = [
        {
            "type": "function",
            "function": {
                "name": "get_weather",
                "description": "Get the current weather for a location",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "location": {"type": "string"},
                        "unit": {"type": "string", "enum": ["celsius", "fahrenheit"]}
                    },
                    "required": ["location"]
                }
            }
        }
    ]

    messages = [
        {"role": "user", "content": "What's the weather in Tokyo and London?"}
    ]

    print("\n--- Turn 1: Request ---")
    assistant_message = _post_chat(base_url, {
        "model": model,
        "messages": messages,
        "tools": tools,
        "tool_choice": "auto",
        "temperature": 0.1,
        "max_tokens": 500
    })

    if not assistant_message.get("tool_calls"):
        print("Model did not call tools.")
        return

    for tc in assistant_message["tool_calls"]:
        print(f"  Tool: {tc['function']['name']}({tc['function']['arguments']})")

    # The assistant message (with its tool_calls) must be echoed back into the
    # history so the model can match each tool response to its own call.
    messages.append(assistant_message)

    print("\n--- Turn 2: Tool Responses ---")
    for tc in assistant_message["tool_calls"]:
        # Fabricate a plausible result; a real integration would execute the tool.
        try:
            location = json.loads(tc["function"]["arguments"]).get("location", "Unknown")
        except (json.JSONDecodeError, AttributeError):
            # Models occasionally emit malformed argument JSON; don't crash the demo.
            location = "Unknown"
        fake_result = json.dumps({
            "temperature": 22,
            "unit": "celsius",
            "condition": "Partly cloudy",
            "location": location
        })
        messages.append({
            "role": "tool",
            "tool_call_id": tc["id"],  # ties this result to the specific call
            "content": fake_result
        })
        print(f"  Sent result for {tc['function']['name']}: {fake_result}")

    print("\n--- Turn 3: Final Response ---")
    final_message = _post_chat(base_url, {
        "model": model,
        "messages": messages,
        "tools": tools,
        "temperature": 0.1,
        "max_tokens": 500
    })
    # "content" can be present but None; `or` handles both missing and null.
    print(f"  {final_message.get('content') or '(empty)'}")
| |
|
| |
|
if __name__ == "__main__":
    # CLI: server location and model are the only knobs this demo needs.
    cli = argparse.ArgumentParser(description="Test VLLM tool calling")
    cli.add_argument("--url", default="http://localhost:8000", help="VLLM server URL")
    cli.add_argument("--model", default="NousResearch/Hermes-3-Llama-3.1-70B-FP8", help="Model name")
    opts = cli.parse_args()

    banner = "=" * 60

    print(banner)
    print("Test 1: Single Tool Call")
    print(banner)
    make_tool_call(opts.url, opts.model)

    print("\n" + banner)
    print("Test 2: Multi-Turn Tool Calling")
    print(banner)
    multi_turn_tool_call(opts.url, opts.model)
| |
|