api_client.py · onnx-community/WebWorld-8B-Onnx at main

Add files using upload-large-folder tool

5abb996 verified 11 days ago

7.33 kB

	"""
	Client for the Qwen ONNX Model API.
	Use this to interact with api_server.py.
	"""

	import requests
	import json
	from typing import List, Dict, Optional

	class QwenAPIClient:
	    """
	    Thin HTTP client for the Qwen ONNX model API.

	    Each public method sends a single request to the server at ``base_url``,
	    raises ``requests.HTTPError`` when the server answers with a 4xx/5xx
	    status, and returns the decoded JSON body.

	    The client owns a ``requests.Session``; call :meth:`close` (or use the
	    client in a ``with`` statement) to release its pooled connections.
	    """

	    def __init__(
	        self,
	        base_url: str = "http://localhost:8000",
	        timeout: Optional[float] = None,
	    ):
	        """
	        Initialize the API client.

	        Args:
	            base_url: Base URL of the API server (default: localhost:8000).
	                Trailing slashes are stripped.
	            timeout: Timeout in seconds handed to every request. ``None``
	                (the default) waits indefinitely, which matches the
	                behaviour of a plain ``requests`` call.
	        """
	        self.base_url = base_url.rstrip("/")
	        self.timeout = timeout
	        self.session = requests.Session()

	    def __enter__(self):
	        """Return the client itself so it can be used in a ``with`` block."""
	        return self

	    def __exit__(self, exc_type, exc, tb) -> None:
	        """Close the underlying session; exceptions are not suppressed."""
	        self.close()

	    def close(self) -> None:
	        """Close the underlying HTTP session."""
	        self.session.close()

	    def _request(self, method: str, path: str, payload: Optional[Dict] = None) -> Dict:
	        """
	        Send one request and return the decoded JSON response.

	        Args:
	            method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
	            path: Endpoint path starting with ``/``; appended to ``base_url``.
	            payload: Optional JSON-serialisable request body.

	        Returns:
	            The response body parsed as JSON.

	        Raises:
	            requests.HTTPError: If the server returns a 4xx/5xx status.
	        """
	        response = self.session.request(
	            method,
	            f"{self.base_url}{path}",
	            json=payload,
	            timeout=self.timeout,
	        )
	        response.raise_for_status()
	        return response.json()

	    def health_check(self) -> Dict:
	        """Check if the API is healthy (``GET /health``)."""
	        return self._request("GET", "/health")

	    def generate(
	        self,
	        prompt: str,
	        max_length: int = 100,
	        temperature: float = 0.6,
	        top_p: float = 0.95,
	        top_k: int = 20
	    ) -> Dict:
	        """
	        Generate text from a prompt (``POST /generate``).

	        Args:
	            prompt: Input prompt
	            max_length: Maximum tokens to generate
	            temperature: Sampling temperature
	            top_p: Top-p sampling parameter
	            top_k: Top-k sampling parameter

	        Returns:
	            Response with generated text
	        """
	        payload = {
	            "prompt": prompt,
	            "max_length": max_length,
	            "temperature": temperature,
	            "top_p": top_p,
	            "top_k": top_k,
	        }
	        return self._request("POST", "/generate", payload)

	    def chat(
	        self,
	        messages: List[Dict[str, str]],
	        max_length: int = 200,
	        temperature: float = 0.6,
	        top_p: float = 0.95,
	        top_k: int = 20
	    ) -> Dict:
	        """
	        Chat with the model (``POST /chat``).

	        Args:
	            messages: List of message dicts with 'role' and 'content'
	            max_length: Maximum tokens to generate
	            temperature: Sampling temperature
	            top_p: Top-p sampling parameter
	            top_k: Top-k sampling parameter

	        Returns:
	            Response with assistant message
	        """
	        payload = {
	            "messages": messages,
	            "max_length": max_length,
	            "temperature": temperature,
	            "top_p": top_p,
	            "top_k": top_k,
	        }
	        return self._request("POST", "/chat", payload)

	    def tokenize(self, text: str) -> Dict:
	        """
	        Tokenize text (``POST /tokenize``).

	        Args:
	            text: Text to tokenize

	        Returns:
	            Response with token IDs
	        """
	        return self._request("POST", "/tokenize", {"text": text})

	    def model_info(self) -> Dict:
	        """Get model information (``GET /info``)."""
	        return self._request("GET", "/info")


	def _print_banner(title: str) -> None:
	    """Print a section title framed by two 60-character '=' rules."""
	    print("="*60)
	    print(title)
	    print("="*60)


	def _interactive_chat(client: "QwenAPIClient") -> None:
	    """Run a read-reply chat loop until the user types 'quit' or closes stdin."""
	    conversation = [
	        {"role": "system", "content": "You are a helpful assistant."}
	    ]

	    while True:
	        try:
	            user_input = input("You: ").strip()
	        except (EOFError, KeyboardInterrupt):
	            # Ctrl-D / Ctrl-C end the session like 'quit' instead of
	            # terminating the demo with a traceback.
	            print()
	            break

	        if user_input.lower() == "quit":
	            break

	        if not user_input:
	            continue

	        conversation.append({"role": "user", "content": user_input})
	        try:
	            result = client.chat(conversation, max_length=300, temperature=0.7)
	            response = result['assistant_response']
	        except Exception as e:
	            # Drop the unanswered user turn; otherwise the next request
	            # would carry two consecutive user messages.
	            conversation.pop()
	            print(f"Error: {e}\n")
	            continue

	        print(f"Assistant: {response}\n")
	        conversation.append({"role": "assistant", "content": response})


	def main():
	    """
	    Example usage of the API client.

	    Runs a health check, then five canned examples (generation, chat,
	    multi-turn chat, tokenization, model info) and finally an interactive
	    chat loop. Each example reports its own error and the demo moves on;
	    only a failed health check aborts the run.
	    """

	    # Initialize client
	    client = QwenAPIClient("http://localhost:8000")

	    # Check health
	    print("Checking API health...")
	    try:
	        health = client.health_check()
	    except requests.exceptions.ConnectionError:
	        print("ERROR: Could not connect to API server.")
	        print("Make sure to run: python api_server.py")
	        return
	    except requests.exceptions.RequestException as e:
	        # The server was reachable but answered with an error status or an
	        # unusable response; nothing below would work either.
	        print(f"ERROR: Health check failed: {e}")
	        return
	    print(f"Status: {health.get('status', 'unknown')}\n")

	    # Example 1: Generate text
	    _print_banner("Example 1: Text Generation")

	    try:
	        result = client.generate(
	            prompt="What is artificial intelligence?",
	            max_length=150,
	            temperature=0.7
	        )
	        print(f"Prompt: {result['prompt']}")
	        print(f"Response: {result['generated_text']}\n")
	    except Exception as e:
	        print(f"Error: {e}\n")

	    # Example 2: Chat
	    _print_banner("Example 2: Chat")

	    try:
	        messages = [
	            {"role": "system", "content": "You are a helpful Python assistant."},
	            {"role": "user", "content": "How do I read a file in Python?"}
	        ]

	        result = client.chat(messages, max_length=200)
	        print(f"User: {messages[-1]['content']}")
	        print(f"Assistant: {result['assistant_response']}\n")

	    except Exception as e:
	        print(f"Error: {e}\n")

	    # Example 3: Multi-turn conversation
	    _print_banner("Example 3: Multi-turn Chat")

	    try:
	        conversation = [
	            {"role": "system", "content": "You are an expert programmer."}
	        ]

	        # Turn 1
	        conversation.append({"role": "user", "content": "What is recursion?"})
	        result1 = client.chat(conversation, max_length=150)
	        response1 = result1['assistant_response']
	        print(f"User: {conversation[-1]['content']}")
	        print(f"Assistant: {response1}\n")

	        # Feed the assistant's answer back so turn 2 has the full history
	        conversation.append({"role": "assistant", "content": response1})
	        conversation.append({"role": "user", "content": "Can you give a code example?"})

	        result2 = client.chat(conversation, max_length=200)
	        response2 = result2['assistant_response']
	        print(f"User: {conversation[-1]['content']}")
	        print(f"Assistant: {response2}\n")

	    except Exception as e:
	        print(f"Error: {e}\n")

	    # Example 4: Tokenization
	    _print_banner("Example 4: Tokenization")

	    try:
	        result = client.tokenize("Hello, world!")
	        print(f"Text: {result['text']}")
	        print(f"Tokens: {result['token_ids']}")
	        print(f"Number of tokens: {result['num_tokens']}\n")

	    except Exception as e:
	        print(f"Error: {e}\n")

	    # Example 5: Model info
	    _print_banner("Example 5: Model Information")

	    try:
	        info = client.model_info()
	        print(f"Model Type: {info['model_type']}")
	        print(f"Context Length: {info['context_length']}")
	        print(f"Vocabulary Size: {info['vocab_size']}")
	        print(f"Default Max Length: {info['default_max_length']}")
	        print(f"Default Temperature: {info['default_temperature']}\n")

	    except Exception as e:
	        print(f"Error: {e}\n")

	    # Interactive chat mode
	    _print_banner("Interactive Chat Mode")
	    print("Enter 'quit' to exit\n")

	    _interactive_chat(client)


	# Run the demo only when this file is executed as a script, not when it is imported.
	if __name__ == "__main__":
	main()