# WebWorld-8B-Onnx / api_client.py
# Prince-1's picture
# Add files using upload-large-folder tool
# 5abb996 verified
"""
Client for Qwen ONNX Model API
Use this to interact with the api_server.py
"""
import requests
import json
from typing import List, Dict, Optional
class QwenAPIClient:
    """Thin HTTP client for the Qwen ONNX model API exposed by api_server.py.

    Every public method issues one request through a shared
    ``requests.Session`` and returns the decoded JSON body. The client can be
    used as a context manager so the session is released deterministically::

        with QwenAPIClient() as client:
            client.health_check()
    """

    # Without a timeout, requests waits forever on a stalled server. The
    # default is generous because text generation can legitimately be slow.
    DEFAULT_TIMEOUT = 300.0

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        timeout: Optional[float] = DEFAULT_TIMEOUT,
    ):
        """
        Initialize the API client.

        Args:
            base_url: Base URL of the API server (default: localhost:8000).
                Trailing slashes are stripped.
            timeout: Value forwarded as the ``timeout`` argument of every
                request, in seconds. Pass ``None`` to wait indefinitely
                (the behaviour before this parameter existed).
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.session = requests.Session()

    def __enter__(self) -> "QwenAPIClient":
        """Return the client itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the underlying session; exceptions are not suppressed."""
        self.close()

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def _request(self, method: str, path: str, payload: Optional[Dict] = None) -> Dict:
        """Send one request and return the decoded JSON body.

        Args:
            method: HTTP verb, e.g. ``"GET"`` or ``"POST"``.
            path: Endpoint path starting with ``/``; appended to ``base_url``.
            payload: Optional JSON-serialisable request body.

        Returns:
            The JSON-decoded response body.

        Raises:
            requests.exceptions.HTTPError: The server answered with an error
                status (raised by ``raise_for_status``).
            requests.exceptions.RequestException: Connection failure, timeout
                or another transport-level error.
            ValueError: The response body is not valid JSON.
        """
        response = self.session.request(
            method,
            f"{self.base_url}{path}",
            json=payload,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def health_check(self) -> Dict:
        """Check if the API is healthy (``GET /health``)."""
        return self._request("GET", "/health")

    def generate(
        self,
        prompt: str,
        max_length: int = 100,
        temperature: float = 0.6,
        top_p: float = 0.95,
        top_k: int = 20
    ) -> Dict:
        """
        Generate text from a prompt (``POST /generate``).

        Args:
            prompt: Input prompt
            max_length: Maximum tokens to generate
            temperature: Sampling temperature
            top_p: Top-p sampling parameter
            top_k: Top-k sampling parameter

        Returns:
            Response with generated text
        """
        return self._request(
            "POST",
            "/generate",
            {
                "prompt": prompt,
                "max_length": max_length,
                "temperature": temperature,
                "top_p": top_p,
                "top_k": top_k,
            },
        )

    def chat(
        self,
        messages: List[Dict[str, str]],
        max_length: int = 200,
        temperature: float = 0.6,
        top_p: float = 0.95,
        top_k: int = 20
    ) -> Dict:
        """
        Chat with the model (``POST /chat``).

        Args:
            messages: List of message dicts with 'role' and 'content'
            max_length: Maximum tokens to generate
            temperature: Sampling temperature
            top_p: Top-p sampling parameter
            top_k: Top-k sampling parameter

        Returns:
            Response with assistant message
        """
        return self._request(
            "POST",
            "/chat",
            {
                "messages": messages,
                "max_length": max_length,
                "temperature": temperature,
                "top_p": top_p,
                "top_k": top_k,
            },
        )

    def tokenize(self, text: str) -> Dict:
        """
        Tokenize text (``POST /tokenize``).

        Args:
            text: Text to tokenize

        Returns:
            Response with token IDs
        """
        return self._request("POST", "/tokenize", {"text": text})

    def model_info(self) -> Dict:
        """Get model information (``GET /info``)."""
        return self._request("GET", "/info")
def _print_banner(title):
    """Print a section title framed by two 60-character rules."""
    rule = "=" * 60
    print(rule)
    print(title)
    print(rule)


def _run_examples(client: "QwenAPIClient"):
    """Run the five scripted examples; each failure is reported and skipped."""
    # Example 1: Generate text
    _print_banner("Example 1: Text Generation")
    try:
        result = client.generate(
            prompt="What is artificial intelligence?",
            max_length=150,
            temperature=0.7
        )
        print(f"Prompt: {result['prompt']}")
        print(f"Response: {result['generated_text']}\n")
    except Exception as e:
        print(f"Error: {e}\n")

    # Example 2: Chat
    _print_banner("Example 2: Chat")
    try:
        messages = [
            {"role": "system", "content": "You are a helpful Python assistant."},
            {"role": "user", "content": "How do I read a file in Python?"}
        ]
        result = client.chat(messages, max_length=200)
        print(f"User: {messages[-1]['content']}")
        print(f"Assistant: {result['assistant_response']}\n")
    except Exception as e:
        print(f"Error: {e}\n")

    # Example 3: Multi-turn conversation
    _print_banner("Example 3: Multi-turn Chat")
    try:
        conversation = [
            {"role": "system", "content": "You are an expert programmer."}
        ]

        # Turn 1
        conversation.append({"role": "user", "content": "What is recursion?"})
        result1 = client.chat(conversation, max_length=150)
        response1 = result1['assistant_response']
        print(f"User: {conversation[-1]['content']}")
        print(f"Assistant: {response1}\n")

        # Feed the reply back so the second turn sees the full history
        conversation.append({"role": "assistant", "content": response1})
        conversation.append({"role": "user", "content": "Can you give a code example?"})
        result2 = client.chat(conversation, max_length=200)
        response2 = result2['assistant_response']
        print(f"User: {conversation[-1]['content']}")
        print(f"Assistant: {response2}\n")
    except Exception as e:
        print(f"Error: {e}\n")

    # Example 4: Tokenization
    _print_banner("Example 4: Tokenization")
    try:
        result = client.tokenize("Hello, world!")
        print(f"Text: {result['text']}")
        print(f"Tokens: {result['token_ids']}")
        print(f"Number of tokens: {result['num_tokens']}\n")
    except Exception as e:
        print(f"Error: {e}\n")

    # Example 5: Model info
    _print_banner("Example 5: Model Information")
    try:
        info = client.model_info()
        print(f"Model Type: {info['model_type']}")
        print(f"Context Length: {info['context_length']}")
        print(f"Vocabulary Size: {info['vocab_size']}")
        print(f"Default Max Length: {info['default_max_length']}")
        print(f"Default Temperature: {info['default_temperature']}\n")
    except Exception as e:
        print(f"Error: {e}\n")


def _interactive_chat(client: "QwenAPIClient"):
    """Read user turns from stdin until 'quit', EOF or Ctrl-C."""
    _print_banner("Interactive Chat Mode")
    print("Enter 'quit' to exit\n")

    conversation = [
        {"role": "system", "content": "You are a helpful assistant."}
    ]
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt means "leave", not "crash".
            print()
            break

        if user_input.lower() == "quit":
            break
        if not user_input:
            continue

        conversation.append({"role": "user", "content": user_input})
        try:
            result = client.chat(conversation, max_length=300, temperature=0.7)
            response = result['assistant_response']
        except Exception as e:
            # Drop the unanswered user turn so the next request does not
            # carry two consecutive user messages.
            conversation.pop()
            print(f"Error: {e}\n")
            continue

        print(f"Assistant: {response}\n")
        conversation.append({"role": "assistant", "content": response})


def main():
    """Example usage of the API client.

    Checks server health, runs five scripted examples, then enters an
    interactive chat loop. Returns early when the health check fails. The
    client's HTTP session is closed on every exit path.
    """
    # Initialize client
    client = QwenAPIClient("http://localhost:8000")
    try:
        # Check health
        print("Checking API health...")
        try:
            health = client.health_check()
        except requests.exceptions.ConnectionError:
            print("ERROR: Could not connect to API server.")
            print("Make sure to run: python api_server.py")
            return
        except requests.exceptions.RequestException as e:
            # HTTP error status, timeout, etc.: the server is reachable but
            # unusable, so report it instead of crashing with a traceback.
            print(f"ERROR: Health check failed: {e}")
            return
        print(f"Status: {health.get('status', 'unknown')}\n")

        _run_examples(client)
        _interactive_chat(client)
    finally:
        client.session.close()
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()