| """ |
| Client for Qwen ONNX Model API |
| Use this to interact with the api_server.py |
| """ |
|
|
| import requests |
| import json |
| from typing import List, Dict, Optional |
|
|
class QwenAPIClient:
    """Thin HTTP client for the Qwen ONNX model API server.

    All requests go through a single ``requests.Session``. The client can be
    used as a context manager so the session is closed when the caller is
    done::

        with QwenAPIClient() as client:
            client.health_check()

    Every request method raises the exception produced by
    ``response.raise_for_status()`` when the server answers with an error
    status, and otherwise returns the decoded JSON body.
    """

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        timeout: Optional[float] = None,
    ):
        """
        Initialize the API client.

        Args:
            base_url: Base URL of the API server (default: localhost:8000).
                Trailing slashes are stripped.
            timeout: Timeout in seconds passed to every request. ``None``
                (the default) applies no timeout, matching the behaviour of
                a plain ``requests`` call.
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.session = requests.Session()

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def __enter__(self) -> "QwenAPIClient":
        """Return the client itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the session on leaving the ``with`` block."""
        self.close()

    def _get(self, path: str) -> Dict:
        """Send a GET request to ``path`` and return the decoded JSON body."""
        response = self.session.get(
            f"{self.base_url}{path}",
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def _post(self, path: str, payload: Dict) -> Dict:
        """POST ``payload`` as JSON to ``path`` and return the decoded JSON body."""
        response = self.session.post(
            f"{self.base_url}{path}",
            json=payload,
            timeout=self.timeout,
        )
        response.raise_for_status()
        return response.json()

    def health_check(self) -> Dict:
        """Check if the API is healthy (GET ``/health``)."""
        return self._get("/health")

    def generate(
        self,
        prompt: str,
        max_length: int = 100,
        temperature: float = 0.6,
        top_p: float = 0.95,
        top_k: int = 20
    ) -> Dict:
        """
        Generate text from a prompt (POST ``/generate``).

        Args:
            prompt: Input prompt
            max_length: Maximum tokens to generate
            temperature: Sampling temperature
            top_p: Top-p sampling parameter
            top_k: Top-k sampling parameter

        Returns:
            Response with generated text
        """
        payload = {
            "prompt": prompt,
            "max_length": max_length,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
        }
        return self._post("/generate", payload)

    def chat(
        self,
        messages: List[Dict[str, str]],
        max_length: int = 200,
        temperature: float = 0.6,
        top_p: float = 0.95,
        top_k: int = 20
    ) -> Dict:
        """
        Chat with the model (POST ``/chat``).

        Args:
            messages: List of message dicts with 'role' and 'content'
            max_length: Maximum tokens to generate
            temperature: Sampling temperature
            top_p: Top-p sampling parameter
            top_k: Top-k sampling parameter

        Returns:
            Response with assistant message
        """
        payload = {
            "messages": messages,
            "max_length": max_length,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k,
        }
        return self._post("/chat", payload)

    def tokenize(self, text: str) -> Dict:
        """
        Tokenize text (POST ``/tokenize``).

        Args:
            text: Text to tokenize

        Returns:
            Response with token IDs
        """
        return self._post("/tokenize", {"text": text})

    def model_info(self) -> Dict:
        """Get model information (GET ``/info``)."""
        return self._get("/info")
|
|
|
|
def _print_banner(title: str) -> None:
    """Print a section title framed by two 60-character rules."""
    rule = "=" * 60
    print(rule)
    print(title)
    print(rule)


def _demo_generate(client: "QwenAPIClient") -> None:
    """Example 1: single-prompt text generation."""
    result = client.generate(
        prompt="What is artificial intelligence?",
        max_length=150,
        temperature=0.7,
    )
    print(f"Prompt: {result['prompt']}")
    print(f"Response: {result['generated_text']}\n")


def _demo_chat(client: "QwenAPIClient") -> None:
    """Example 2: one chat turn with a system prompt."""
    messages = [
        {"role": "system", "content": "You are a helpful Python assistant."},
        {"role": "user", "content": "How do I read a file in Python?"},
    ]
    result = client.chat(messages, max_length=200)
    print(f"User: {messages[-1]['content']}")
    print(f"Assistant: {result['assistant_response']}\n")


def _demo_multi_turn(client: "QwenAPIClient") -> None:
    """Example 3: two chat turns, feeding the first answer back as history."""
    conversation = [
        {"role": "system", "content": "You are an expert programmer."}
    ]

    conversation.append({"role": "user", "content": "What is recursion?"})
    result1 = client.chat(conversation, max_length=150)
    response1 = result1['assistant_response']
    print(f"User: {conversation[-1]['content']}")
    print(f"Assistant: {response1}\n")

    # The assistant's reply is appended so the follow-up question is sent
    # together with the earlier exchange.
    conversation.append({"role": "assistant", "content": response1})
    conversation.append({"role": "user", "content": "Can you give a code example?"})

    result2 = client.chat(conversation, max_length=200)
    response2 = result2['assistant_response']
    print(f"User: {conversation[-1]['content']}")
    print(f"Assistant: {response2}\n")


def _demo_tokenize(client: "QwenAPIClient") -> None:
    """Example 4: tokenize a short string and print the result."""
    result = client.tokenize("Hello, world!")
    print(f"Text: {result['text']}")
    print(f"Tokens: {result['token_ids']}")
    print(f"Number of tokens: {result['num_tokens']}\n")


def _demo_model_info(client: "QwenAPIClient") -> None:
    """Example 5: print the fields returned by the model-info endpoint."""
    info = client.model_info()
    print(f"Model Type: {info['model_type']}")
    print(f"Context Length: {info['context_length']}")
    print(f"Vocabulary Size: {info['vocab_size']}")
    print(f"Default Max Length: {info['default_max_length']}")
    print(f"Default Temperature: {info['default_temperature']}\n")


def _interactive_chat(client: "QwenAPIClient") -> None:
    """Run a read-eval-print chat loop until the user types 'quit'.

    The loop also ends cleanly on end-of-input (Ctrl-D) or Ctrl-C instead of
    surfacing a traceback.
    """
    _print_banner("Interactive Chat Mode")
    print("Enter 'quit' to exit\n")

    conversation = [
        {"role": "system", "content": "You are a helpful assistant."}
    ]

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Finish the prompt line so the shell prompt starts on a new line.
            print()
            break

        if user_input.lower() == "quit":
            break
        if not user_input:
            continue

        conversation.append({"role": "user", "content": user_input})
        try:
            result = client.chat(conversation, max_length=300, temperature=0.7)
            response = result['assistant_response']
        except Exception as e:
            # Drop the unanswered user turn; otherwise the next request would
            # carry a user message with no assistant reply after it.
            conversation.pop()
            print(f"Error: {e}\n")
            continue

        print(f"Assistant: {response}\n")
        conversation.append({"role": "assistant", "content": response})


def main():
    """Example usage of the API client.

    Checks the server's health, runs five canned examples (each isolated so
    one failure does not stop the rest), then starts an interactive chat.
    Returns early if the health check fails.
    """
    client = QwenAPIClient("http://localhost:8000")

    try:
        print("Checking API health...")
        try:
            health = client.health_check()
        except requests.exceptions.ConnectionError:
            print("ERROR: Could not connect to API server.")
            print("Make sure to run: python api_server.py")
            return
        except requests.exceptions.RequestException as e:
            # Server reachable but unhealthy (error status, timeout, ...).
            print(f"ERROR: Health check failed: {e}")
            return
        print(f"Status: {health.get('status', 'unknown')}\n")

        demos = (
            ("Example 1: Text Generation", _demo_generate),
            ("Example 2: Chat", _demo_chat),
            ("Example 3: Multi-turn Chat", _demo_multi_turn),
            ("Example 4: Tokenization", _demo_tokenize),
            ("Example 5: Model Information", _demo_model_info),
        )
        for title, demo in demos:
            _print_banner(title)
            try:
                demo(client)
            except Exception as e:
                # Demo boundary: report the failure and continue with the
                # next example.
                print(f"Error: {e}\n")

        _interactive_chat(client)
    finally:
        # Close the HTTP session on every exit path.
        client.session.close()
|
|
|
|
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
|