"""
Client for Qwen ONNX Model API
Use this to interact with the server started by api_server.py
"""

import requests
import json
from typing import List, Dict, Optional

class QwenAPIClient:
    """HTTP client for the Qwen ONNX model API server.

    Every call is sent through a single ``requests.Session`` owned by the
    client. The client can be used as a context manager so that the session
    is closed when the ``with`` block exits; otherwise call :meth:`close`.
    """

    def __init__(
        self,
        base_url: str = "http://localhost:8000",
        timeout: Optional[float] = None
    ):
        """
        Initialize the API client.

        Args:
            base_url: Base URL of the API server (default: localhost:8000).
                Trailing slashes are stripped.
            timeout: Value passed as ``timeout`` to every request. ``None``
                (the default) applies no timeout, which matches the
                behaviour before this argument existed.
        """
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout
        self.session = requests.Session()

    def __enter__(self) -> "QwenAPIClient":
        """Return the client itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the session on leaving the ``with`` block."""
        self.close()

    def close(self) -> None:
        """Close the underlying session."""
        self.session.close()

    def _request(self, method: str, path: str, payload: Optional[Dict] = None) -> Dict:
        """
        Send one request and return the decoded JSON body.

        Args:
            method: HTTP method name ("GET" or "POST").
            path: Endpoint path starting with "/", appended to ``base_url``.
            payload: JSON-serialisable request body, or None for no body.

        Returns:
            The response body parsed by ``response.json()``.

        Raises:
            requests.HTTPError: via ``raise_for_status()`` when the server
                answers with an error status.
        """
        response = self.session.request(
            method,
            f"{self.base_url}{path}",
            json=payload,
            timeout=self.timeout,
        )
        # Surface HTTP error statuses as exceptions before touching the body.
        response.raise_for_status()
        return response.json()

    def health_check(self) -> Dict:
        """Check if the API is healthy (GET /health)."""
        return self._request("GET", "/health")

    def generate(
        self,
        prompt: str,
        max_length: int = 100,
        temperature: float = 0.6,
        top_p: float = 0.95,
        top_k: int = 20
    ) -> Dict:
        """
        Generate text from a prompt (POST /generate).

        Args:
            prompt: Input prompt
            max_length: Maximum tokens to generate
            temperature: Sampling temperature
            top_p: Top-p sampling parameter
            top_k: Top-k sampling parameter

        Returns:
            Response with generated text
        """
        payload = {
            "prompt": prompt,
            "max_length": max_length,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k
        }
        return self._request("POST", "/generate", payload)

    def chat(
        self,
        messages: List[Dict[str, str]],
        max_length: int = 200,
        temperature: float = 0.6,
        top_p: float = 0.95,
        top_k: int = 20
    ) -> Dict:
        """
        Chat with the model (POST /chat).

        Args:
            messages: List of message dicts with 'role' and 'content'
            max_length: Maximum tokens to generate
            temperature: Sampling temperature
            top_p: Top-p sampling parameter
            top_k: Top-k sampling parameter

        Returns:
            Response with assistant message
        """
        payload = {
            "messages": messages,
            "max_length": max_length,
            "temperature": temperature,
            "top_p": top_p,
            "top_k": top_k
        }
        return self._request("POST", "/chat", payload)

    def tokenize(self, text: str) -> Dict:
        """
        Tokenize text (POST /tokenize).

        Args:
            text: Text to tokenize

        Returns:
            Response with token IDs
        """
        return self._request("POST", "/tokenize", {"text": text})

    def model_info(self) -> Dict:
        """Get model information (GET /info)."""
        return self._request("GET", "/info")


def main():
    """Example usage of the API client.

    Checks server health, runs five canned examples (generation, chat,
    multi-turn chat, tokenization, model info) and then enters an
    interactive chat loop that ends on 'quit', end-of-input or Ctrl-C.
    Returns early if the health check fails.
    """

    def banner(title: str) -> None:
        # Section header shared by every example below.
        print("="*60)
        print(title)
        print("="*60)

    # Initialize client
    client = QwenAPIClient("http://localhost:8000")

    # Check health
    print("Checking API health...")
    try:
        health = client.health_check()
        print(f"Status: {health['status']}\n")
    except requests.exceptions.ConnectionError:
        print("ERROR: Could not connect to API server.")
        print("Make sure to run: python api_server.py")
        return
    except requests.exceptions.RequestException as e:
        # Server reachable but unhealthy (error status, timeout, ...):
        # the examples below cannot succeed either, so stop here.
        print(f"ERROR: Health check failed: {e}")
        return

    # Example 1: Generate text
    banner("Example 1: Text Generation")

    try:
        result = client.generate(
            prompt="What is artificial intelligence?",
            max_length=150,
            temperature=0.7
        )
        print(f"Prompt: {result['prompt']}")
        print(f"Response: {result['generated_text']}\n")
    except Exception as e:
        print(f"Error: {e}\n")

    # Example 2: Chat
    banner("Example 2: Chat")

    try:
        messages = [
            {"role": "system", "content": "You are a helpful Python assistant."},
            {"role": "user", "content": "How do I read a file in Python?"}
        ]

        result = client.chat(messages, max_length=200)
        print(f"User: {messages[-1]['content']}")
        print(f"Assistant: {result['assistant_response']}\n")

    except Exception as e:
        print(f"Error: {e}\n")

    # Example 3: Multi-turn conversation
    banner("Example 3: Multi-turn Chat")

    try:
        conversation = [
            {"role": "system", "content": "You are an expert programmer."}
        ]

        # Turn 1
        conversation.append({"role": "user", "content": "What is recursion?"})
        result1 = client.chat(conversation, max_length=150)
        response1 = result1['assistant_response']
        print(f"User: {conversation[-1]['content']}")
        print(f"Assistant: {response1}\n")

        # Add to conversation and continue
        conversation.append({"role": "assistant", "content": response1})
        conversation.append({"role": "user", "content": "Can you give a code example?"})

        result2 = client.chat(conversation, max_length=200)
        response2 = result2['assistant_response']
        print(f"User: {conversation[-1]['content']}")
        print(f"Assistant: {response2}\n")

    except Exception as e:
        print(f"Error: {e}\n")

    # Example 4: Tokenization
    banner("Example 4: Tokenization")

    try:
        result = client.tokenize("Hello, world!")
        print(f"Text: {result['text']}")
        print(f"Tokens: {result['token_ids']}")
        print(f"Number of tokens: {result['num_tokens']}\n")

    except Exception as e:
        print(f"Error: {e}\n")

    # Example 5: Model info
    banner("Example 5: Model Information")

    try:
        info = client.model_info()
        print(f"Model Type: {info['model_type']}")
        print(f"Context Length: {info['context_length']}")
        print(f"Vocabulary Size: {info['vocab_size']}")
        print(f"Default Max Length: {info['default_max_length']}")
        print(f"Default Temperature: {info['default_temperature']}\n")

    except Exception as e:
        print(f"Error: {e}\n")

    # Interactive chat mode
    banner("Interactive Chat Mode")
    print("Enter 'quit' to exit\n")

    conversation = [
        {"role": "system", "content": "You are a helpful assistant."}
    ]

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Ctrl-D / Ctrl-C at the prompt: leave the loop without a traceback.
            print()
            break

        if user_input.lower() == "quit":
            break

        if not user_input:
            continue

        conversation.append({"role": "user", "content": user_input})
        try:
            result = client.chat(conversation, max_length=300, temperature=0.7)
            response = result['assistant_response']
        except Exception as e:
            # Roll back the unanswered user turn so the history sent on the
            # next request still alternates user/assistant messages.
            conversation.pop()
            print(f"Error: {e}\n")
            continue

        print(f"Assistant: {response}\n")
        conversation.append({"role": "assistant", "content": response})


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()