"""
Qwen3:0.6B Text Generation App for Hugging Face Spaces

This app lets you generate text with a Qwen3:0.6B model trained on the TinyStories dataset.
You can control:
- The starting text (prompt)
- How many new tokens to generate (max_new_tokens; one token is roughly one word)
- How creative the output should be (temperature)
"""

import gradio as gr
import torch
import tiktoken
from pathlib import Path
from huggingface_hub import hf_hub_download

# Import our Qwen3 model
from Qwen3_model import Qwen3Model, generate_text_simple, text_to_token_ids, token_ids_to_text


class TextGenerator:
    """
    Load the trained Qwen3 model once and generate text from it on demand

    This makes it easy to:
    1. Load the trained model once at startup
    2. Generate text multiple times without reloading

    Attributes set by __init__:
    - config    : dict of architecture settings passed to Qwen3Model
    - device    : "cuda" when torch reports a GPU, otherwise "cpu"
    - tokenizer : tiktoken "gpt2" encoding
    - model     : Qwen3Model with the downloaded weights, in eval mode
    """

    def __init__(self, repo_id: str = "vuminhtue/qwen3_sentiment_tinystories") -> None:
        """
        Initialize the text generator

        Downloads the weight file from the HuggingFace Hub, builds the model
        from the configuration below, loads the weights, and switches the
        model to evaluation mode. Progress is printed to stdout.

        Parameters:
        -----------
        repo_id : str
            HuggingFace repository ID to download the model from
            Default: "vuminhtue/qwen3_sentiment_tinystories"

        Raises:
        -------
        Any exception raised by hf_hub_download is printed and re-raised.
        Errors from torch.load / load_state_dict propagate unchanged.
        """
        print("🚀 Loading Qwen3 model from HuggingFace...")
        print(f"   Repository: {repo_id}")

        # Configuration for Qwen3 0.6B model
        # These settings define the architecture of the model
        self.config = {
            "vocab_size": 151_936,      # Number of different tokens the model knows
            "context_length": 40_960,   # Maximum length of text it can process
            "emb_dim": 1024,            # Size of the embedding vectors
            "n_heads": 16,              # Number of attention heads
            "n_layers": 28,             # Number of transformer layers
            "hidden_dim": 3072,         # Size of the feed-forward network
            "head_dim": 128,            # Size of each attention head
            "qk_norm": True,            # Whether to normalize queries and keys
            "n_kv_groups": 8,           # Number of key-value groups
            "rope_base": 1_000_000.0,   # Base for rotary position encoding
            "dtype": torch.bfloat16,    # Data type for model weights
        }

        # Detect if we have a GPU available
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"   Using device: {self.device}")

        # Load the tokenizer (converts text to numbers and back)
        # We use GPT-2's tokenizer which works well for English text
        # NOTE(review): vocab_size above is larger than the GPT-2 vocabulary;
        # presumably the checkpoint was trained with this tokenizer - confirm.
        self.tokenizer = tiktoken.get_encoding("gpt2")
        print("   ✓ Tokenizer loaded")

        # Download the model file from HuggingFace
        # This will cache the file locally, so it only downloads once
        print("   📥 Downloading model from HuggingFace (this may take a moment)...")
        try:
            model_path = hf_hub_download(
                repo_id=repo_id,
                filename="Qwen3_200k_model_params.pt",
                repo_type="model"
            )
            print(f"   ✓ Model downloaded to: {model_path}")
        except Exception as e:
            # Report the failure in the startup log, then let it abort startup
            print(f"   ❌ Error downloading model: {e}")
            raise

        # Create the model with our configuration
        self.model = Qwen3Model(self.config)

        # Load the trained weights from the downloaded file
        # weights_only=True restricts unpickling to tensor data
        print("   ⚙️  Loading model weights...")
        self.model.load_state_dict(
            torch.load(
                model_path,
                map_location=torch.device(self.device),
                weights_only=True
            )
        )

        # Move model to the appropriate device (CPU or GPU)
        self.model = self.model.to(self.device)

        # Set to evaluation mode (disables training-specific features)
        self.model.eval()

        print("   ✓ Model loaded successfully!")
        print("✅ Ready to generate text!\n")

    def generate(self, prompt: str, max_new_tokens: int = 50, temperature: float = 1.0) -> str:
        """
        Generate text based on a prompt

        Parameters:
        -----------
        prompt : str
            The starting text (what you want the model to continue)
        max_new_tokens : int
            How many new tokens (roughly words) to generate.
            Integer-valued floats (e.g. from a UI slider) are accepted and
            converted with int().
        temperature : float
            Controls creativity:
            - Lower (0.1-0.7): More predictable, focused
            - Medium (0.8-1.0): Balanced
            - Higher (1.1-2.0): More creative, random

        Returns:
        --------
        str : The generated text (including the original prompt), or a
              message starting with "❌ Error generating text:" if anything
              fails. This method never raises, so the UI always gets a string.
        """
        try:
            # UI widgets may hand over a float; the helper presumably uses
            # this value as a step count, so make it an integer up front
            max_new_tokens = int(max_new_tokens)

            # Convert the text prompt to token IDs (numbers)
            input_ids = text_to_token_ids(prompt, self.tokenizer)
            input_ids = input_ids.to(self.device)

            # Inference only: make sure no autograd state is recorded.
            # generate_text_simple is defined elsewhere, so this holds whether
            # or not the helper disables gradients itself.
            with torch.no_grad():
                output_ids = generate_text_simple(
                    model=self.model,
                    idx=input_ids,
                    max_new_tokens=max_new_tokens,
                    context_size=self.config["context_length"],
                    temperature=temperature
                )

            # Convert the token IDs back to text
            generated_text = token_ids_to_text(output_ids, self.tokenizer)

            return generated_text

        except Exception as e:
            # Deliberate best-effort boundary: show the error in the output box
            return f"❌ Error generating text: {str(e)}"


# Build the single shared generator at import time so the model is loaded
# once and reused by every request handled below
print("=" * 70, "INITIALIZING TEXT GENERATION APP", "=" * 70, sep="\n")
generator = TextGenerator()


def generate_text_interface(prompt, max_new_tokens, temperature):
    """
    Gradio callback that validates the inputs and runs the generator

    Behaviour:
    - A missing or whitespace-only prompt yields a warning message and the
      generator is not called.
    - Otherwise max_new_tokens is capped at 200 and the result of
      generator.generate(...) is returned for display.
    """
    # Guard clause: there is nothing to continue from
    if not (prompt and prompt.strip()):
        return "⚠️ Please enter some text to start with!"

    # Cap the requested length to prevent very long generation times
    capped_tokens = min(max_new_tokens, 200)

    return generator.generate(prompt, capped_tokens, temperature)


# Create the Gradio interface
# This defines what the web app looks like and how it behaves
#
# Layout overview (components are placed in the order they are created
# inside the nested `with` contexts, so statement order here matters):
#   1. Header Markdown with usage instructions
#   2. One Row with two equal-width Columns:
#        left  - prompt textbox, two sliders, generate button
#        right - read-only result textbox
#   3. Clickable example inputs
#   4. Informational Markdown about the model and parameters
#   5. Event wiring for the button and the textbox submit event
with gr.Blocks(title="Qwen3:0.6B Text Generator", theme=gr.themes.Soft()) as demo:
    
    # Header
    gr.Markdown(
        """
        # 🤖 Qwen3:0.6B Text Generator
        
        Generate creative stories and text using a Qwen3:0.6B model trained on TinyStories!
        
        ### How to use:
        1. **Enter your starting text** (e.g., "Once upon a time")
        2. **Adjust the sliders** to control the output
        3. **Click Generate** to create text
        """
    )
    
    # Main content area
    with gr.Row():
        with gr.Column(scale=1):
            # Input section
            gr.Markdown("### 📝 Input")
            
            # Free-text prompt; its value is the first argument passed to
            # generate_text_interface
            prompt_input = gr.Textbox(
                label="Starting Text (Prompt)",
                placeholder="Once upon a time...",
                lines=3,
                info="Enter the text you want the model to continue"
            )
            
            # Control sliders
            gr.Markdown("### ⚙️ Generation Settings")
            
            # Upper bound (200) matches the cap applied in
            # generate_text_interface
            max_tokens_slider = gr.Slider(
                minimum=10,
                maximum=200,
                value=50,
                step=10,
                label="Max New Tokens",
                info="How many new tokens to generate (roughly = number of words)"
            )
            
            # Minimum is 0.1, so the UI never sends a zero temperature
            temperature_slider = gr.Slider(
                minimum=0.1,
                maximum=2.0,
                value=1.0,
                step=0.1,
                label="Temperature",
                info="Lower = more predictable, Higher = more creative"
            )
            
            # Generate button
            generate_btn = gr.Button(
                "✨ Generate Text", 
                variant="primary", 
                size="lg"
            )
        
        with gr.Column(scale=1):
            # Output section
            gr.Markdown("### 📖 Generated Text")
            
            # Read-only box that receives the string returned by
            # generate_text_interface (generated text, warning, or error)
            output_text = gr.Textbox(
                label="Result",
                lines=15,
                interactive=False,
                show_copy_button=True
            )
    
    # Example prompts to try
    # Each row is [prompt, max new tokens, temperature], in the same order as
    # `inputs`. No fn/outputs are passed, so presumably clicking an example
    # only fills the inputs and the user still has to press Generate.
    gr.Markdown("### 💡 Try these examples:")
    gr.Examples(
        examples=[
            ["Once upon a time", 50, 0.8],
            ["There was a little girl named", 60, 1.0],
            ["In a magical forest", 70, 1.2],
            ["A brave knight", 50, 0.7],
            ["The sun was shining and", 60, 0.9],
        ],
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        label="Click any example to try it"
    )
    
    # Information section
    gr.Markdown(
        """
        ---
        ### 📊 About This Model
        
        - **Model**: Qwen3:0.6B (596M parameters)
        - **Training Data**: TinyStories dataset (children's stories)
        - **Architecture**: 28 transformer layers with Grouped Query Attention
        - **Model Source**: [vuminhtue/qwen3_sentiment_tinystories](https://huggingface.co/vuminhtue/qwen3_sentiment_tinystories)
        
        ### 🎯 Understanding the Parameters
        
        **Max New Tokens:**
        - Controls the length of generated text
        - One token ≈ one word (roughly)
        - More tokens = longer output = slower generation
        
        **Temperature:**
        - `0.1 - 0.7`: Safe, predictable, focused responses
        - `0.8 - 1.0`: Balanced creativity and coherence
        - `1.1 - 2.0`: Very creative but may be less coherent
        
        ### ⚠️ Note
        
        This model was trained on children's stories, so it works best for:
        - Simple, clear narratives
        - Stories about everyday situations
        - Children's vocabulary and themes
        
        ---
        *Built with Qwen3:0.6B architecture • Trained on TinyStories • Powered by PyTorch • Model hosted on 🤗 HuggingFace*
        """
    )
    
    # Connect the button to the generation function
    # The three inputs are passed positionally as
    # (prompt, max_new_tokens, temperature)
    generate_btn.click(
        fn=generate_text_interface,
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        outputs=output_text
    )
    
    # Also run generation when the textbox fires its submit event, using the
    # same inputs and output as the button.
    # NOTE(review): with lines=3 the textbox is multi-line, so the key that
    # triggers submit may be Shift+Enter rather than Enter - confirm.
    prompt_input.submit(
        fn=generate_text_interface,
        inputs=[prompt_input, max_tokens_slider, temperature_slider],
        outputs=output_text
    )


# Start the web server only when this file is executed directly
if __name__ == "__main__":
    print("\n" + "=" * 70, "LAUNCHING GRADIO APP", "=" * 70, sep="\n")
    demo.launch()