import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

# Model configuration
MODEL_REPO = "druvx13/Qwen3-0.6B-Q5_0-GGUF"
MODEL_FILE = "qwen3-0.6b-q5_0.gguf"
CACHE_DIR = "./model_cache"
MAX_TOKENS = 200

# Initialize model (loads once at startup)
def load_model():
    """Download and load the GGUF model, returning a ready Llama instance."""
    os.makedirs(CACHE_DIR, exist_ok=True)

    # Download model if not already cached
    model_path = hf_hub_download(
        repo_id=MODEL_REPO,
        filename=MODEL_FILE,
        cache_dir=CACHE_DIR,
        force_download=False  # Set to True to bypass the cache
    )

    return Llama(
        model_path=model_path,  # Local path returned by hf_hub_download
        n_ctx=2048,             # Context window size
        n_threads=4,            # CPU threads for faster inference
        verbose=False           # Disable debug logs
    )

# Load model at startup
llm = load_model()

# Generation function with sampling parameters
def generate_text(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
    """Generate text from the GGUF model with user-controlled sampling."""
    try:
        output = llm(
            prompt=prompt,
            max_tokens=max_tokens,
            temperature=temp,
            top_p=top_p,
            echo=False  # Don't repeat the input prompt in the output
        )
        return output["choices"][0]["text"]
    except Exception as e:
        return f"Error generating text: {str(e)}"

# UI components
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("""
    # 🧠 Qwen3-0.6B Text Generator (GGUF Version)
    Enter a prompt and adjust the parameters to generate text with the quantized Qwen3-0.6B model.
    """)

    with gr.Row():
        with gr.Column():
            # Input components
            prompt = gr.Textbox(
                label="Input Prompt",
                placeholder="Enter your prompt here...",
                lines=5
            )
            max_tokens = gr.Slider(
                minimum=50, maximum=500, value=MAX_TOKENS, step=50,
                label="Max Output Length"
            )
            temp = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.7, step=0.1,
                label="Creativity (Temperature)"
            )
            top_p = gr.Slider(
                minimum=0.1, maximum=1.0, value=0.95, step=0.05,
                label="Top-p Sampling"
            )

        with gr.Column():
            # Output and button
            output = gr.Textbox(label="Generated Text", lines=10)
            generate_btn = gr.Button("🚀 Generate", variant="primary")

    # Event handler
    generate_btn.click(
        fn=generate_text,
        inputs=[prompt, max_tokens, temp, top_p],
        outputs=output
    )

# Launch app
demo.launch()
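
# --- Optional: streaming output (hedged sketch, not part of the app above) ---
# llama-cpp-python can stream completions by passing stream=True, which makes
# the call return an iterator of partial chunks instead of a single dict, and
# Gradio accepts generator functions as event handlers. Wiring the two together
# looks roughly like the sketch below (an untested variant of generate_text();
# generate_text_stream is a hypothetical name, and the function would need to
# be defined above the gr.Blocks section, not here after demo.launch()):
#
# def generate_text_stream(prompt, max_tokens=MAX_TOKENS, temp=0.7, top_p=0.95):
#     partial = ""
#     for chunk in llm(prompt=prompt, max_tokens=max_tokens,
#                      temperature=temp, top_p=top_p,
#                      echo=False, stream=True):
#         partial += chunk["choices"][0]["text"]
#         yield partial  # Gradio re-renders the output Textbox on each yield
#
# Swapping fn=generate_text for fn=generate_text_stream in generate_btn.click()
# would then show tokens as they arrive instead of waiting for the full reply.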