"""Gradio chat UI for Qwen3-0.6B that strips <think>...</think> blocks from replies."""

import re

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer


@torch.no_grad()  # no gradients are needed while loading weights
def load_model():
    """Load the Qwen3-0.6B tokenizer and model.

    Returns:
        tuple: (tokenizer, model) on success, (None, None) if loading fails.
    """
    print("Loading Qwen3-0.6B model...")
    try:
        tokenizer = AutoTokenizer.from_pretrained(
            "Qwen/Qwen3-0.6B", trust_remote_code=True
        )
        model = AutoModelForCausalLM.from_pretrained(
            "Qwen/Qwen3-0.6B",
            torch_dtype=torch.float16,
            device_map="auto",
            trust_remote_code=True,
        )
        print("Qwen3-0.6B model loaded successfully!")
        return tokenizer, model
    except Exception as e:
        print(f"Error loading Qwen3-0.6B: {e}")
        return None, None


# Load the model once at module import; downstream code tolerates (None, None).
tokenizer, model = load_model()


def remove_think_tags(text):
    """Remove <think>...</think> reasoning blocks from *text*.

    FIX: the original pattern was a bare lazy ``.*?`` which matches only
    empty strings and therefore removed nothing.  The tag delimiters must
    appear in the pattern; re.DOTALL lets ``.`` span newlines inside the
    think block.
    """
    cleaned_text = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    return cleaned_text.strip()


def generate_response(message, history, temperature=0.7, max_length=256):
    """Generate one assistant reply with Qwen3-0.6B.

    Args:
        message: current user message.
        history: list of [user, assistant] pairs from the Chatbot widget.
        temperature: sampling temperature; values <= 0.1 use greedy decoding.
        max_length: maximum number of new tokens to generate.

    Returns:
        The think-tag-cleaned assistant reply, or a warning string on failure.
    """
    if tokenizer is None or model is None:
        return "⚠️ Model is not loaded properly. Please check the console logs."

    try:
        # Convert Chatbot history into the chat-template messages format.
        messages = []
        for human_msg, assistant_msg in history:
            messages.extend([
                {"role": "user", "content": human_msg},
                {"role": "assistant", "content": assistant_msg},
            ])
        # Add the current message.
        messages.append({"role": "user", "content": message})

        # Build model inputs via the tokenizer's chat template.
        inputs = tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=True,
            return_dict=True,
            return_tensors="pt",
        ).to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_length,
                temperature=temperature,
                do_sample=temperature > 0.1,  # greedy at very low temperature
                top_p=0.9,
                repetition_penalty=1.1,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id,
            )

        # Decode only the newly generated tokens (everything after the prompt).
        response = tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[-1]:],
            skip_special_tokens=True,
        )

        # Clean think tags from the response.
        clean_response = remove_think_tags(response)
        if clean_response:
            return clean_response
        return "I'm here to help! What would you like to know?"
    except Exception as e:
        print(f"Generation error: {e}")
        return f"⚠️ Error generating response: {str(e)}"


def chat_interface(message, history, temperature, max_length):
    """Main chat callback: append the new exchange and clear the textbox."""
    if not message or not message.strip():
        return "", history or []

    bot_response = generate_response(message, history or [], temperature, max_length)
    new_history = (history or []) + [[message, bot_response]]
    return "", new_history


def clear_chat():
    """Clear the chat history."""
    return []


def retry_last_response(history, temperature, max_length):
    """Regenerate the assistant reply for the most recent user message."""
    if not history:
        return history

    # Drop the last exchange and re-ask its user message.
    last_conversation = history[:-1]
    last_user_message = history[-1][0]

    bot_response = generate_response(
        last_user_message, last_conversation, temperature, max_length
    )
    return last_conversation + [[last_user_message, bot_response]]


# Custom CSS: wide chat column on the left, settings column on the right.
custom_css = """
.gradio-container {
    font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    min-height: 100vh;
    padding: 20px;
}
.main-container {
    max-width: 1400px;
    margin: 0 auto;
    background: white;
    border-radius: 20px;
    box-shadow: 0 20px 40px rgba(0,0,0,0.1);
    overflow: hidden;
}
.header {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 30px;
    text-align: center;
}
.header h1 { margin: 0; font-size: 2.5em; font-weight: 700; }
.header p { margin: 10px 0 0 0; opacity: 0.9; font-size: 1.2em; }
.content { display: flex; min-height: 700px; gap: 0; }
/* Chat column - 80% width */
.chat-column {
    flex: 4; /* Increased from 3 to 4 for more space */
    display: flex;
    flex-direction: column;
    min-width: 0; /* Allow shrinking */
}
/* Control column - 20% width */
.control-column {
    flex: 1; /* Reduced to take less space */
    background: #f8f9fa;
    padding: 20px;
    border-left: 1px solid #e1e5e9;
    min-width: 280px; /* Minimum width for controls */
    max-width: 320px; /* Maximum width for controls */
}
.chatbot-container { flex: 1; display: flex; flex-direction: column; min-height: 600px; }
#chatbot {
    flex: 1;
    min-height: 550px !important;
    max-height: 550px !important;
    border: none !important;
    background: white !important;
    padding: 25px !important;
    margin: 0 !important;
    overflow-y: auto !important;
}
#chatbot .message {
    padding: 18px 22px !important;
    margin: 12px 0 !important;
    border-radius: 18px !important;
    max-width: 85% !important;
    line-height: 1.5 !important;
    font-size: 15px !important;
}
#chatbot .user-message {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    color: white !important;
    margin-left: auto !important;
    border: none !important;
}
#chatbot .bot-message {
    background: linear-gradient(135deg, #f8f9fa 0%, #e9ecef 100%) !important;
    color: #2d3748 !important;
    margin-right: auto !important;
    border: 1px solid #e2e8f0 !important;
}
.input-container { background: #f8f9fa; padding: 25px; border-top: 1px solid #e1e5e9; }
.control-panel {
    background: white;
    padding: 20px;
    border-radius: 15px;
    margin-bottom: 20px;
    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
    border: 1px solid #eef2f7;
}
.model-info {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
    color: white;
    padding: 20px;
    border-radius: 15px;
    box-shadow: 0 4px 15px rgba(102, 126, 234, 0.3);
}
.gr-button {
    background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
    border: none !important;
    color: white !important;
    border-radius: 12px !important;
    padding: 14px 28px !important;
    font-weight: 600 !important;
    margin: 5px !important;
    transition: all 0.3s ease !important;
    box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3) !important;
}
.gr-button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 6px 20px rgba(102, 126, 234, 0.4) !important;
}
.clear-btn {
    background: linear-gradient(135deg, #ff6b6b 0%, #ee5a24 100%) !important;
    box-shadow: 0 4px 12px rgba(255, 107, 107, 0.3) !important;
}
.retry-btn {
    background: linear-gradient(135deg, #00b894 0%, #00a085 100%) !important;
    box-shadow: 0 4px 12px rgba(0, 184, 148, 0.3) !important;
}
.textbox {
    border-radius: 15px !important;
    border: 2px solid #e2e8f0 !important;
    padding: 18px !important;
    font-size: 16px !important;
    background: white !important;
    box-shadow: 0 2px 8px rgba(0,0,0,0.05) !important;
    min-height: 80px !important;
}
.textbox:focus {
    border-color: #667eea !important;
    box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1), 0 2px 8px rgba(0,0,0,0.05) !important;
}
.examples-panel { background: white; padding: 25px; border-top: 1px solid #e1e5e9; }
.control-section { margin-bottom: 20px; }
.control-section:last-child { margin-bottom: 0; }
.slider-container { margin: 15px 0; }
.feature-list { list-style: none; padding: 0; margin: 10px 0 0 0; }
.feature-list li { padding: 6px 0; position: relative; padding-left: 20px; font-size: 14px; }
.feature-list li::before {
    content: '✓';
    position: absolute;
    left: 0;
    color: #48bb78;
    font-weight: bold;
}
.status-indicator {
    display: inline-block;
    width: 10px;
    height: 10px;
    border-radius: 50%;
    margin-right: 8px;
}
.status-online { background: #48bb78; box-shadow: 0 0 8px #48bb78; }
@media (max-width: 1024px) {
    .content { flex-direction: column; }
    .control-column { max-width: none; border-left: none; border-top: 1px solid #e1e5e9; }
    .chat-column { min-height: 500px; }
}
"""

# Create the Gradio interface
with gr.Blocks(css=custom_css, theme=gr.themes.Soft()) as demo:
    with gr.Column(elem_classes="main-container"):
        # Header
        with gr.Column(elem_classes="header"):
            gr.Markdown("# 🤖 Qwen3-0.6B Chatbot")
            gr.Markdown(
                "Chat with Alibaba's advanced Qwen3-0.6B model - "
                "Think tags automatically removed!"
            )

        with gr.Row(elem_classes="content"):
            # Left Column - Chat (80% width)
            with gr.Column(elem_classes="chat-column"):
                with gr.Column(elem_classes="chatbot-container"):
                    chatbot = gr.Chatbot(
                        # FIX: the greeting belongs to the assistant slot of the
                        # [user, assistant] pair; it was reversed originally.
                        value=[[None, "Hello! How can I assist you today? 😊"]],
                        label="",
                        elem_id="chatbot",
                        show_copy_button=True,
                        avatar_images=("👤", "🤖"),
                        height=550,
                        container=True,
                        show_label=False,
                    )

                with gr.Column(elem_classes="input-container"):
                    with gr.Row():
                        msg = gr.Textbox(
                            label="",
                            placeholder=(
                                "💭 Type your message here... "
                                "(Press Enter to send, Shift+Enter for new line)"
                            ),
                            lines=3,
                            scale=4,
                            container=False,
                            show_label=False,
                        )
                        with gr.Column(scale=1):
                            submit_btn = gr.Button("Send 🚀", size="lg")
                    with gr.Row():
                        clear_btn = gr.Button(
                            "🗑️ Clear Chat", elem_classes="clear-btn", size="sm"
                        )
                        retry_btn = gr.Button(
                            "🔄 Retry Last", elem_classes="retry-btn", size="sm"
                        )
                    # NOTE(review): the original HTML markup was stripped by a
                    # paste mangle; reconstructed as a minimal hint line.
                    gr.HTML(
                        "<div style='text-align:center; color:#718096; "
                        "font-size:13px;'>Press Enter to send &bull; "
                        "Shift+Enter for new line</div>"
                    )

            # Right Column - Controls (20% width)
            with gr.Column(elem_classes="control-column"):
                with gr.Column(elem_classes="control-panel"):
                    gr.Markdown("### ⚙️ Settings")

                    with gr.Column(elem_classes="control-section"):
                        gr.Markdown("**🎛️ Temperature**")
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.5,
                            value=0.7,
                            step=0.1,
                            label="",
                            show_label=False,
                        )
                        gr.Markdown(
                            "<small>Lower = more predictable<br>"
                            "Higher = more creative</small>"
                        )

                    with gr.Column(elem_classes="control-section"):
                        gr.Markdown("**📏 Max Length**")
                        max_length = gr.Slider(
                            minimum=50,
                            maximum=1000,
                            value=256,
                            step=50,
                            label="",
                            show_label=False,
                        )
                        gr.Markdown("<small>Response length in tokens</small>")

                with gr.Column(elem_classes="model-info"):
                    gr.Markdown("### ℹ️ Model Info")
                    if tokenizer and model:
                        gr.Markdown(
                            "**Status:** Online\n\n"
                            "**Model:** Qwen3-0.6B ✅\n\n"
                            "**Think Tags:** Auto-removed ✅"
                        )
                    else:
                        gr.Markdown(
                            "**Status:** Loading\n\n"
                            "Check console for details"
                        )

        # Examples Section
        with gr.Column(elem_classes="examples-panel"):
            gr.Markdown("### 💡 Try These Examples")
            gr.Examples(
                examples=[
                    "Explain quantum computing in simple terms",
                    "Write a short poem about artificial intelligence",
                    "What are the benefits of renewable energy?",
                    "How do I learn programming effectively?",
                    "Tell me an interesting fact about space exploration",
                    "Help me plan a healthy weekly meal plan",
                ],
                inputs=msg,
                label="Click any example to start chatting!",
                examples_per_page=6,
            )

    # Event handlers
    submit_event = msg.submit(
        chat_interface,
        inputs=[msg, chatbot, temperature, max_length],
        outputs=[msg, chatbot],
    )
    submit_btn.click(
        chat_interface,
        inputs=[msg, chatbot, temperature, max_length],
        outputs=[msg, chatbot],
    )
    clear_btn.click(clear_chat, outputs=[chatbot])
    retry_btn.click(
        retry_last_response,
        inputs=[chatbot, temperature, max_length],
        outputs=[chatbot],
    )


if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        share=False,
        show_error=True,
    )