"""Gradio chat interface for Hugging Face Hub models.

Streams chat completions from the Hugging Face Inference API and supports
both text-only and multimodal (text + image) conversations.
"""

import base64
import mimetypes
import os
from typing import Any, Dict, Generator, List, Optional

import gradio as gr
from huggingface_hub import InferenceClient

# Default model configurations
DEFAULT_MODEL = "meta-llama/Llama-3.2-11B-Vision-Instruct"
DEFAULT_SYSTEM_MESSAGE = "You are a helpful, harmless, and honest AI assistant."

# Available models from Hugging Face
AVAILABLE_MODELS = [
    "meta-llama/Llama-3.2-11B-Vision-Instruct",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.1-8B-Instruct",
    "mistralai/Mistral-7B-Instruct-v0.3",
    "HuggingFaceH4/zephyr-7b-beta",
    "microsoft/Phi-3-mini-4k-instruct",
    "google/gemma-2-2b-it",
    "Qwen/Qwen2.5-7B-Instruct",
]


def get_inference_client(token: Optional[str] = None) -> InferenceClient:
    """Create an InferenceClient with the given token, falling back to HF_TOKEN."""
    return InferenceClient(token=token or os.getenv("HF_TOKEN"))


def _image_to_url(image: str) -> str:
    """Return *image* as a URL embeddable in a chat message.

    Remote http(s)/data URLs pass through unchanged; local file paths --
    which is what Gradio provides for uploads -- are inlined as base64
    data URLs so the inference API can read them.
    """
    if image.startswith(("http://", "https://", "data:")):
        return image
    mime = mimetypes.guess_type(image)[0] or "image/png"
    with open(image, "rb") as fh:
        payload = base64.b64encode(fh.read()).decode("ascii")
    return f"data:{mime};base64,{payload}"


def format_messages(
    message: str,
    history: List[Dict[str, str]],
    system_message: str,
    image: Optional[Any] = None,
) -> List[Dict[str, Any]]:
    """Format messages for the chat-completions API.

    Args:
        message: Current user message text.
        history: Prior turns as ``{"role": ..., "content": ...}`` dicts.
        system_message: System prompt; skipped when empty.
        image: Optional image (URL or local file path) for multimodal models.

    Returns:
        Message list in OpenAI-compatible chat format.
    """
    messages: List[Dict[str, Any]] = []

    # Add system message if present
    if system_message:
        messages.append({"role": "system", "content": system_message})

    # Add conversation history
    for msg in history:
        messages.append({"role": msg["role"], "content": msg["content"]})

    # Add current message, with image parts if multimodal
    if image is not None:
        # Multimodal content is a list of typed parts. The chat-completions
        # endpoint expects the OpenAI-style
        # {"type": "image_url", "image_url": {"url": ...}} shape, not a bare
        # {"type": "image", "url": ...} entry.
        content = [
            {"type": "image_url", "image_url": {"url": _image_to_url(image)}},
            {"type": "text", "text": message},
        ]
        messages.append({"role": "user", "content": content})
    else:
        messages.append({"role": "user", "content": message})

    return messages


def chat_response(
    message: str,
    history: List[Dict[str, str]],
    model: str,
    system_message: str,
    temperature: float,
    max_tokens: int,
    top_p: float,
    token: str,
    image: Optional[Any] = None,
) -> Generator[str, None, None]:
    """Generate a streaming chat response from a Hugging Face model.

    Yields the accumulated partial message after each streamed chunk; on
    failure yields a single human-readable error string instead of raising,
    so the UI shows the problem in the chat window.
    """
    try:
        client = get_inference_client(token if token else None)

        # Format messages
        messages = format_messages(message, history, system_message, image)

        # Stream the response
        stream = client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            stream=True,
        )

        partial_message = ""
        for chunk in stream:
            if chunk.choices and chunk.choices[0].delta.content:
                partial_message += chunk.choices[0].delta.content
                yield partial_message
    except Exception as e:  # surface API errors to the UI rather than crash
        error_msg = f"Error: {str(e)}"
        if "401" in str(e):
            error_msg = "Authentication Error: Please provide a valid Hugging Face token."
        elif "404" in str(e):
            error_msg = f"Model '{model}' not found or not available."
        elif "429" in str(e):
            error_msg = "Rate limit exceeded. Please try again later."
        yield error_msg


def clear_chat():
    """Clear the chat history (None resets a Chatbot component)."""
    return None


def get_model_info(model: str) -> str:
    """Return a short human-readable description of the selected model."""
    info = {
        "meta-llama/Llama-3.2-11B-Vision-Instruct": "Multimodal model supporting both text and images. Great for vision tasks.",
        "meta-llama/Llama-3.2-3B-Instruct": "Efficient small model good for quick responses and simpler tasks.",
        "meta-llama/Llama-3.1-8B-Instruct": "Balanced performance and quality. Good general-purpose assistant.",
        "mistralai/Mistral-7B-Instruct-v0.3": "Strong performance on reasoning and coding tasks.",
        "HuggingFaceH4/zephyr-7b-beta": "Fine-tuned for helpful and engaging conversations.",
        "microsoft/Phi-3-mini-4k-instruct": "Compact model with strong reasoning capabilities.",
        "google/gemma-2-2b-it": "Lightweight model from Google, good for everyday tasks.",
        "Qwen/Qwen2.5-7B-Instruct": "Strong multilingual capabilities and long context understanding.",
    }
    return info.get(model, "No information available.")


def toggle_multimodal(multimodal: bool) -> Dict[str, Any]:
    """Return component-update kwargs toggling multimodal input visibility.

    NOTE(review): currently unused; if wired up, ``gr.update(...)`` would be
    the conventional return value -- confirm before use.
    """
    return {"visible": multimodal, "value": None}


# Custom theme for modern appearance
custom_theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
    font=gr.themes.GoogleFont("Inter"),
    text_size="md",
    spacing_size="md",
    radius_size="lg",
).set(
    button_primary_background_fill="*primary_600",
    button_primary_background_fill_hover="*primary_700",
    button_secondary_background_fill="*neutral_100",
    button_secondary_background_fill_hover="*neutral_200",
    block_title_text_weight="600",
    block_label_text_weight="500",
)

# CSS for additional styling
custom_css = """
.gradio-container { max-width: 1400px !important; }
.chatbot-container { min-height: 500px; }
.settings-accordion { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); }
.built-with { text-align: center; padding: 10px; margin-top: 20px; color: #6b7280; font-size: 0.875rem; }
.built-with a { color: #4f46e5; text-decoration: none; font-weight: 500; }
.built-with a:hover { text-decoration: underline; }
"""

with gr.Blocks(theme=custom_theme, css=custom_css) as demo:
    # Header
    gr.Markdown("""
    # 🤖 Hugging Face Chat Interface
    Chat with state-of-the-art language models from the Hugging Face Hub.
    Supports both text-only and multimodal (text + image) conversations.
    """)

    # Built with anycoder link (content intentionally empty in original)
    gr.Markdown("""
    """)

    # Shared state carrying the uploaded image from the submit handler to the
    # bot handler. The original code created two *different* anonymous
    # gr.State() components (one in outputs, one in inputs), so the image
    # never actually reached bot_response_handler.
    image_state = gr.State(None)

    with gr.Row():
        # Main chat area
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Conversation",
                height=500,
                type="messages",
                show_copy_button=True,
                avatar_images=(
                    "https://cdn-icons-png.flaticon.com/512/1077/1077114.png",  # user
                    "https://cdn-icons-png.flaticon.com/512/4712/4712035.png",  # assistant
                ),
            )

            with gr.Row():
                with gr.Column(scale=10):
                    msg_input = gr.MultimodalTextbox(
                        label="Message",
                        placeholder="Type your message here...",
                        show_label=False,
                        sources=["upload", "clipboard"],
                        file_count="single",
                        file_types=["image"],
                        submit_btn=True,
                        stop_btn=True,
                    )
                with gr.Column(scale=1, min_width=80):
                    clear_btn = gr.ClearButton(
                        components=[chatbot, msg_input],
                        value="🗑️",
                        size="lg",
                    )

        # Settings sidebar
        with gr.Column(scale=1, min_width=300):
            with gr.Accordion("⚙️ Model Settings", open=True):
                model_dropdown = gr.Dropdown(
                    choices=AVAILABLE_MODELS,
                    value=DEFAULT_MODEL,
                    label="Model",
                    info="Select a Hugging Face model",
                )
                model_info = gr.Textbox(
                    value=get_model_info(DEFAULT_MODEL),
                    label="Model Info",
                    interactive=False,
                    lines=3,
                )
                hf_token = gr.Textbox(
                    label="Hugging Face Token",
                    placeholder="hf_... (optional)",
                    type="password",
                    info="Required for some models. Get yours at huggingface.co/settings/tokens",
                )
                system_msg = gr.Textbox(
                    label="System Message",
                    value=DEFAULT_SYSTEM_MESSAGE,
                    lines=3,
                    info="Instructions for the AI's behavior",
                )

            with gr.Accordion("🔧 Generation Parameters", open=False):
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.7,
                    step=0.1,
                    label="Temperature",
                    info="Higher = more creative, lower = more focused",
                )
                max_tokens = gr.Slider(
                    minimum=50,
                    maximum=4096,
                    value=1024,
                    step=64,
                    label="Max Tokens",
                    info="Maximum response length",
                )
                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.9,
                    step=0.05,
                    label="Top-p (Nucleus Sampling)",
                    info="Controls diversity of outputs",
                )

            with gr.Accordion("ℹ️ About", open=False):
                gr.Markdown("""
                ### How to use:
                1. **Select a model** from the dropdown
                2. **Type your message** in the chat box
                3. **Upload images** (for multimodal models) using the paperclip icon
                4. **Adjust parameters** to control the response style

                ### Tips:
                - Use **temperature** to control creativity
                - **Vision models** (like Llama 3.2 11B) support image understanding
                - Add a **Hugging Face token** for better rate limits

                ### Privacy:
                Messages are sent to Hugging Face's inference API.
                Your token is only used for authentication and never stored.
                """)

    # Event handlers
    def user_message_handler(message: Dict[str, Any], history: List[Dict[str, str]]):
        """Append the submitted user turn to history and extract any image.

        Returns (textbox reset value, updated history, image path or None).
        None -- not "" -- is the valid clear value for a MultimodalTextbox.
        """
        text = message.get("text", "")
        files = message.get("files", [])
        image = files[0] if files else None

        # Add user message to history
        history = history + [{"role": "user", "content": text}]
        return None, history, image

    def bot_response_handler(
        history: List[Dict[str, str]],
        model: str,
        system_msg: str,
        temperature: float,
        max_tokens: int,
        top_p: float,
        token: str,
        image: Any,
    ):
        """Stream the assistant reply into the chat history.

        Yields the history after every streamed chunk so the chat window
        updates incrementally (the original only yielded once, after the
        stream was fully drained).
        """
        if not history:
            # Generators must yield -- a bare `return history` value is
            # discarded by the caller.
            yield history
            return

        # Get the last user message
        last_message = ""
        for msg in reversed(history):
            if msg["role"] == "user":
                last_message = msg["content"]
                break

        if not last_message:
            yield history
            return

        # Stream the response, updating the trailing assistant message
        for partial in chat_response(
            message=last_message,
            history=history[:-1],  # exclude the user turn we just appended
            model=model,
            system_message=system_msg,
            temperature=temperature,
            max_tokens=max_tokens,
            top_p=top_p,
            token=token,
            image=image,
        ):
            if history and history[-1]["role"] == "assistant":
                history[-1]["content"] = partial
            else:
                history = history + [{"role": "assistant", "content": partial}]
            yield history

    # Update model info when model changes
    model_dropdown.change(
        fn=get_model_info,
        inputs=model_dropdown,
        outputs=model_info,
        # NOTE(review): api_visibility is claimed "Gradio 6" syntax by the
        # original author; it does not exist in current stable Gradio -- verify.
        api_visibility="private",
    )

    # Chat submission: user turn first, then the streamed bot reply.
    # Both ends of the chain share image_state so the upload actually
    # reaches the model.
    msg_input.submit(
        fn=user_message_handler,
        inputs=[msg_input, chatbot],
        outputs=[msg_input, chatbot, image_state],
        queue=False,
    ).then(
        fn=bot_response_handler,
        inputs=[
            chatbot,
            model_dropdown,
            system_msg,
            temperature,
            max_tokens,
            top_p,
            hf_token,
            image_state,
        ],
        outputs=chatbot,
        api_visibility="public",
    )

    # Example conversations
    gr.Examples(
        examples=[
            [{"text": "Explain quantum computing in simple terms", "files": []}],
            [{"text": "Write a Python function to calculate fibonacci numbers", "files": []}],
            [{"text": "What can you see in this image?", "files": ["https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"]}],
            [{"text": "Help me brainstorm ideas for a science fiction story", "files": []}],
        ],
        inputs=msg_input,
        label="Example Prompts (click to try)",
    )

# Launch with Gradio 6 syntax - all parameters in launch()
# NOTE(review): theme/css/footer_links in launch() are not accepted by current
# stable Gradio (theme/css belong on gr.Blocks, where they are already set);
# kept as the author claims Gradio 6 -- verify against the installed version.
demo.launch(
    theme=custom_theme,
    css=custom_css,
    footer_links=[
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
        "gradio",
        "api",
    ],
    show_error=True,
    pwa=True,
    favicon_path="https://huggingface.co/front/assets/huggingface_logo-noborder.svg",
)