import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
import os

# Hugging Face repository and file name of the pre-quantized GGUF checkpoint.
MODEL_NAME = "TheBloke/Llama-2-7B-Chat-GGUF"
MODEL_FILE = "llama-2-7b-chat.Q4_K_M.gguf"

# Fetch the checkpoint into ./models; hf_hub_download returns the local path.
print("📥 Downloading model from HuggingFace...")
model_path = hf_hub_download(repo_id=MODEL_NAME, filename=MODEL_FILE, local_dir="./models")
print(f"✅ Model downloaded to: {model_path}")

# Inference settings: 2048-token context window, 4 threads, no layers
# offloaded to a GPU, and llama.cpp's own logging silenced.
_LLAMA_SETTINGS = {
    "n_ctx": 2048,
    "n_threads": 4,
    "n_gpu_layers": 0,
    "verbose": False,
}

print("🚀 Loading model...")
llm = Llama(model_path=model_path, **_LLAMA_SETTINGS)
print("✅ Model loaded!")

def chat(message, history):
    """Generate the assistant's reply to ``message`` given the prior turns.

    The conversation is passed to the model as role/content messages via
    ``llm.create_chat_completion`` so that llama-cpp-python applies the chat
    template matching the loaded model. The previous implementation built a
    Llama 3 style prompt by hand (``<|start_header_id|>`` / ``<|eot_id|>``),
    which does not match the Llama 2 chat checkpoint this file loads.

    Args:
        message: The latest user message.
        history: Iterable of ``(user_msg, bot_msg)`` pairs from earlier turns,
            or ``None``. Empty or ``None`` entries within a pair are skipped.

    Returns:
        The generated reply text with surrounding whitespace stripped.
    """
    messages = []
    for user_msg, bot_msg in history or []:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if bot_msg:
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    # No explicit stop strings: the chat handler supplies the end-of-turn
    # markers appropriate for the model's own template.
    response = llm.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.9,
    )

    reply = response["choices"][0]["message"]["content"]
    return (reply or "").strip()

# Stylesheet handed to gr.Blocks(css=...) below.
# It loads the "Space Grotesk" web font from Google Fonts, paints a gradient
# page background, styles the element with id "chatbot", gives message bubbles,
# buttons and text inputs rounded translucent looks, and hides the footer.
# NOTE(review): selectors such as .message, .user, .bot, .primary and .prose
# presumably target Gradio's generated class names — confirm against the
# installed Gradio version, since those names are not defined in this file.
custom_css = """
@import url('https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@400;600;700&display=swap');

* {
    font-family: 'Space Grotesk', sans-serif !important;
}

.gradio-container {
    background: linear-gradient(135deg, #1e3a8a 0%, #7c3aed 50%, #db2777 100%) !important;
}

#chatbot {
    height: 650px !important;
    border-radius: 24px !important;
    border: 2px solid rgba(255,255,255,0.1) !important;
    box-shadow: 0 25px 50px -12px rgba(0,0,0,0.5) !important;
}

.message {
    padding: 18px 24px !important;
    border-radius: 20px !important;
    font-size: 15px !important;
    margin: 8px 0 !important;
    backdrop-filter: blur(10px) !important;
    box-shadow: 0 8px 32px 0 rgba(31, 38, 135, 0.37) !important;
}

.user {
    background: linear-gradient(135deg, rgba(147, 51, 234, 0.9) 0%, rgba(219, 39, 119, 0.9) 100%) !important;
    color: white !important;
    border: 1px solid rgba(255,255,255,0.2) !important;
}

.bot {
    background: linear-gradient(135deg, rgba(59, 130, 246, 0.9) 0%, rgba(147, 51, 234, 0.9) 100%) !important;
    color: white !important;
    border: 1px solid rgba(255,255,255,0.2) !important;
}

button {
    border-radius: 16px !important;
    font-weight: 600 !important;
    transition: all 0.3s ease !important;
}

button:hover {
    transform: translateY(-2px) !important;
    box-shadow: 0 12px 24px rgba(0,0,0,0.3) !important;
}

.primary {
    background: linear-gradient(135deg, #9333ea 0%, #db2777 100%) !important;
    border: none !important;
}

input, textarea {
    border-radius: 16px !important;
    border: 2px solid rgba(255,255,255,0.2) !important;
    background: rgba(255,255,255,0.1) !important;
    backdrop-filter: blur(10px) !important;
    color: white !important;
}

input::placeholder, textarea::placeholder {
    color: rgba(255,255,255,0.6) !important;
}

.prose {
    color: white !important;
}

.prose h1 {
    background: linear-gradient(135deg, #fbbf24 0%, #f59e0b 100%);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-weight: 700 !important;
}

footer {
    display: none !important;
}
"""

def _respond(message, history):
    """Adapt ``chat`` to the Chatbot component's list-of-pairs value.

    ``chat`` returns only the reply text, but the Chatbot output expects the
    whole conversation as a list of ``[user, bot]`` pairs. This handler appends
    the new turn to the history and also returns an empty string so the same
    event clears the input textbox.

    Args:
        message: Text currently in the input textbox.
        history: Current Chatbot value (list of ``[user, bot]`` pairs) or None.

    Returns:
        A ``(textbox_value, chatbot_value)`` tuple.
    """
    turns = list(history or [])
    # Ignore blank submissions instead of sending an empty prompt to the model.
    if not message or not message.strip():
        return "", turns
    turns.append([message, chat(message, turns)])
    return "", turns


with gr.Blocks(
    theme=gr.themes.Glass(
        primary_hue="purple",
        secondary_hue="pink",
    ),
    css=custom_css,
    title="🦙 Llama 3.2 AI"
) as demo:

    # Page header.
    gr.Markdown(
        """
        # 🦙 Llama Chat AI Assistant (TEST)
        ### ⚡ Testing deployment with pre-trained model
        """
    )

    # Conversation view; elem_id matches the #chatbot rule in custom_css.
    chatbot = gr.Chatbot(
        elem_id="chatbot",
        bubble_full_width=False,
        avatar_images=(
            "https://em-content.zobj.net/thumbs/120/apple/354/sparkles_2728.png",
            "https://em-content.zobj.net/thumbs/120/apple/354/llama_1f999.png"
        ),
        height=650,
        show_copy_button=True,
        likeable=True
    )

    # Input row: message box plus send button.
    with gr.Row():
        msg = gr.Textbox(
            placeholder="✨ Ask me anything...",
            show_label=False,
            scale=8,
            container=False
        )
        submit = gr.Button("Send 🚀", scale=1, variant="primary", size="lg")

    # Clickable sample prompts that populate the message box.
    gr.Examples(
        examples=[
            "🌍 What is the capital of France?",
            "🧠 Explain quantum computing simply",
            "💻 Write fibonacci in Python",
            "😴 Tips for better sleep?",
            "🔢 Continue: 2, 4, 6, 8...",
            "📝 Write a haiku about AI",
        ],
        inputs=msg,
        label="💡 Quick Start:"
    )

    with gr.Accordion("ℹ️ Model Information", open=False):
        gr.Markdown(
            """
            **Testing Model:**
            - 🤖 Model: Llama 2 7B Chat (Pre-trained)
            - ⚙️ Format: GGUF (q4_k_m quantization)
            - 📝 Note: This is a test deployment. Will be replaced with fine-tuned model.
            """
        )

    # Resets both the textbox and the conversation.
    clear = gr.ClearButton([msg, chatbot], value="🗑️ New Chat")

    # Button click and Enter key share one handler, which updates the
    # conversation and clears the textbox in a single event.
    submit.click(_respond, [msg, chatbot], [msg, chatbot])
    msg.submit(_respond, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    # Turn on the request queue, then start the server without a share link.
    demo.queue().launch(share=False)