"""Playground.Deeemoi — multi-model chat over quantized GGUF models.

Lets the user pick a GGUF model (downloaded on demand from the Hugging
Face Hub), chat with it through a Gradio ChatInterface, and prefill the
chat box with quick preset prompts.
"""

import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# GGUF model filenames
MODELS = {
    "DeepSeek Chat (7B Chat)": "deepseek-llm-7b-chat-Q4_K_M.gguf",
    "LLaMA 2 7B Chat": "llama-2-7b-chat.Q4_K_M.gguf",
    "Smol 7B Chat (GGUF)": "smol-7b-Q4_K_M.gguf",
    "DeepSeek Coder (6.7B Instruct)": "deepseek-coder-6.7b-instruct-Q4_K_M.gguf",
    # Tiny test model
    "TinyLlama 1.1B Test": "tinyllama-1.1b-chat-v1.0.Q4_K_M.gguf",
}

# Corresponding Hugging Face repo IDs (keys must match MODELS)
REPO_IDS = {
    "DeepSeek Chat (7B Chat)": "TheBloke/deepseek-llm-7B-chat-GGUF",
    "LLaMA 2 7B Chat": "TheBloke/Llama-2-7B-Chat-GGUF",
    "Smol 7B Chat (GGUF)": "TheBloke/smol-7B-GGUF",
    "DeepSeek Coder (6.7B Instruct)": "second-state/Deepseek-Coder-6.7B-Instruct-GGUF",
    # Tiny test model repo
    "TinyLlama 1.1B Test": "TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF",
}

# System prompts per model
SYSTEM_PROMPTS = {
    "DeepSeek Chat (7B Chat)": "You are a helpful assistant for friendly conversation:",
    "LLaMA 2 7B Chat": "You are a helpful general AI assistant:",
    "Smol 7B Chat (GGUF)": "You are a friendly general chat assistant:",
    "DeepSeek Coder (6.7B Instruct)": "You are an instruction-following assistant:",
    "TinyLlama 1.1B Test": "You are a lightweight test chat assistant:",
}

# Cache of already-instantiated Llama objects, keyed by display name,
# so switching back to a model does not reload it from disk.
loaded_models = {}


def load_model(choice):
    """Return a cached Llama instance for *choice*, downloading on first use.

    Parameters:
        choice: a key of MODELS / REPO_IDS.

    Returns:
        llama_cpp.Llama loaded from the downloaded GGUF file.
    """
    if choice not in loaded_models:
        # Download GGUF model file from Hugging Face Hub (cached locally by hf_hub)
        model_path = hf_hub_download(repo_id=REPO_IDS[choice], filename=MODELS[choice])
        # BUG FIX: the Llama constructor's keyword is `model_path`, not `model` —
        # the original raised TypeError on the first load.
        loaded_models[choice] = Llama(model_path=model_path, n_ctx=2048)
    return loaded_models[choice]


def generate_chat(model_choice, history):
    """Answer the pending user turn in *history* with the chosen model.

    Parameters:
        model_choice: key of MODELS selecting which LLM to use.
        history: list of (user, assistant) pairs; the last pair holds the
            new user message (assistant slot ignored/overwritten).

    Returns:
        The same history list with the last pair completed.
    """
    llm = load_model(model_choice)
    user_msg = history[-1][0]
    prompt = f"{SYSTEM_PROMPTS.get(model_choice, '')}\n{user_msg}"
    out = llm(prompt, max_tokens=150)["choices"][0]["text"]
    history[-1] = (user_msg, out)
    return history


def _chat_fn(message, history, model_choice):
    """Adapter for gr.ChatInterface: fn(message, history, *additional_inputs).

    BUG FIX: the original passed a lambda whose first parameter was named
    `history` (so it actually received the user message) and defaulted
    `model_choice` to the Dropdown *component object*, not its value.
    Works on a copy so the ChatInterface-owned history is not mutated.
    """
    pending = list(history) + [(message, None)]
    return generate_chat(model_choice, pending)[-1][1]


# Quick preset helper — returns the text that should prefill the chat box.
def preset_text(text):
    return text


with gr.Blocks() as demo:
    gr.Markdown("# Playground.Deeemoi — Multi-Model Chat + Quick Prompts")
    selector = gr.Dropdown(list(MODELS.keys()), label="Choose Model")

    with gr.Row():
        explain_btn = gr.Button("Explain")
        summarize_btn = gr.Button("Summarize")
        translate_btn = gr.Button("Translate to Spanish")
        code_help_btn = gr.Button("Code Help")

    chat_ui = gr.ChatInterface(
        fn=_chat_fn,
        # Correct mechanism to feed the dropdown's *value* into the chat fn.
        additional_inputs=[selector],
        title="Chat with Quantized GGUF Models",
    )

    # BUG FIX: a ChatInterface is a container, not a valid event output —
    # presets must target its input textbox component.
    explain_btn.click(lambda: preset_text("Explain this clearly:"), None, chat_ui.textbox)
    summarize_btn.click(lambda: preset_text("Summarize this text:"), None, chat_ui.textbox)
    translate_btn.click(lambda: preset_text("Translate to Spanish:"), None, chat_ui.textbox)
    code_help_btn.click(lambda: preset_text("Help with this code snippet:"), None, chat_ui.textbox)

    # BUG FIX: with inputs=None Gradio calls the fn with zero arguments,
    # so `lambda _: []` raised TypeError; clear the chatbot on model switch.
    selector.change(lambda: [], None, chat_ui.chatbot)

# Guard the launch so importing this module does not start a server.
if __name__ == "__main__":
    demo.launch()