import os
import time
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# ----------------------------
# Model
# ----------------------------
# Resolve the GGUF weights file through the Hugging Face Hub; the returned
# value is the path handed to llama.cpp below.
model_path = hf_hub_download(
    filename="Qwen2.5-Coder-0.5B-Instruct-abliterated-f16.gguf",
    repo_id="bartowski/Qwen2.5-Coder-0.5B-Instruct-abliterated-GGUF",
)

# Runtime settings for the model: 4096-token context window, 512-token
# batches, one thread per CPU reported by the OS, and n_gpu_layers=0 (no
# layers offloaded to a GPU).
_llama_settings = {
    "model_path": model_path,
    "n_ctx": 4096,
    "n_threads": os.cpu_count(),
    "n_batch": 512,
    "n_gpu_layers": 0,
    "verbose": False,
}
llm = Llama(**_llama_settings)

# Throwaway one-token completion at import time -- presumably so the first
# real chat request does not pay one-time initialization costs.
llm("warmup", max_tokens=1)


# ----------------------------
# Code Explainer Prompt
# ----------------------------
# System message for the chat prompt assembled in generate_response: it sets
# the assistant's persona (senior engineer / coding teacher) and lists the
# behaviors expected of it. Note that the literal starts and ends with a
# newline because of the triple-quote layout.
SYSTEM_PROMPT = """
You are a senior software engineer and coding teacher.

Your job:
- Explain code clearly step by step
- Break complex logic into simple parts
- Use examples when needed
- If code has bugs, explain the bug and fix it
- Keep explanations beginner friendly but technical when needed
"""


# ----------------------------
# Chat function with typing animation
# ----------------------------
def _history_to_turns(history):
    """Flatten Gradio chat history into an ordered list of (role, text) pairs.

    Two history layouts are accepted:
    - pair style: each entry is a ``(user_text, assistant_text)`` sequence;
    - message style: each entry is a dict with ``"role"`` and ``"content"``.

    Anything that is not plain text (``None``, file attachments, unknown
    roles) is skipped instead of being rendered as ``"None"`` or raising.
    """
    turns = []
    for entry in history or []:
        if isinstance(entry, dict):
            pairs = [(entry.get("role"), entry.get("content"))]
        else:
            pairs = zip(("user", "assistant"), entry)

        for role, content in pairs:
            if isinstance(content, list):
                # NOTE(review): some Gradio versions appear to deliver content
                # as a list of parts such as {"type": "text", "text": ...};
                # only the text parts are kept -- confirm against the
                # installed Gradio version.
                content = "\n".join(
                    part["text"]
                    for part in content
                    if isinstance(part, dict) and isinstance(part.get("text"), str)
                )
            if role in ("user", "assistant") and isinstance(content, str) and content:
                turns.append((role, content))
    return turns


def _render_chatml(system_text, turns, message):
    """Render the system text, prior turns and new message as a ChatML prompt."""
    parts = [f"<|im_start|>system\n{system_text}<|im_end|>\n"]
    parts.extend(
        f"<|im_start|>{role}\n{text}<|im_end|>\n" for role, text in turns
    )
    # Leave the assistant turn open so the model continues from here.
    parts.append(f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n")
    return "".join(parts)


def generate_response(message, history):
    """Stream the assistant's reply to ``message`` for the Gradio chat UI.

    Args:
        message: The text the user just submitted.
        history: Earlier conversation turns as supplied by Gradio, either as
            ``(user, assistant)`` pairs or as ``{"role", "content"}`` dicts.

    Yields:
        First a "thinking" placeholder, then the reply accumulated so far
        after every generated chunk (Gradio replaces the bubble each time).
    """
    # Placeholder shown until the first token arrives. No artificial sleep is
    # needed: prompt evaluation already keeps it on screen, so a fixed delay
    # would only add latency.
    yield "🤖 Thinking..."

    max_new_tokens = 1024
    # Strip the blank lines that the triple-quoted literal carries around it.
    system_text = SYSTEM_PROMPT.strip()
    turns = _history_to_turns(history)
    prompt = _render_chatml(system_text, turns, message)

    # Keep the prompt small enough that the reply still has its full token
    # allowance inside the context window; forget the oldest exchanges first.
    # Tokenizing without special-token parsing can only over-count, which
    # errs on the safe side.
    prompt_budget = llm.n_ctx() - max_new_tokens
    while turns and len(llm.tokenize(prompt.encode("utf-8"))) > prompt_budget:
        # Drop a user turn together with its answer so the remaining history
        # does not start with an orphaned assistant message.
        drop = 2 if len(turns) > 1 and turns[1][0] == "assistant" else 1
        turns = turns[drop:]
        prompt = _render_chatml(system_text, turns, message)

    stream = llm(
        prompt,
        max_tokens=max_new_tokens,
        temperature=0.2,
        top_p=0.9,
        repeat_penalty=1.1,
        # Stop at the end of the assistant turn instead of letting the model
        # run on into fabricated follow-up turns.
        stop=["<|im_end|>", "<|im_start|>"],
        stream=True,
    )

    output = ""
    for chunk in stream:
        output += chunk["choices"][0]["text"]
        yield output


# ----------------------------
# Mobile UI CSS + Animation
# ----------------------------
# Stylesheet passed to gr.Blocks(css=...). It constrains the app to a
# phone-sized (420px wide) dark card, colors the user and bot chat bubbles,
# and hides the page footer. The `.message.user` / `.message.bot` selectors
# presumably target Gradio's own chat markup -- verify against the installed
# Gradio version. The `blink` keyframes and `.typing` class are defined here,
# but nothing in this file emits an element that uses the `typing` class.
css = """
.gradio-container {
    max-width: 420px !important;
    margin: auto !important;
    height: 100vh;
    border-radius: 25px;
    overflow: hidden;
    background: #0b0f19;
    box-shadow: 0 10px 40px rgba(0,0,0,0.3);
}

/* User bubble */
.message.user {
    background: #2b6fff !important;
    color: white !important;
    border-radius: 18px 18px 4px 18px !important;
}

/* Bot bubble */
.message.bot {
    background: #1c1f2a !important;
    color: white !important;
    border-radius: 18px 18px 18px 4px !important;
}

/* Typing animation */
@keyframes blink {
    0% { opacity: 0.2; }
    50% { opacity: 1; }
    100% { opacity: 0.2; }
}

.typing {
    display: inline-block;
    font-size: 18px;
    animation: blink 1.2s infinite;
}

/* hide footer */
footer {display:none !important;}
"""


# ----------------------------
# UI
# ----------------------------
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:

    # Static banner shown above the chat: logo, title and tagline.
    header_markup = """
    <div style="text-align:center; padding:10px;">
        <img src="https://huggingface.co/front/assets/huggingface_logo-noborder.svg"
             width="60"/>
        <h2 style="color:white;">Code Explainer AI</h2>
        <p style="color:gray; font-size:12px;">
            Explain • Debug • Learn code step by step
        </p>
    </div>
    """
    gr.HTML(header_markup)

    # Custom chat window and input box handed to the chat interface, which
    # streams whatever generate_response yields.
    chat_window = gr.Chatbot(height=600)
    prompt_box = gr.Textbox(
        container=False,
        placeholder="Paste code or ask for explanation...",
    )
    gr.ChatInterface(fn=generate_response, chatbot=chat_window, textbox=prompt_box)


if __name__ == "__main__":
    # Start the web server only when this file is run as a script. Host
    # "0.0.0.0" accepts connections on all network interfaces; the app is
    # served on port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)