import html
import re

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Hugging Face Hub repository and file name of the GGUF model to load.
MODEL_REPO = "Qwen/Qwen2.5-3B-Instruct-GGUF"
MODEL_FILE = "qwen2.5-3b-instruct-q4_k_m.gguf"

# Resolved at import time: the returned value is used as the local model path.
# NOTE(review): hf_hub_download presumably reuses a local cache after the
# first download -- confirm against the huggingface_hub docs.
MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)
print("Loading model...")

# Single module-level model instance; chat_stream() calls
# llm.create_chat_completion() on it for every request.
llm = Llama(
    model_path=MODEL_PATH,
    # NOTE(review): presumably the context window in tokens; chat_stream()
    # requests max_tokens of the same size -- confirm how the library clamps.
    n_ctx=10240,
    n_threads=4,
    n_batch=512,
    # NOTE(review): 0 presumably means CPU-only inference -- confirm.
    n_gpu_layers=0,
    verbose=False,
    logits_all=False,
    use_mmap=True,
    use_mlock=False,
)
print("Model ready!")

# System prompt (written in Indonesian) sent as the first message of every
# completion request. It tells the model to wrap its reasoning in
# <thinking>...</thinking> and its reply in <answer>...</answer>; _parse()
# splits the streamed text on exactly these tags.
SYSTEM_PROMPT = """Kamu adalah asisten AI cerdas bernama ChatAssist tahun 2026.

WAJIB selalu jawab dengan format XML ini tanpa terkecuali:

<thinking>
[analisis dan langkah berpikir kamu di sini]
</thinking>
<answer>
[jawaban final yang lengkap, ramah, dan informatif]
</answer>

Tidak boleh ada teks di luar tag XML tersebut."""


def _safe_delta(chunk) -> str:
    """Extract the text fragment carried by one streamed completion chunk.

    Reads ``chunk["choices"][0]["delta"]["content"]``. A list payload is
    flattened by concatenating the ``"text"`` value of dict parts and the
    ``str()`` of any other part; a missing or ``None`` payload gives ``""``.
    Any error raised while reading the chunk is swallowed and ``""`` is
    returned, so a malformed chunk never interrupts the stream.
    """
    try:
        payload = chunk["choices"][0]["delta"].get("content", "")
        if payload is None:
            return ""
        if not isinstance(payload, list):
            return str(payload)

        pieces = []
        for part in payload:
            if isinstance(part, dict):
                pieces.append(part.get("text", ""))
            else:
                pieces.append(str(part))
        return "".join(pieces)
    except Exception:
        return ""


def _strip_partial_tag(segment: str) -> str:
    """Drop a trailing, still-incomplete protocol tag from streamed text.

    While tokens stream in, the text can end in a fragment such as ``<thin``
    or ``</answ``. If everything from the last ``<`` to the end is a prefix of
    one of the four protocol tags, that fragment is removed; any other ``<``
    (for example in ``a < b``) is left untouched.
    """
    known_tags = ("<thinking>", "</thinking>", "<answer>", "</answer>")
    cut = segment.rfind("<")
    if cut != -1 and any(tag.startswith(segment[cut:]) for tag in known_tags):
        return segment[:cut]
    return segment


def _extract_answer(segment: str) -> str:
    """Return the answer text contained in *segment*.

    Uses the text after ``<answer>`` when that tag is present, otherwise the
    whole segment, so a reply that forgot the tag is still shown. The text is
    cut at ``</answer>`` when present; if not, a trailing partial tag is
    dropped.
    """
    _, opened, body = segment.partition("<answer>")
    if not opened:
        body = segment
    body, closed, _ = body.partition("</answer>")
    if not closed:
        body = _strip_partial_tag(body)
    return body.strip()


def _parse(text: str):
    """Split raw model output into ``(thinking, answer)``.

    The model is asked to answer as
    ``<thinking>...</thinking><answer>...</answer>``. This function is called
    on every streamed prefix of the output, so it accepts unfinished input:

    * no ``<thinking>`` tag: everything is treated as answer text;
    * ``<thinking>`` opened but not closed: everything after it is thinking;
    * ``</thinking>`` seen: the remainder is the answer, with or without an
      ``<answer>`` tag (previously untagged text here was discarded).

    Half-received tags at the end of the text are hidden in both parts.

    Args:
        text: Raw text accumulated from the model so far.

    Returns:
        A ``(thinking, answer)`` tuple of stripped strings; either may be
        empty.
    """
    _, opened, after = text.partition("<thinking>")
    if not opened:
        return "", _extract_answer(text)

    reasoning, closed, rest = after.partition("</thinking>")
    if closed:
        return reasoning.strip(), _extract_answer(rest)

    # Thinking section still streaming: nothing can be answer text yet.
    return _strip_partial_tag(reasoning).strip(), ""


def strip_html(text: str) -> str:
    """Return *text* with every ``<...>`` tag-like span removed.

    A span is a ``<``, one or more characters other than ``>``, then ``>``.
    Text between tags is kept as is; character entities are not decoded.
    """
    tag_pattern = re.compile(r"<[^>]+>")
    return tag_pattern.sub("", text)


def format_bot_message(thinking: str, answer: str) -> str:
    """Render the parsed model output as the HTML shown in the chat bubble.

    The model text is HTML-escaped before it is placed in the markup. The
    Chatbot component is created with ``sanitize_html=False``, so unescaped
    output would be interpreted as markup: tags emitted by the model would be
    injected into the page, and code such as ``a < b`` would be mangled.

    Args:
        thinking: Reasoning text; rendered in a bordered "Thinking" panel when
            non-empty.
        answer: Final answer text; newlines become ``<br>`` and ``**bold**``
            becomes ``<strong>bold</strong>``.

    Returns:
        The HTML fragment, or ``""`` when both inputs are empty.
    """
    out = ""
    if thinking:
        # quote=False: only &, < and > need escaping inside element content.
        thinking_html = html.escape(thinking, quote=False).replace("\n", "<br>")
        out += f"""<div style="border:0.5px solid #2a2a2a;border-radius:10px;margin-bottom:10px;overflow:hidden;background:#111;">
  <div style="display:flex;align-items:center;gap:7px;padding:8px 12px;font-size:12px;color:#888;background:#111;">
    <span style="color:#f59e0b;font-size:13px;">&#9889;</span>
    <span>Thinking</span>
  </div>
  <div style="font-size:12px;color:#6b7280;line-height:1.6;border-top:0.5px solid #1e1e1e;background:#0a0a0a;padding:10px 12px;max-height:260px;overflow-y:auto;">
    {thinking_html}
  </div>
</div>"""
    if answer:
        # Escape first so the tags added below are the only real markup.
        ans = html.escape(answer, quote=False).replace("\n", "<br>")
        ans = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", ans)
        out += f'<div style="font-size:14px;color:#e5e7eb;line-height:1.7;padding:0 2px;">{ans}</div>'
    return out


def _content_text(content) -> str:
    """Flatten a chat message ``content`` value to plain text.

    Accepts a string, ``None``, a dict with a ``"text"`` key, or a list/tuple
    of such parts. Calling ``str()`` on a list of parts would put its Python
    repr into the prompt, so the parts are joined instead.
    NOTE(review): list-of-parts content is what newer Gradio releases appear
    to pass back from the Chatbot -- confirm against the installed version.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        return str(content.get("text") or "")
    if isinstance(content, (list, tuple)):
        return "".join(_content_text(part) for part in content)
    return str(content)


def chat_stream(history):
    """Stream the model's reply into the trailing placeholder of *history*.

    Expects *history* to end with a non-empty user message followed by an
    empty assistant placeholder. In any other state (empty history, last
    assistant message already filled, blank user text) there is nothing to
    generate and *history* is yielded unchanged. This stops an empty submit
    from regenerating and overwriting the previous answer.

    Args:
        history: List of ``{"role": ..., "content": ...}`` dicts. The last
            entry's ``"content"`` is overwritten in place as text arrives.

    Yields:
        The same *history* list after each streamed chunk. If generation
        raises, the partial output plus an ``Error: ...`` line is yielded.
    """
    if not history or len(history) < 2:
        yield history
        return

    pending, prompt_msg = history[-1], history[-2]
    last_user = _content_text(prompt_msg.get("content")).strip()
    if (
        pending.get("role") != "assistant"
        or _content_text(pending.get("content")).strip()
        or prompt_msg.get("role") != "user"
        or not last_user
    ):
        yield history
        return

    # Rebuild the LLM messages from the earlier turns only. The last two
    # entries (current user message and its empty placeholder) are excluded:
    # the user message is appended once below, not twice.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for msg in history[:-2]:
        role = msg.get("role")
        text = _content_text(msg.get("content"))
        if role == "user" and text:
            messages.append({"role": "user", "content": text})
        elif role == "assistant" and text:
            # Stored assistant turns are rendered HTML: drop the tags and
            # decode the entities so the model sees plain text again.
            plain = html.unescape(strip_html(text)).strip()
            if plain:
                messages.append({"role": "assistant", "content": plain})

    messages.append({"role": "user", "content": last_user})

    # Character-based budget: drop the oldest non-system message until the
    # prompt is small enough, always keeping at least three messages.
    while len(messages) > 3 and sum(len(m["content"]) for m in messages) > 28000:
        messages.pop(1)

    raw = ""
    try:
        for chunk in llm.create_chat_completion(
            messages=messages,
            max_tokens=10240,
            temperature=0.7,
            top_p=0.95,
            top_k=40,
            repeat_penalty=1.1,
            stream=True,
        ):
            raw += _safe_delta(chunk)
            t, a = _parse(raw)
            history[-1]["content"] = format_bot_message(t, a)
            yield history
    except Exception as e:
        # UI boundary: show the failure in the chat rather than crash the
        # event handler, keeping whatever text was already produced.
        t, a = _parse(raw)
        history[-1]["content"] = format_bot_message(t, a + f"\n\nError: {str(e)}")
        yield history


# Stylesheet for the chat page, passed to demo.launch(css=...) at startup.
# The ``ca-*`` class selectors correspond to the ``elem_classes`` given to the
# components in the Blocks layout (top bar, chatbot, chips, input bar, send
# button). NOTE(review): ``.ca-sbtn.off`` is styled here but no code in this
# file assigns the ``off`` class -- confirm whether it is still needed.
CUSTOM_CSS = """
.gradio-container {
    background: #000 !important;
    color: #fff !important;
    font-family: 'Segoe UI', system-ui, sans-serif !important;
}
footer { display: none !important; }

.gradio-container > .main {
    max-width: 480px !important;
    margin: 0 auto !important;
    background: #000 !important;
}

.ca-topbar {
    display: flex !important;
    align-items: center !important;
    justify-content: space-between !important;
    padding: 14px 16px !important;
    border-bottom: 0.5px solid #1e1e1e !important;
}
.ca-topbar h2 {
    margin: 0 !important;
    font-size: 15px !important;
    font-weight: 500 !important;
    color: #fff !important;
}

.ca-chatbot {
    background: #000 !important;
    border: none !important;
    height: 62vh !important;
}
.ca-chatbot .message-wrap {
    gap: 14px !important;
    padding: 16px !important;
}

/* User bubble — align right + rounded */
.ca-chatbot .message-row.user {
    justify-content: flex-end !important;
}
.ca-chatbot .message.user {
    background: #1e1e1e !important;
    border: 0.5px solid #2e2e2e !important;
    border-radius: 18px 18px 4px 18px !important;
    padding: 10px 14px !important;
    color: #f0f0f0 !important;
    font-size: 14px !important;
    line-height: 1.5 !important;
    max-width: 82% !important;
    width: fit-content !important;
    box-shadow: none !important;
}

/* Assistant bubble — transparent full width */
.ca-chatbot .message.assistant {
    background: transparent !important;
    border: none !important;
    box-shadow: none !important;
    color: #e5e7eb !important;
    font-size: 14px !important;
    line-height: 1.7 !important;
    padding: 0 2px !important;
    width: 100% !important;
}

.ca-inputbar {
    display: flex !important;
    align-items: flex-end !important;
    gap: 10px !important;
    background: #111 !important;
    border: 0.5px solid #2e2e2e !important;
    border-radius: 14px !important;
    padding: 10px 12px !important;
    margin: 0 16px 16px !important;
}
.ca-inputbar textarea,
.ca-inputbar input {
    flex: 1 !important;
    background: transparent !important;
    border: none !important;
    outline: none !important;
    color: #f0f0f0 !important;
    font-size: 14px !important;
    resize: none !important;
    box-shadow: none !important;
    min-height: 20px !important;
    max-height: 100px !important;
    line-height: 1.5 !important;
    font-family: inherit !important;
}
.ca-inputbar textarea::placeholder {
    color: #555 !important;
}
.ca-sbtn {
    width: 32px !important;
    height: 32px !important;
    border-radius: 50% !important;
    background: #fff !important;
    border: none !important;
    color: #000 !important;
    cursor: pointer !important;
    display: flex !important;
    align-items: center !important;
    justify-content: center !important;
    flex-shrink: 0 !important;
    font-weight: bold !important;
    padding: 0 !important;
    min-width: 32px !important;
}
.ca-sbtn.off {
    background: #1e1e1e !important;
    color: #444 !important;
    pointer-events: none !important;
}

.ca-chips {
    display: flex !important;
    flex-wrap: wrap !important;
    gap: 8px !important;
    justify-content: center !important;
    padding: 8px 16px 12px !important;
}
.ca-chip {
    border: 0.5px solid #2e2e2e !important;
    border-radius: 999px !important;
    background: transparent !important;
    color: #ccc !important;
    font-size: 13px !important;
    padding: 8px 14px !important;
    cursor: pointer !important;
    transition: background 0.2s !important;
    box-shadow: none !important;
}
.ca-chip:hover {
    background: #1a1a1a !important;
}
"""


# Page layout and event wiring. Components are declared top to bottom: top
# bar, chat transcript, suggestion chips, then the message input row.
with gr.Blocks() as demo:
    # Decorative header row (title on the left, icon glyphs on the right).
    with gr.Row(elem_classes="ca-topbar"):
        gr.Markdown("### &equiv;  ChatAssist AI")
        gr.Markdown("&#9998;  &vellip;")

    # sanitize_html=False: assistant messages are HTML fragments built by
    # format_bot_message(), so the component must not strip their markup.
    chatbot = gr.Chatbot(
        elem_classes="ca-chatbot",
        show_label=False,
        sanitize_html=False,
    )

    # Suggestion chips; each one submits the canned prompt mapped to it in
    # chip_texts below.
    with gr.Row(elem_classes="ca-chips"):
        c1 = gr.Button("🖼️ Create image", elem_classes="ca-chip")
        c2 = gr.Button("📝 Summarize", elem_classes="ca-chip")
        c3 = gr.Button("💡 Brainstorm", elem_classes="ca-chip")
        c4 = gr.Button("💻 Code", elem_classes="ca-chip")
        c5 = gr.Button("⋮ More", elem_classes="ca-chip")

    with gr.Row(elem_classes="ca-inputbar"):
        msg = gr.Textbox(
            placeholder="Message",
            show_label=False,
            container=False,
            scale=5,
        )
        send = gr.Button("⬆", scale=1, elem_classes="ca-sbtn")

    def user_submit(user_msg, history):
        """Append the user's message and an empty assistant slot to the chat.

        Returns a ``(textbox_value, history)`` pair: the textbox is always
        cleared. For a blank message the history is returned unchanged;
        otherwise a new list is returned with the stripped user message
        followed by an empty assistant message that chat_stream() fills in.
        """
        if not user_msg or not user_msg.strip():
            return "", history
        if history is None:
            history = []
        return "", history + [
            {"role": "user", "content": user_msg.strip()},
            {"role": "assistant", "content": ""},
        ]

    # Pressing Enter in the textbox and clicking the send button run the same
    # two-step chain: record the message, then stream the reply.
    for trigger in [msg.submit, send.click]:
        trigger(
            fn=user_submit,
            inputs=[msg, chatbot],
            outputs=[msg, chatbot],
            queue=False,
        ).then(
            fn=chat_stream,
            inputs=chatbot,
            outputs=chatbot,
        )

    # Canned prompt (Indonesian) sent when the corresponding chip is clicked.
    chip_texts = {
        c1: "Buatkan gambar pemandangan alam",
        c2: "Tolong ringkas teks berikut:",
        c3: "Bantu saya brainstorm ide bisnis teknologi 2026",
        c4: "Bantu saya belajar Python dari nol",
        c5: "Apa saja kemampuan ChatAssist AI?",
    }
    for btn, txt in chip_texts.items():
        # t=txt binds the current prompt as a default argument so each button
        # keeps its own text instead of the loop's last value.
        btn.click(lambda t=txt: t, outputs=msg).then(
            fn=user_submit,
            inputs=[msg, chatbot],
            outputs=[msg, chatbot],
            queue=False,
        ).then(
            fn=chat_stream,
            inputs=chatbot,
            outputs=chatbot,
        )


if __name__ == "__main__":
    # Configure the request queue first, then start the server on the same
    # Blocks instance (queue() hands back the app it was called on).
    demo.queue(max_size=5, default_concurrency_limit=1)
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
        css=CUSTOM_CSS,
    )