"""ChatAssist: a small Gradio chat UI backed by a local Qwen2.5 GGUF model.

The model is asked to answer in a two-part XML format (reasoning + final
answer).  The streamed text is parsed incrementally and rendered as HTML: the
reasoning goes into a collapsible block, the answer below it.
"""

import html
import re

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

MODEL_REPO = "Qwen/Qwen2.5-3B-Instruct-GGUF"
MODEL_FILE = "qwen2.5-3b-instruct-q4_k_m.gguf"
MODEL_PATH = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)

# Context window handed to llama.cpp; the prompt and the completion share it.
N_CTX = 10240
# Upper bound for generated tokens per reply (kept at the original value).
MAX_NEW_TOKENS = 10240
# Character budget for the prompt; the oldest turns are dropped beyond it.
MAX_PROMPT_CHARS = 28000

print("Loading model...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=N_CTX,
    n_threads=4,
    n_batch=512,
    n_gpu_layers=0,
    verbose=False,
    logits_all=False,
    use_mmap=True,
    use_mlock=False,
)
print("Model ready!")

# Protocol tags shared by the system prompt and the parser so the two can never
# drift apart.
# NOTE(review): the literal tag names were missing (empty strings) in the
# reviewed copy of this file; "<thinking>"/"<answer>" are assumed -- confirm.
THINK_OPEN, THINK_CLOSE = "<thinking>", "</thinking>"
ANSWER_OPEN, ANSWER_CLOSE = "<answer>", "</answer>"
_PROTOCOL_TAGS = (THINK_OPEN, THINK_CLOSE, ANSWER_OPEN, ANSWER_CLOSE)

SYSTEM_PROMPT = f"""Kamu adalah asisten AI cerdas bernama ChatAssist tahun 2026.
WAJIB selalu jawab dengan format XML ini tanpa terkecuali:
{THINK_OPEN}
[analisis dan langkah berpikir kamu di sini]
{THINK_CLOSE}
{ANSWER_OPEN}
[jawaban final yang lengkap, ramah, dan informatif]
{ANSWER_CLOSE}
Tidak boleh ada teks di luar tag XML tersebut."""

_TAG_RE = re.compile(r"<[^>]+>")
_BOLD_RE = re.compile(r"\*\*(.+?)\*\*")
_THINK_BLOCK_RE = re.compile(r"<details\b.*?</details>", re.DOTALL | re.IGNORECASE)
_BR_RE = re.compile(r"<br\s*/?>", re.IGNORECASE)


def _safe_delta(chunk: dict) -> str:
    """Return the text carried by one streamed completion chunk.

    Reads ``chunk["choices"][0]["delta"]["content"]``.  The content may be a
    string, ``None``, or a list of parts (dicts with a ``"text"`` key or
    arbitrary values).  A chunk that does not have this shape yields ``""``
    so one odd chunk cannot abort the whole stream.
    """
    try:
        delta = chunk["choices"][0]["delta"].get("content", "")
        if isinstance(delta, list):
            return "".join(
                p.get("text", "") if isinstance(p, dict) else str(p)
                for p in delta
            )
        return str(delta) if delta is not None else ""
    except (KeyError, IndexError, TypeError, AttributeError):
        return ""


def _content_text(content) -> str:
    """Flatten a chat message ``content`` value into plain text.

    Accepts a string, ``None``, a dict with a ``"text"`` key, or a list/tuple
    of such values; dicts without a string ``"text"`` contribute nothing.
    Without this, ``str()`` on a list of content blocks would send a Python
    repr to the model.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        text = content.get("text")
        return text if isinstance(text, str) else ""
    if isinstance(content, (list, tuple)):
        return "".join(_content_text(part) for part in content)
    return str(content)


def _drop_partial_tag(text: str) -> str:
    """Remove a trailing, half-streamed protocol tag such as ``<answ``."""
    cut = text.rfind("<")
    if cut != -1 and any(tag.startswith(text[cut:]) for tag in _PROTOCOL_TAGS):
        return text[:cut]
    return text


def _answer_from(text: str) -> str:
    """Extract the answer body from *text*.

    Returns what lies between the answer tags, or everything after the
    opening tag while the closing one has not been streamed yet.  If the
    opening tag is absent the whole text is treated as the answer, so a reply
    that ignores the format is still shown.
    """
    _, opened, body = text.partition(ANSWER_OPEN)
    if not opened:
        return _drop_partial_tag(text).strip()
    body, closed, _ = body.partition(ANSWER_CLOSE)
    if not closed:
        body = _drop_partial_tag(body)
    return body.strip()


def _parse(text: str) -> tuple[str, str]:
    """Split raw (possibly incomplete) model output into its two parts.

    Returns:
        ``(thinking, answer)``, both stripped; either may be ``""``.  While
        the thinking block is still open the answer is empty.
    """
    _, think_open, after = text.partition(THINK_OPEN)
    if not think_open:
        return "", _answer_from(text)
    thinking, think_close, rest = after.partition(THINK_CLOSE)
    if not think_close:
        return _drop_partial_tag(thinking).strip(), ""
    return thinking.strip(), _answer_from(rest)


def strip_html(text: str) -> str:
    """Remove everything that looks like an HTML tag (``<...>``) from *text*."""
    return _TAG_RE.sub("", text)


def _to_html(text: str) -> str:
    """Escape *text* for HTML and turn newlines into ``<br>``."""
    return html.escape(text, quote=False).replace("\n", "<br>")


def format_bot_message(thinking: str, answer: str) -> str:
    """Render the parsed reply as the HTML shown in the chatbot.

    The reasoning goes into a collapsible ``<details>`` block labelled
    "Thinking" and the answer into a ``<div>`` with ``**bold**`` converted to
    ``<strong>``.  Both texts are HTML-escaped first because the chatbot is
    created with ``sanitize_html=False``.

    Returns:
        The HTML string, or ``""`` when both parts are empty.
    """
    parts = []
    if thinking:
        parts.append(
            "<details><summary>Thinking</summary>"
            f"<div>{_to_html(thinking)}</div></details>"
        )
    if answer:
        body = _BOLD_RE.sub(r"<strong>\1</strong>", _to_html(answer))
        parts.append(f"<div>{body}</div>")
    return "".join(parts)


def _assistant_plain_text(rendered: str) -> str:
    """Turn a rendered assistant bubble back into plain text for the prompt.

    Drops the collapsible thinking block so only the answer is replayed,
    restores line breaks, removes the remaining tags and undoes the escaping
    applied by :func:`format_bot_message`.
    """
    answer_only = _THINK_BLOCK_RE.sub("", rendered)
    with_newlines = _BR_RE.sub("\n", answer_only)
    return html.unescape(strip_html(with_newlines)).strip()


def _build_messages(previous_turns, last_user: str) -> list[dict]:
    """Assemble the LLM message list: system prompt, past turns, new question.

    Turns with empty text or an unknown role are skipped.  When the total
    length exceeds ``MAX_PROMPT_CHARS`` the oldest non-system messages are
    dropped, always keeping at least the system prompt plus two messages.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    for turn in previous_turns:
        role = turn.get("role")
        text = _content_text(turn.get("content"))
        if role == "assistant":
            text = _assistant_plain_text(text)
        elif role != "user":
            continue
        if text:
            messages.append({"role": role, "content": text})
    messages.append({"role": "user", "content": last_user})

    total = sum(len(m["content"]) for m in messages)
    while len(messages) > 3 and total > MAX_PROMPT_CHARS:
        total -= len(messages.pop(1)["content"])
    return messages


def chat_stream(history):
    """Stream the assistant reply for the last user turn into *history*.

    Expects *history* to end with a user message followed by an empty
    assistant placeholder (as produced by :func:`user_submit`).  A missing
    placeholder is added.  If the last assistant message already has content
    nothing is pending (e.g. an empty submit), so the history is yielded
    unchanged instead of regenerating and overwriting the previous answer.

    Yields:
        The same *history* list after each streamed chunk, with the last
        message's content replaced by the rendered HTML.  If generation
        fails, the text received so far is shown followed by an error line.
    """
    if not history:
        yield history
        return

    pending = history[-1]
    if pending.get("role") == "user":
        history.append({"role": "assistant", "content": ""})
    elif _content_text(pending.get("content")).strip():
        yield history
        return

    # The question to answer is the turn right before the empty placeholder.
    user_turn = history[-2] if len(history) >= 2 else {}
    last_user = _content_text(user_turn.get("content")).strip()
    if user_turn.get("role") != "user" or not last_user:
        yield history
        return

    # Earlier turns only: the last user message is appended exactly once by
    # _build_messages and the placeholder is what is being generated.
    messages = _build_messages(history[:-2], last_user)

    raw = ""
    try:
        for chunk in llm.create_chat_completion(
            messages=messages,
            max_tokens=MAX_NEW_TOKENS,
            temperature=0.7,
            top_p=0.95,
            top_k=40,
            repeat_penalty=1.1,
            stream=True,
        ):
            raw += _safe_delta(chunk)
            thinking, answer = _parse(raw)
            history[-1]["content"] = format_bot_message(thinking, answer)
            yield history
    except Exception as e:
        # UI boundary: show the failure in the chat instead of a dead bubble.
        thinking, answer = _parse(raw)
        history[-1]["content"] = format_bot_message(
            thinking, answer + f"\n\nError: {e}"
        )
        yield history


CUSTOM_CSS = """
.gradio-container {
    background: #000 !important;
    color: #fff !important;
    font-family: 'Segoe UI', system-ui, sans-serif !important;
}
footer { display: none !important; }
.gradio-container > .main {
    max-width: 480px !important;
    margin: 0 auto !important;
    background: #000 !important;
}
.ca-topbar {
    display: flex !important;
    align-items: center !important;
    justify-content: space-between !important;
    padding: 14px 16px !important;
    border-bottom: 0.5px solid #1e1e1e !important;
}
.ca-topbar h2, .ca-topbar h3 {
    margin: 0 !important;
    font-size: 15px !important;
    font-weight: 500 !important;
    color: #fff !important;
}
.ca-chatbot {
    background: #000 !important;
    border: none !important;
    height: 62vh !important;
}
.ca-chatbot .message-wrap {
    gap: 14px !important;
    padding: 16px !important;
}
/* User bubble — align right + rounded */
.ca-chatbot .message-row.user { justify-content: flex-end !important; }
.ca-chatbot .message.user {
    background: #1e1e1e !important;
    border: 0.5px solid #2e2e2e !important;
    border-radius: 18px 18px 4px 18px !important;
    padding: 10px 14px !important;
    color: #f0f0f0 !important;
    font-size: 14px !important;
    line-height: 1.5 !important;
    max-width: 82% !important;
    width: fit-content !important;
    box-shadow: none !important;
}
/* Assistant bubble — transparent full width */
.ca-chatbot .message.assistant {
    background: transparent !important;
    border: none !important;
    box-shadow: none !important;
    color: #e5e7eb !important;
    font-size: 14px !important;
    line-height: 1.7 !important;
    padding: 0 2px !important;
    width: 100% !important;
}
.ca-inputbar {
    display: flex !important;
    align-items: flex-end !important;
    gap: 10px !important;
    background: #111 !important;
    border: 0.5px solid #2e2e2e !important;
    border-radius: 14px !important;
    padding: 10px 12px !important;
    margin: 0 16px 16px !important;
}
.ca-inputbar textarea, .ca-inputbar input {
    flex: 1 !important;
    background: transparent !important;
    border: none !important;
    outline: none !important;
    color: #f0f0f0 !important;
    font-size: 14px !important;
    resize: none !important;
    box-shadow: none !important;
    min-height: 20px !important;
    max-height: 100px !important;
    line-height: 1.5 !important;
    font-family: inherit !important;
}
.ca-inputbar textarea::placeholder { color: #555 !important; }
.ca-sbtn {
    width: 32px !important;
    height: 32px !important;
    border-radius: 50% !important;
    background: #fff !important;
    border: none !important;
    color: #000 !important;
    cursor: pointer !important;
    display: flex !important;
    align-items: center !important;
    justify-content: center !important;
    flex-shrink: 0 !important;
    font-weight: bold !important;
    padding: 0 !important;
    min-width: 32px !important;
}
.ca-sbtn.off {
    background: #1e1e1e !important;
    color: #444 !important;
    pointer-events: none !important;
}
.ca-chips {
    display: flex !important;
    flex-wrap: wrap !important;
    gap: 8px !important;
    justify-content: center !important;
    padding: 8px 16px 12px !important;
}
.ca-chip {
    border: 0.5px solid #2e2e2e !important;
    border-radius: 999px !important;
    background: transparent !important;
    color: #ccc !important;
    font-size: 13px !important;
    padding: 8px 14px !important;
    cursor: pointer !important;
    transition: background 0.2s !important;
    box-shadow: none !important;
}
.ca-chip:hover { background: #1a1a1a !important; }
"""


def user_submit(user_msg, history):
    """Append the user's message and an empty assistant placeholder.

    Returns:
        ``("", new_history)`` so the textbox is cleared.  Blank input leaves
        the history untouched.
    """
    if not user_msg or not user_msg.strip():
        return "", history
    if history is None:
        history = []
    return "", history + [
        {"role": "user", "content": user_msg.strip()},
        {"role": "assistant", "content": ""},
    ]


with gr.Blocks() as demo:
    with gr.Row(elem_classes="ca-topbar"):
        gr.Markdown("### ≡ ChatAssist AI")
        gr.Markdown("✎ ⋮")

    chatbot = gr.Chatbot(
        elem_classes="ca-chatbot",
        show_label=False,
        # Replies are rendered as our own HTML; format_bot_message escapes the
        # model text before adding markup.
        sanitize_html=False,
    )

    with gr.Row(elem_classes="ca-chips"):
        c1 = gr.Button("🖼️ Create image", elem_classes="ca-chip")
        c2 = gr.Button("📝 Summarize", elem_classes="ca-chip")
        c3 = gr.Button("💡 Brainstorm", elem_classes="ca-chip")
        c4 = gr.Button("💻 Code", elem_classes="ca-chip")
        c5 = gr.Button("⋮ More", elem_classes="ca-chip")

    with gr.Row(elem_classes="ca-inputbar"):
        msg = gr.Textbox(
            placeholder="Message",
            show_label=False,
            container=False,
            scale=5,
        )
        send = gr.Button("⬆", scale=1, elem_classes="ca-sbtn")

    # Every entry point runs the same two steps: record the user turn (not
    # queued), then stream the reply into the placeholder.
    submit_step = dict(
        fn=user_submit,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    )
    stream_step = dict(fn=chat_stream, inputs=chatbot, outputs=chatbot)

    for trigger in (msg.submit, send.click):
        trigger(**submit_step).then(**stream_step)

    chip_texts = {
        c1: "Buatkan gambar pemandangan alam",
        c2: "Tolong ringkas teks berikut:",
        c3: "Bantu saya brainstorm ide bisnis teknologi 2026",
        c4: "Bantu saya belajar Python dari nol",
        c5: "Apa saja kemampuan ChatAssist AI?",
    }
    for btn, txt in chip_texts.items():
        # A chip first fills the textbox with its canned prompt, then behaves
        # like a normal submit.  ``t=txt`` binds the value per iteration.
        btn.click(lambda t=txt: t, outputs=msg).then(**submit_step).then(
            **stream_step
        )


if __name__ == "__main__":
    demo.queue(max_size=5, default_concurrency_limit=1).launch(
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
        css=CUSTOM_CSS,
    )