# chatassist / app.py
# Xeexeex's picture
# Update app.py
# 4dd7173 verified
import html
import re

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# Location of the quantised GGUF weights on the Hugging Face Hub.
MODEL_REPO = "Qwen/Qwen2.5-3B-Instruct-GGUF"
MODEL_FILE = "qwen2.5-3b-instruct-q4_k_m.gguf"

# Resolved at import time; the returned path is handed straight to llama.cpp.
MODEL_PATH = hf_hub_download(filename=MODEL_FILE, repo_id=MODEL_REPO)

# Loader settings kept in one place so they are easy to scan and tweak.
_LLAMA_OPTIONS = dict(
    model_path=MODEL_PATH,
    # Context window and prompt batch size.
    n_ctx=10240,
    n_batch=512,
    # CPU-only execution: four threads, no layers offloaded to a GPU.
    n_threads=4,
    n_gpu_layers=0,
    # Memory behaviour: map the file, do not lock it into RAM.
    use_mmap=True,
    use_mlock=False,
    # Only logits for the last token are kept; library logging is silenced.
    logits_all=False,
    verbose=False,
)

print("Loading model...")
llm = Llama(**_LLAMA_OPTIONS)
print("Model ready!")
# System prompt (written in Indonesian) placed first in every message list
# sent to the model. It names the assistant "ChatAssist" and requires each
# reply to consist of a <thinking>...</thinking> section followed by an
# <answer>...</answer> section, with no text outside those tags. `_parse`
# relies on exactly these tag names.
SYSTEM_PROMPT = """Kamu adalah asisten AI cerdas bernama ChatAssist tahun 2026.
WAJIB selalu jawab dengan format XML ini tanpa terkecuali:
<thinking>
[analisis dan langkah berpikir kamu di sini]
</thinking>
<answer>
[jawaban final yang lengkap, ramah, dan informatif]
</answer>
Tidak boleh ada teks di luar tag XML tersebut."""
def _safe_delta(chunk) -> str:
try:
delta = chunk["choices"][0]["delta"].get("content", "")
if isinstance(delta, list):
return "".join(
p.get("text", "") if isinstance(p, dict) else str(p)
for p in delta
)
return str(delta) if delta is not None else ""
except Exception:
return ""
def _parse(text: str):
thinking, answer = "", ""
if "<thinking>" in text:
after = text.split("<thinking>", 1)[1]
if "</thinking>" in after:
thinking = after.split("</thinking>", 1)[0].strip()
rest = after.split("</thinking>", 1)[1]
if "<answer>" in rest:
a = rest.split("<answer>", 1)[1]
answer = a.split("</answer>", 1)[0].strip() if "</answer>" in a else a.strip()
else:
thinking = after.strip()
elif "<answer>" in text:
a = text.split("<answer>", 1)[1]
answer = a.split("</answer>", 1)[0].strip() if "</answer>" in a else a.strip()
else:
answer = text.strip()
return thinking, answer
def strip_html(text: str) -> str:
    """Reduce an HTML fragment to plain text.

    ``<br>`` line breaks (any case, optionally self-closing) become
    newlines so that adjacent lines do not fuse together, every other tag
    is removed, and HTML character references such as ``&lt;`` or
    ``&#9889;`` are decoded.

    Args:
        text: HTML fragment.

    Returns:
        The text content of the fragment.
    """
    with_breaks = re.sub(r"<br\s*/?>", "\n", text, flags=re.IGNORECASE)
    without_tags = re.sub(r"<[^>]+>", "", with_breaks)
    # Decode last: decoding first would turn "&lt;b&gt;" into a tag that
    # the previous step would then delete.
    return html.unescape(without_tags)
def format_bot_message(thinking: str, answer: str) -> str:
    """Render the parsed model output as the HTML shown in the chat bubble.

    Both inputs are model-generated text and are HTML-escaped before being
    placed in the markup: the chat component is created with
    ``sanitize_html=False``, so unescaped ``<`` / ``&`` in the text would be
    interpreted as markup instead of being displayed.

    Args:
        thinking: Reasoning text; rendered in a bordered "Thinking" panel
            when non-empty.
        answer: Final answer text; newlines become ``<br>`` and
            ``**bold**`` spans become ``<strong>`` when non-empty.

    Returns:
        The concatenated HTML, or ``""`` when both inputs are empty.
    """
    out = ""
    if thinking:
        thinking_html = html.escape(thinking, quote=False).replace("\n", "<br>")
        out += f"""<div style="border:0.5px solid #2a2a2a;border-radius:10px;margin-bottom:10px;overflow:hidden;background:#111;">
<div style="display:flex;align-items:center;gap:7px;padding:8px 12px;font-size:12px;color:#888;background:#111;">
<span style="color:#f59e0b;font-size:13px;">&#9889;</span>
<span>Thinking</span>
</div>
<div style="font-size:12px;color:#6b7280;line-height:1.6;border-top:0.5px solid #1e1e1e;background:#0a0a0a;padding:10px 12px;max-height:260px;overflow-y:auto;">
{thinking_html}
</div>
</div>"""
    if answer:
        # Escape first so that the only tags in the result are the ones
        # generated here (<br>, <strong>).
        ans = html.escape(answer, quote=False).replace("\n", "<br>")
        ans = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", ans)
        out += f'<div style="font-size:14px;color:#e5e7eb;line-height:1.7;padding:0 2px;">{ans}</div>'
    return out
def _content_text(content) -> str:
    """Flatten a chat message's ``content`` value into plain text.

    Strings pass through unchanged and ``None`` becomes ``""``. A dict
    contributes its ``"text"`` value (or nothing if it has none), and a
    list or tuple is flattened part by part. Anything else falls back to
    ``str``.

    NOTE(review): recent Gradio releases are documented to hand chatbot
    history back with ``content`` as a list of ``{"type": ..., "text": ...}``
    blocks rather than a bare string -- confirm against the installed
    version. Plain strings keep working either way.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, dict):
        text = content.get("text", "")
        return text if isinstance(text, str) else ""
    if isinstance(content, (list, tuple)):
        return "".join(_content_text(part) for part in content)
    return str(content)


def chat_stream(history):
    """Stream the assistant's reply into the last entry of ``history``.

    ``history`` is expected to end with the pair appended by
    ``user_submit``: the new user message followed by an assistant
    placeholder with empty content. If that is not the case -- the history
    is empty, the user text is blank, or the last entry is not an empty
    assistant message (for example after an empty submit, which leaves
    history unchanged) -- the history is yielded unchanged and nothing is
    generated.

    Otherwise the prompt is built from the system prompt, all earlier
    turns (assistant turns reduced to plain text with ``strip_html``) and
    the pending user message, each included exactly once. The oldest
    turns are dropped while the prompt exceeds a character budget.

    Args:
        history: List of ``{"role": ..., "content": ...}`` dicts from the
            chatbot component. The last entry is mutated in place.

    Yields:
        ``history`` after every received chunk, with the last entry's
        content set to the HTML rendering of the output so far. If
        generation raises, the error text is appended to the answer and
        the history is yielded one final time.
    """
    if not history:
        yield history
        return

    placeholder = history[-1]
    awaiting_reply = (
        placeholder.get("role") == "assistant"
        and not _content_text(placeholder.get("content")).strip()
    )
    pending_user = ""
    if len(history) >= 2:
        pending_user = _content_text(history[-2].get("content")).strip()
    if not awaiting_reply or not pending_user:
        yield history
        return

    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    # Earlier turns only: history[-2] is the pending user message (added
    # once, below) and history[-1] is the placeholder being filled in.
    for turn in history[:-2]:
        role = turn.get("role")
        text = _content_text(turn.get("content"))
        if role == "user":
            if text:
                messages.append({"role": "user", "content": text})
        elif role == "assistant":
            # Stored assistant turns are rendered HTML; send plain text back.
            plain = strip_html(text).strip()
            if plain:
                messages.append({"role": "assistant", "content": plain})
    messages.append({"role": "user", "content": pending_user})

    # Drop the oldest non-system messages while the prompt is over budget,
    # always keeping the system prompt and the two most recent messages.
    # NOTE(review): 28000 characters is presumably sized against
    # n_ctx=10240 tokens -- confirm.
    max_prompt_chars = 28000
    total_chars = sum(len(m["content"]) for m in messages)
    while len(messages) > 3 and total_chars > max_prompt_chars:
        total_chars -= len(messages.pop(1)["content"])

    raw = ""
    try:
        for chunk in llm.create_chat_completion(
            messages=messages,
            max_tokens=10240,
            temperature=0.7,
            top_p=0.95,
            top_k=40,
            repeat_penalty=1.1,
            stream=True,
        ):
            raw += _safe_delta(chunk)
            thinking, answer = _parse(raw)
            history[-1]["content"] = format_bot_message(thinking, answer)
            yield history
    except Exception as e:
        # UI boundary: show the failure in the chat instead of crashing the
        # event handler, keeping whatever text was generated so far.
        thinking, answer = _parse(raw)
        history[-1]["content"] = format_bot_message(
            thinking, answer + f"\n\nError: {str(e)}"
        )
        yield history
# Stylesheet handed to `launch(css=...)`. It gives the app a black, centred,
# 480px-wide layout and styles the elements tagged with the `ca-*` classes
# used as `elem_classes` in the Blocks layout (top bar, chatbot, suggestion
# chips, input bar, send button).
# NOTE(review): nothing visible in this file adds the `off` class targeted by
# `.ca-sbtn.off` -- confirm whether it is still needed.
CUSTOM_CSS = """
.gradio-container {
background: #000 !important;
color: #fff !important;
font-family: 'Segoe UI', system-ui, sans-serif !important;
}
footer { display: none !important; }
.gradio-container > .main {
max-width: 480px !important;
margin: 0 auto !important;
background: #000 !important;
}
.ca-topbar {
display: flex !important;
align-items: center !important;
justify-content: space-between !important;
padding: 14px 16px !important;
border-bottom: 0.5px solid #1e1e1e !important;
}
.ca-topbar h2 {
margin: 0 !important;
font-size: 15px !important;
font-weight: 500 !important;
color: #fff !important;
}
.ca-chatbot {
background: #000 !important;
border: none !important;
height: 62vh !important;
}
.ca-chatbot .message-wrap {
gap: 14px !important;
padding: 16px !important;
}
/* User bubble — align right + rounded */
.ca-chatbot .message-row.user {
justify-content: flex-end !important;
}
.ca-chatbot .message.user {
background: #1e1e1e !important;
border: 0.5px solid #2e2e2e !important;
border-radius: 18px 18px 4px 18px !important;
padding: 10px 14px !important;
color: #f0f0f0 !important;
font-size: 14px !important;
line-height: 1.5 !important;
max-width: 82% !important;
width: fit-content !important;
box-shadow: none !important;
}
/* Assistant bubble — transparent full width */
.ca-chatbot .message.assistant {
background: transparent !important;
border: none !important;
box-shadow: none !important;
color: #e5e7eb !important;
font-size: 14px !important;
line-height: 1.7 !important;
padding: 0 2px !important;
width: 100% !important;
}
.ca-inputbar {
display: flex !important;
align-items: flex-end !important;
gap: 10px !important;
background: #111 !important;
border: 0.5px solid #2e2e2e !important;
border-radius: 14px !important;
padding: 10px 12px !important;
margin: 0 16px 16px !important;
}
.ca-inputbar textarea,
.ca-inputbar input {
flex: 1 !important;
background: transparent !important;
border: none !important;
outline: none !important;
color: #f0f0f0 !important;
font-size: 14px !important;
resize: none !important;
box-shadow: none !important;
min-height: 20px !important;
max-height: 100px !important;
line-height: 1.5 !important;
font-family: inherit !important;
}
.ca-inputbar textarea::placeholder {
color: #555 !important;
}
.ca-sbtn {
width: 32px !important;
height: 32px !important;
border-radius: 50% !important;
background: #fff !important;
border: none !important;
color: #000 !important;
cursor: pointer !important;
display: flex !important;
align-items: center !important;
justify-content: center !important;
flex-shrink: 0 !important;
font-weight: bold !important;
padding: 0 !important;
min-width: 32px !important;
}
.ca-sbtn.off {
background: #1e1e1e !important;
color: #444 !important;
pointer-events: none !important;
}
.ca-chips {
display: flex !important;
flex-wrap: wrap !important;
gap: 8px !important;
justify-content: center !important;
padding: 8px 16px 12px !important;
}
.ca-chip {
border: 0.5px solid #2e2e2e !important;
border-radius: 999px !important;
background: transparent !important;
color: #ccc !important;
font-size: 13px !important;
padding: 8px 14px !important;
cursor: pointer !important;
transition: background 0.2s !important;
box-shadow: none !important;
}
.ca-chip:hover {
background: #1a1a1a !important;
}
"""
with gr.Blocks() as demo:
    # Header strip: title on the left, decorative glyphs on the right.
    with gr.Row(elem_classes="ca-topbar"):
        gr.Markdown("### &equiv; ChatAssist AI")
        gr.Markdown("&#9998; &vellip;")

    # Assistant messages are HTML built by format_bot_message, so the
    # component's own HTML sanitising is switched off.
    chatbot = gr.Chatbot(
        sanitize_html=False,
        show_label=False,
        elem_classes="ca-chatbot",
    )

    # Suggestion chips.
    with gr.Row(elem_classes="ca-chips"):
        c1 = gr.Button("🖼️ Create image", elem_classes="ca-chip")
        c2 = gr.Button("📝 Summarize", elem_classes="ca-chip")
        c3 = gr.Button("💡 Brainstorm", elem_classes="ca-chip")
        c4 = gr.Button("💻 Code", elem_classes="ca-chip")
        c5 = gr.Button("⋮ More", elem_classes="ca-chip")

    # Message box plus send button.
    with gr.Row(elem_classes="ca-inputbar"):
        msg = gr.Textbox(
            scale=5,
            container=False,
            show_label=False,
            placeholder="Message",
        )
        send = gr.Button("⬆", scale=1, elem_classes="ca-sbtn")

    def user_submit(user_msg, history):
        """Clear the textbox and append the new turn to the history.

        A blank message leaves the history untouched. Otherwise the
        stripped user message and an empty assistant placeholder (to be
        filled by ``chat_stream``) are appended to a new list.

        Returns:
            ``("", history)`` -- the new textbox value and chatbot value.
        """
        if not user_msg or not user_msg.strip():
            return "", history
        previous = [] if history is None else history
        new_turn = [
            {"role": "user", "content": user_msg.strip()},
            {"role": "assistant", "content": ""},
        ]
        return "", previous + new_turn

    def _on_submit(listener):
        """Register ``user_submit`` (unqueued) through the given listener."""
        return listener(
            fn=user_submit,
            inputs=[msg, chatbot],
            outputs=[msg, chatbot],
            queue=False,
        )

    def _then_reply(dependency):
        """Chain the streaming reply after the given event."""
        return dependency.then(
            fn=chat_stream,
            inputs=chatbot,
            outputs=chatbot,
        )

    # Enter in the textbox and the send button behave identically.
    for trigger in (msg.submit, send.click):
        _then_reply(_on_submit(trigger))

    # Each chip drops its canned prompt into the textbox, then follows the
    # same submit -> reply chain as a typed message.
    chip_texts = {
        c1: "Buatkan gambar pemandangan alam",
        c2: "Tolong ringkas teks berikut:",
        c3: "Bantu saya brainstorm ide bisnis teknologi 2026",
        c4: "Bantu saya belajar Python dari nol",
        c5: "Apa saja kemampuan ChatAssist AI?",
    }
    for btn, txt in chip_texts.items():
        # Default argument binds the current text (avoids late binding).
        filled = btn.click(lambda t=txt: t, outputs=msg)
        _then_reply(_on_submit(filled.then))
if __name__ == "__main__":
    # Concurrency limit of one for handlers, with a queue of at most five.
    app = demo.queue(default_concurrency_limit=1, max_size=5)
    # Listen on all interfaces, port 7860, with the custom stylesheet.
    app.launch(
        css=CUSTOM_CSS,
        ssr_mode=False,
        server_port=7860,
        server_name="0.0.0.0",
    )