# chatbot / app.py
# lintangamr's picture
# Update app.py
# ecdb04d verified
# -*- coding: utf-8 -*-
import torch
import pandas as pd
import faiss
import gradio as gr
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
from rank_bm25 import BM25Okapi
# =========================
# LOAD DATA
# =========================
# Chunk table read from the working directory. Later code reads its
# "text", "page_number" and "source_file" columns.
df = pd.read_csv(filepath_or_buffer="chunks.csv")

# Prebuilt FAISS index. hybrid_search() uses the ids it returns as row
# positions into `df`, so both files must come from the same build.
index = faiss.read_index("faiss_index.index")
# =========================
# EMBEDDING MODEL
# =========================
# Prefer the GPU when one is visible to torch, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Sentence encoder used to embed incoming queries in hybrid_search().
embed_model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2", device=device
)
# =========================
# BM25
# =========================
# Whitespace-tokenise every chunk once at start-up; queries are tokenised
# the same way (plain str.split) inside hybrid_search().
corpus = [chunk_text.split() for chunk_text in df["text"].tolist()]
bm25 = BM25Okapi(corpus)
# =========================
# HYBRID SEARCH (REDUCED)
# =========================
def hybrid_search(query, top_k=3, alpha=0.6):
    """Return the chunks nearest to ``query`` in the FAISS index, with scores.

    Candidates are selected by the FAISS index only; the BM25 score of each
    selected chunk is attached as an extra column.

    Args:
        query: Query string. It is embedded with ``embed_model`` and split on
            whitespace for BM25.
        top_k: Number of neighbours requested from the FAISS index.
        alpha: Accepted for interface compatibility. NOTE(review): this value
            is not used anywhere in the body, so the two scores are never
            blended or used for re-ranking.

    Returns:
        A copy of the matching rows of ``df`` in the order FAISS returned
        them, with two added columns: ``"bm25"`` (BM25 score of the chunk for
        this query) and ``"semantic"`` (the raw value returned by
        ``index.search``; whether that is a distance or a similarity depends
        on how the index was built). The frame is empty when FAISS returns no
        valid neighbour.
    """
    bm25_scores = bm25.get_scores(query.split())

    query_emb = embed_model.encode(
        query,
        convert_to_tensor=True,
        normalize_embeddings=True,
    )
    sem_scores, indices = index.search(
        query_emb.cpu().numpy().reshape(1, -1),
        top_k,
    )

    # FAISS fills the result with -1 when it finds fewer than top_k
    # neighbours. Used as a position, -1 would silently select the LAST row
    # of df (and the last BM25 score), so drop those slots first.
    valid = indices[0] >= 0
    idxs = indices[0][valid]

    df_temp = df.iloc[idxs].copy()
    df_temp["bm25"] = [bm25_scores[i] for i in idxs]
    df_temp["semantic"] = sem_scores[0][valid]
    return df_temp
# =========================
# LLM (BAHASALAB LOCAL)
# =========================
MODEL_ID = "Bahasalab/Bahasa-4b-chat-v2"

# Tokenizer and causal LM are loaded once at import time from the same id.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# NOTE(review): weights are always requested in float16, including when no
# GPU is available -- confirm this is intended for CPU-only hosts.
llm = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float16,
    device_map="auto",
)
# =========================
# PROMPT
# =========================
def build_prompt(query, context_df):
    """Assemble the LLM prompt from the question and the retrieved chunks.

    Args:
        query: The user's question, inserted verbatim under ``PERTANYAAN:``.
        context_df: DataFrame with ``page_number`` and ``text`` columns; one
            row per retrieved chunk.

    Returns:
        The prompt string. Each chunk contributes one ``(Hal <page>) <text>``
        entry, with the text cut to its first 500 characters, and entries are
        separated by a blank line.
    """
    passages = []
    for _, chunk in context_df.iterrows():
        page = chunk["page_number"]
        excerpt = chunk["text"][:500]  # cap each chunk to keep the prompt short
        passages.append(f"(Hal {page}) {excerpt}")
    context = "\n\n".join(passages)

    return f"""
Jawab hanya berdasarkan konteks berikut.
Jika tidak ada, jawab: Informasi tidak ditemukan.
KONTEKS:
{context}
PERTANYAAN:
{query}
JAWABAN:
"""
# =========================
# GENERATE
# =========================
def ask(query):
    """Answer ``query`` from the retrieved chunks and append the sources.

    Args:
        query: The user's question.

    Returns:
        ``"Informasi tidak ditemukan."`` when retrieval returns no rows.
        Otherwise the generated answer followed by a ``📚 Sumber:`` list with
        one ``<source_file> (Hal <page_number>)`` bullet per distinct source,
        in retrieval order.
    """
    results = hybrid_search(query)
    if results.empty:
        return "Informasi tidak ditemukan."

    prompt = build_prompt(query, results)
    inputs = tokenizer(prompt, return_tensors="pt")
    # The model is placed by device_map="auto", so send the inputs to the
    # device the model reports, not to the embedding model's device.
    inputs = {k: v.to(llm.device) for k, v in inputs.items()}
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        output = llm.generate(
            **inputs,
            max_new_tokens=80,  # kept small so generation is faster
            # Greedy decoding. `temperature` only applies when sampling, so
            # it is not passed (it was ignored and triggered a warning).
            do_sample=False,
        )

    # Decode only the tokens produced after the prompt. Splitting the full
    # text on "JAWABAN:" breaks if the model emits that marker itself.
    answer = tokenizer.decode(
        output[0][prompt_len:], skip_special_tokens=True
    ).strip()

    # dict.fromkeys de-duplicates while keeping retrieval order; a set would
    # list the sources in an arbitrary order that can change between runs.
    sources = dict.fromkeys(
        f"{r['source_file']} (Hal {r['page_number']})"
        for _, r in results.iterrows()
    )
    return answer + "\n\n📚 Sumber:\n" + "\n".join(f"- {s}" for s in sources)
# =========================
# CHATBOT
# =========================
def chatbot_fn(message, history):
    """Handle one chat turn and return the cleared input plus the history.

    Args:
        message: Text typed by the user.
        history: List of ``{"role": ..., "content": ...}`` dicts, or ``None``
            for a fresh conversation. The list is extended in place.

    Returns:
        A ``("", history)`` tuple: the empty string resets the textbox and
        ``history`` is the same list object that was passed in (or a new list
        when ``None`` was given). A blank message adds only an assistant
        reminder; otherwise the user message and the answer from ``ask`` are
        appended, in that order.
    """
    turns = history if history is not None else []

    if message.strip() == "":
        turns.append(
            dict(role="assistant", content="Silakan masukkan pertanyaan.")
        )
    else:
        # Record the user turn before calling the model, then the reply.
        turns.append(dict(role="user", content=message))
        turns.append(dict(role="assistant", content=ask(message)))

    return "", turns
def clear_chat():
    """Return the reset values for the chat display and the input box.

    Returns:
        A ``(history, text)`` tuple holding a new empty list and an empty
        string.
    """
    empty_history = []
    empty_text = ""
    return empty_history, empty_text
# =========================
# UI
# =========================
# Stylesheet handed to gr.Blocks(css=...) below. It forces a dark palette on
# the page, chat area, bubbles, textbox and buttons, and hides the footer,
# "Use via API" and "Settings" controls.
# NOTE: everything between the triple quotes is CSS sent to the browser as-is;
# the /* ... */ comments inside are part of the string, not Python comments.
# NOTE(review): the `*:focus { outline: none }` rule removes the keyboard
# focus indicator -- confirm this is acceptable for accessibility.
custom_css = """
/* ===== FORCE DARK GLOBAL ===== */
:root {
color-scheme: dark !important;
}
html, body, #root, .gradio-container {
background-color: #0f172a !important;
color: #e5e7eb !important;
margin: 0;
padding: 0;
}
/* Hilangkan efek light mode */
* {
background-color: transparent;
}
/* Container utama */
.gradio-container {
background-color: #0f172a !important;
max-width: 100% !important;
}
/* Wrapper */
.block-container {
max-width: 1200px;
margin: auto;
padding: 20px;
background-color: #0f172a !important;
}
/* ===== CHATBOT ===== */
[data-testid="chatbot"],
[data-testid="chatbot"] *,
.gr-chatbot,
.gr-chatbot * {
background-color: #111827 !important;
}
/* Scroll area */
.overflow-y-auto {
background-color: #111827 !important;
}
/* Bubble */
.message.bot {
background-color: #1f2937 !important;
color: #e5e7eb !important;
border-radius: 12px;
}
.message.user {
background-color: #2563eb !important;
color: white !important;
border-radius: 12px;
}
/* Textbox */
textarea {
background-color: #1f2937 !important;
color: white !important;
border-radius: 12px !important;
}
/* Input wrapper */
.gr-textbox,
.gr-input-container {
background-color: #0f172a !important;
}
/* Label */
.gr-textbox > label {
color: #cbd5e1 !important;
}
/* Button */
button {
background: linear-gradient(135deg, #2563eb, #1d4ed8) !important;
color: white !important;
border-radius: 10px !important;
padding: 10px 16px;
}
/* Text */
h1, h2, h3, p, label {
color: #e5e7eb !important;
}
/* Header */
#hero {
text-align: center;
margin-bottom: 20px;
}
#hero p {
color: #9ca3af;
}
/* Hilangkan outline */
*:focus {
outline: none !important;
}
/* ===== HILANGKAN HEADER GRADIO ===== */
footer,
.gradio-container .footer,
#footer,
.built-with,
a[href*="gradio.app"],
button[aria-label="Use via API"],
button[aria-label="Settings"] {
display: none !important;
}
"""
# =========================
# UI (FORCE DARK THEME)
# =========================
# Page layout: hero banner, chat window, question box, send/clear buttons and
# clickable example questions, followed by the event wiring.
with gr.Blocks(
    theme=gr.themes.Base(primary_hue="blue", neutral_hue="slate"),
    css=custom_css,
) as demo:
    gr.HTML("""
<div id="hero">
<h1>🏛️ Chatbot Layanan Informasi DCKTRP DKI Jakarta</h1>
<p>
Chatbot yang dirancang untuk membantu pengguna mendapatkan
informasi seputar bangunan gedung, tata ruang, dan regulasi Dinas
Cipta Karya dan Tata Ruang DKI Jakarta.
</p>
</div>
""")

    # NOTE(review): chatbot_fn produces role/content dicts; confirm the
    # installed Gradio version's Chatbot accepts that format by default.
    chatbot = gr.Chatbot(height=500)
    # Conversation history kept alongside the visible chat component.
    state = gr.State([])

    user_input = gr.Textbox(
        lines=2,
        label="Masukkan Pertanyaan",
        placeholder="Tanyakan sesuatu...",
    )

    with gr.Row():
        send_btn = gr.Button("🚀 Kirim")
        clear_btn = gr.Button("🧹 Bersihkan")

    gr.Markdown("### 💡 Contoh Pertanyaan")
    gr.Examples(
        inputs=user_input,
        examples=[
            ["Apa itu bangunan gedung?"],
            ["Apa syarat mendirikan bangunan?"],
            ["Apa fungsi Persetujuan Bangunan Gedung?"],
            ["Apa pengertian tata ruang?"],
            ["Berapa lama proses Persetujuan Bangunan Gedung?"],
        ],
    )

    # EVENTS
    # The send button and pressing Enter in the textbox run the same chain:
    # answer the question, then copy the chat contents back into `state`.
    for _trigger in (send_btn.click, user_input.submit):
        _trigger(
            fn=chatbot_fn,
            inputs=[user_input, state],
            outputs=[user_input, chatbot],
        ).then(
            fn=lambda history: history,
            inputs=chatbot,
            outputs=state,
        )

    # Clearing empties the chat window and textbox, then resets the state.
    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, user_input],
    ).then(
        fn=lambda: [],
        outputs=state,
    )
# =========================
# LAUNCH
# =========================
# Listen on every network interface, on port 7860.
demo.launch(
    server_port=7860,
    server_name="0.0.0.0",
)