# -*- coding: utf-8 -*-
"""Retrieval-augmented chatbot for DCKTRP DKI Jakarta information services.

Pipeline: FAISS nearest-neighbour retrieval over pre-computed chunk
embeddings (annotated with BM25 scores), a prompt built from the retrieved
chunks, answer generation with a local causal LM, and a Gradio chat UI.
"""
import torch
import pandas as pd
import faiss
import gradio as gr
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
from rank_bm25 import BM25Okapi

# =========================
# LOAD DATA
# =========================
df = pd.read_csv("chunks.csv")
index = faiss.read_index("faiss_index.index")

# =========================
# EMBEDDING MODEL
# =========================
device = "cuda" if torch.cuda.is_available() else "cpu"

embed_model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    device=device,
)

# =========================
# BM25
# =========================
corpus = [text.split() for text in df["text"].tolist()]
bm25 = BM25Okapi(corpus)


# =========================
# HYBRID SEARCH (REDUCED)
# =========================
def hybrid_search(query, top_k=3, alpha=0.6):
    """Retrieve the ``top_k`` chunks nearest to ``query`` in the FAISS index.

    Args:
        query: Free-text user question.
        top_k: Number of neighbours requested from the FAISS index.
        alpha: Accepted for interface compatibility. It is currently not
            used: rows come back in FAISS ranking order and the BM25 score
            is attached as a column only, not blended into the ranking.

    Returns:
        A copy of the matching rows of ``df`` with two extra columns:
        ``bm25`` (BM25 score of the chunk for the query) and ``semantic``
        (the score/distance reported by FAISS). May contain fewer than
        ``top_k`` rows, or be empty, when the index holds fewer vectors.
    """
    bm25_scores = bm25.get_scores(query.split())

    query_emb = embed_model.encode(
        query,
        convert_to_tensor=True,
        normalize_embeddings=True,
    )
    sem_scores, indices = index.search(
        query_emb.cpu().numpy().reshape(1, -1),
        top_k,
    )

    # FAISS pads missing neighbours with label -1. Without this filter,
    # df.iloc[-1] would silently return the LAST row as a bogus hit.
    found = indices[0] >= 0
    idxs = indices[0][found]

    # Global row positions of the hits inside df.
    df_temp = df.iloc[idxs].copy()
    df_temp["bm25"] = [bm25_scores[i] for i in idxs]
    df_temp["semantic"] = sem_scores[0][found]
    return df_temp


# =========================
# LLM (BAHASALAB LOCAL)
# =========================
MODEL_ID = "Bahasalab/Bahasa-4b-chat-v2"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
llm = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="auto",
    torch_dtype=torch.float16,
)


# =========================
# PROMPT
# =========================
def build_prompt(query, context_df):
    """Build the LLM prompt from the question and the retrieved chunks.

    Args:
        query: The user question, inserted verbatim.
        context_df: DataFrame with ``page_number`` and ``text`` columns;
            each row contributes one context paragraph, with its text
            truncated to 500 characters.

    Returns:
        The full prompt string, ending with the ``JAWABAN:`` marker.
    """
    context = "\n\n".join(
        f"(Hal {row['page_number']}) {row['text'][:500]}"
        for _, row in context_df.iterrows()
    )
    return f"""
Jawab hanya berdasarkan konteks berikut.
Jika tidak ada, jawab: Informasi tidak ditemukan.

KONTEKS:
{context}

PERTANYAAN:
{query}

JAWABAN:
"""


# =========================
# GENERATE
# =========================
def ask(query):
    """Answer ``query`` from retrieved context and append the source list.

    Args:
        query: The user question.

    Returns:
        The generated answer followed by a bulleted "Sumber" list, or the
        fixed not-found message when retrieval returns no rows.
    """
    results = hybrid_search(query)
    if results.empty:
        return "Informasi tidak ditemukan."

    prompt = build_prompt(query, results)

    inputs = tokenizer(prompt, return_tensors="pt")
    # With device_map="auto" the model chooses its own placement, so follow
    # the model's device instead of the embedding model's global `device`.
    inputs = {k: v.to(llm.device) for k, v in inputs.items()}

    with torch.no_grad():
        output = llm.generate(
            **inputs,
            max_new_tokens=80,  # kept small so generation stays fast
            # Greedy decoding. `temperature` is not passed because it is
            # ignored when do_sample=False and only triggers a warning.
            do_sample=False,
        )

    # Decode only the newly generated tokens rather than re-decoding the
    # prompt and searching for the marker inside it.
    prompt_len = inputs["input_ids"].shape[1]
    generated = tokenizer.decode(
        output[0][prompt_len:],
        skip_special_tokens=True,
    )
    # If the model repeats the marker, keep only the text after the last one.
    answer = generated.split("JAWABAN:")[-1].strip()
    if not answer:
        answer = "Informasi tidak ditemukan."

    # De-duplicate while preserving retrieval order. A set would make the
    # order of the listed sources non-reproducible.
    sources = list(dict.fromkeys(
        f"{row['source_file']} (Hal {row['page_number']})"
        for _, row in results.iterrows()
    ))

    return answer + "\n\n📚 Sumber:\n" + "\n".join(f"- {s}" for s in sources)


# =========================
# CHATBOT
# =========================
def chatbot_fn(message, history):
    """Gradio handler: append the user turn and the bot reply to the history.

    Args:
        message: Text from the input box (may be empty or ``None``).
        history: List of ``{"role", "content"}`` dicts, or ``None``.

    Returns:
        A tuple ``("", history)``: the empty string clears the input box and
        ``history`` is the updated conversation.
    """
    if history is None:
        history = []

    # Treat None like an empty message instead of raising AttributeError.
    if not (message or "").strip():
        history.append({
            "role": "assistant",
            "content": "Silakan masukkan pertanyaan.",
        })
        return "", history

    history.append({"role": "user", "content": message})
    answer = ask(message)
    history.append({"role": "assistant", "content": answer})
    return "", history


def clear_chat():
    """Return an empty history and an empty input-box value."""
    return [], ""


# =========================
# UI
# =========================
custom_css = """
/* ===== FORCE DARK GLOBAL ===== */
:root {
    color-scheme: dark !important;
}

html, body, #root, .gradio-container {
    background-color: #0f172a !important;
    color: #e5e7eb !important;
    margin: 0;
    padding: 0;
}

/* Hilangkan efek light mode */
* {
    background-color: transparent;
}

/* Container utama */
.gradio-container {
    background-color: #0f172a !important;
    max-width: 100% !important;
}

/* Wrapper */
.block-container {
    max-width: 1200px;
    margin: auto;
    padding: 20px;
    background-color: #0f172a !important;
}

/* ===== CHATBOT ===== */
[data-testid="chatbot"],
[data-testid="chatbot"] *,
.gr-chatbot,
.gr-chatbot * {
    background-color: #111827 !important;
}

/* Scroll area */
.overflow-y-auto {
    background-color: #111827 !important;
}

/* Bubble */
.message.bot {
    background-color: #1f2937 !important;
    color: #e5e7eb !important;
    border-radius: 12px;
}

.message.user {
    background-color: #2563eb !important;
    color: white !important;
    border-radius: 12px;
}

/* Textbox */
textarea {
    background-color: #1f2937 !important;
    color: white !important;
    border-radius: 12px !important;
}

/* Input wrapper */
.gr-textbox, .gr-input-container {
    background-color: #0f172a !important;
}

/* Label */
.gr-textbox > label {
    color: #cbd5e1 !important;
}

/* Button */
button {
    background: linear-gradient(135deg, #2563eb, #1d4ed8) !important;
    color: white !important;
    border-radius: 10px !important;
    padding: 10px 16px;
}

/* Text */
h1, h2, h3, p, label {
    color: #e5e7eb !important;
}

/* Header */
#hero {
    text-align: center;
    margin-bottom: 20px;
}

#hero p {
    color: #9ca3af;
}

/* Hilangkan outline */
*:focus {
    outline: none !important;
}

/* ===== HILANGKAN HEADER GRADIO ===== */
footer,
.gradio-container .footer,
#footer,
.built-with,
a[href*="gradio.app"],
button[aria-label="Use via API"],
button[aria-label="Settings"] {
    display: none !important;
}
"""

# =========================
# UI (FORCE DARK THEME)
# =========================
with gr.Blocks(
    css=custom_css,
    theme=gr.themes.Base(
        primary_hue="blue",
        neutral_hue="slate",
    ),
) as demo:

    # NOTE(review): the markup of this banner was lost in the pasted source.
    # The wrapper below is reconstructed from the `#hero` / `#hero p` rules
    # in custom_css; confirm it against the original file.
    gr.HTML("""
<div id="hero">
    <h1>🏛️ Chatbot Layanan Informasi DCKTRP DKI Jakarta</h1>
    <p>Chatbot yang dirancang untuk membantu pengguna mendapatkan informasi seputar bangunan gedung, tata ruang, dan regulasi Dinas Cipta Karya dan Tata Ruang DKI Jakarta.</p>
</div>
""")

    # NOTE(review): chatbot_fn emits {"role", "content"} dicts. Some Gradio
    # releases need gr.Chatbot(type="messages") to accept that format;
    # confirm against the pinned Gradio version.
    chatbot = gr.Chatbot(
        height=500,
    )

    state = gr.State([])

    user_input = gr.Textbox(
        label="Masukkan Pertanyaan",
        placeholder="Tanyakan sesuatu...",
        lines=2,
    )

    with gr.Row():
        send_btn = gr.Button("🚀 Kirim")
        clear_btn = gr.Button("🧹 Bersihkan")

    gr.Markdown("### 💡 Contoh Pertanyaan")

    gr.Examples(
        examples=[
            ["Apa itu bangunan gedung?"],
            ["Apa syarat mendirikan bangunan?"],
            ["Apa fungsi Persetujuan Bangunan Gedung?"],
            ["Apa pengertian tata ruang?"],
            ["Berapa lama proses Persetujuan Bangunan Gedung?"],
        ],
        inputs=user_input,
    )

    # EVENTS
    # Each handler updates the visible chatbot first, then mirrors the
    # chatbot value into `state` so the next turn receives the full history.
    send_btn.click(
        fn=chatbot_fn,
        inputs=[user_input, state],
        outputs=[user_input, chatbot],
    ).then(
        fn=lambda history: history,
        inputs=chatbot,
        outputs=state,
    )

    user_input.submit(
        fn=chatbot_fn,
        inputs=[user_input, state],
        outputs=[user_input, chatbot],
    ).then(
        fn=lambda history: history,
        inputs=chatbot,
        outputs=state,
    )

    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, user_input],
    ).then(
        fn=lambda: [],
        outputs=state,
    )

# =========================
# LAUNCH
# =========================
demo.launch(server_name="0.0.0.0", server_port=7860)