# Spaces: Sleeping  (hosting-page status text captured with the source; not part of the program)
| # -*- coding: utf-8 -*- | |
| import torch | |
| import pandas as pd | |
| import faiss | |
| import gradio as gr | |
| from sentence_transformers import SentenceTransformer | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| from rank_bm25 import BM25Okapi | |
# =========================
# LOAD DATA
# =========================
# The chunk table and the prebuilt FAISS index are read from the working
# directory when the module is imported.
# NOTE(review): hybrid_search uses FAISS result ids as row positions in `df`,
# so both files are presumably built from the same chunk order - confirm.
index = faiss.read_index("faiss_index.index")
df = pd.read_csv("chunks.csv")

# =========================
# EMBEDDING MODEL
# =========================
# Use the GPU for query embedding when one is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

embed_model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2", device=device
)

# =========================
# BM25
# =========================
# BM25 works on token lists; chunks are tokenized by plain whitespace split.
corpus = [chunk_text.split() for chunk_text in df["text"].tolist()]
bm25 = BM25Okapi(corpus)
# =========================
# HYBRID SEARCH (REDUCED)
# =========================
def hybrid_search(query, top_k=3, alpha=0.6):
    """Retrieve the chunks nearest to *query* and attach both relevance scores.

    Candidates are selected by the FAISS index only; the BM25 score of each
    selected chunk is attached alongside the FAISS score.

    Args:
        query: Question text. It is split on whitespace for BM25 and embedded
            (L2-normalized) for the FAISS lookup.
        top_k: Number of neighbours requested from the FAISS index.
        alpha: Currently unused; kept so existing callers that pass it keep
            working. The two scores are not blended here.

    Returns:
        A copy of the matching rows of ``df`` in FAISS result order, with two
        extra columns: ``"bm25"`` (BM25 score of that chunk for the query) and
        ``"semantic"`` (the value returned by ``index.search`` - a similarity
        or a distance depending on how the index was built). May contain fewer
        than ``top_k`` rows, or be empty, when the index has fewer entries.
    """
    bm25_scores = bm25.get_scores(query.split())

    query_emb = embed_model.encode(
        query,
        convert_to_tensor=True,
        normalize_embeddings=True,
    )
    sem_scores, indices = index.search(
        query_emb.cpu().numpy().reshape(1, -1),
        top_k,
    )

    # FAISS pads the result with id -1 when it finds fewer than top_k
    # neighbours. Left in place, -1 would silently select the LAST row of df
    # (and the last BM25 score) as if it were a real hit, so drop those slots.
    found = indices[0] >= 0
    idxs = indices[0][found]

    # Result ids are used as global row positions in df.
    df_temp = df.iloc[idxs].copy()
    df_temp["bm25"] = [bm25_scores[i] for i in idxs]
    df_temp["semantic"] = sem_scores[0][found]
    return df_temp
# =========================
# LLM (BAHASALAB, LOADED LOCALLY)
# =========================
# The tokenizer and the causal LM are loaded once at import time from the same
# checkpoint id. Weights are loaded in half precision and device placement is
# delegated to device_map="auto".
MODEL_ID = "Bahasalab/Bahasa-4b-chat-v2"

llm = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, torch_dtype=torch.float16, device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# =========================
# PROMPT
# =========================
def build_prompt(query, context_df):
    """Assemble the LLM prompt from the question and the retrieved chunks.

    Args:
        query: The user's question, inserted verbatim.
        context_df: DataFrame with ``page_number`` and ``text`` columns; one
            context entry is produced per row, in row order.

    Returns:
        The prompt string: instructions, a KONTEKS section with one
        ``(Hal <page>) <text>`` entry per row (text cut to 500 characters,
        entries separated by a blank line), the PERTANYAAN section, and a
        trailing ``JAWABAN:`` marker followed by a newline.
    """
    snippets = []
    for _, chunk in context_df.iterrows():
        page = chunk["page_number"]
        excerpt = chunk["text"][:500]  # cap each chunk to keep the prompt short
        snippets.append(f"(Hal {page}) {excerpt}")
    context = "\n\n".join(snippets)

    return (
        "\n"
        "Jawab hanya berdasarkan konteks berikut.\n"
        "Jika tidak ada, jawab: Informasi tidak ditemukan.\n"
        "KONTEKS:\n"
        f"{context}\n"
        "PERTANYAAN:\n"
        f"{query}\n"
        "JAWABAN:\n"
    )
# =========================
# GENERATE
# =========================
def ask(query):
    """Answer *query* from the retrieved chunks and append the source list.

    Args:
        query: The user's question.

    Returns:
        ``"Informasi tidak ditemukan."`` when retrieval returns no rows;
        otherwise the generated answer followed by a "Sumber" section listing
        each distinct ``<source_file> (Hal <page_number>)`` once, in retrieval
        order.
    """
    results = hybrid_search(query)
    if results.empty:
        return "Informasi tidak ditemukan."

    prompt = build_prompt(query, results)
    inputs = tokenizer(prompt, return_tensors="pt")
    # The LLM is placed by device_map="auto", which need not match the
    # embedder's `device`; send the inputs to wherever the model actually is.
    inputs = {k: v.to(llm.device) for k, v in inputs.items()}
    prompt_len = inputs["input_ids"].shape[1]

    with torch.no_grad():
        output = llm.generate(
            **inputs,
            max_new_tokens=80,  # kept small so responses come back faster
            # Greedy decoding. `temperature` is intentionally not passed: it
            # only applies when sampling and contradicts do_sample=False.
            do_sample=False,
        )

    # A causal LM returns prompt + continuation. Decode only the newly
    # generated tokens instead of splitting the text on "JAWABAN:", which
    # breaks if the model itself emits that marker.
    answer = tokenizer.decode(
        output[0][prompt_len:], skip_special_tokens=True
    ).strip()

    # dict.fromkeys de-duplicates while preserving retrieval order (a set
    # would list the sources in arbitrary order).
    sources = dict.fromkeys(
        f"{r['source_file']} (Hal {r['page_number']})"
        for _, r in results.iterrows()
    )
    return answer + "\n\n📚 Sumber:\n" + "\n".join(f"- {s}" for s in sources)
# =========================
# CHATBOT
# =========================
def chatbot_fn(message, history):
    """Handle one chat turn for the UI.

    Args:
        message: Text from the input box. ``None`` and whitespace-only text
            are treated as an empty question.
        history: Conversation so far as a list of
            ``{"role": ..., "content": ...}`` dicts, or ``None``.

    Returns:
        A ``("", new_history)`` tuple: the empty string clears the input box,
        and ``new_history`` is a new list holding the previous messages plus
        this turn. For an empty question only an assistant hint is appended;
        otherwise the user message and the answer from ``ask`` are appended.
    """
    # Work on a copy so the list passed in by the caller is never mutated.
    history = list(history) if history else []

    if message is None or not message.strip():
        history.append({
            "role": "assistant",
            "content": "Silakan masukkan pertanyaan.",
        })
        return "", history

    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": ask(message)})
    return "", history
def clear_chat():
    """Return the reset values for the UI: an empty history and empty text."""
    empty_history = []
    empty_text = ""
    return empty_history, empty_text
# =========================
# UI STYLES
# =========================
# Stylesheet handed to gr.Blocks(css=...). It forces a dark palette on the
# page, the chatbot, the textbox and the buttons, and hides the Gradio footer
# plus the "Use via API" / "Settings" buttons.
# NOTE(review): the whitespace inside this literal is reproduced as captured;
# the original indentation of the CSS was not recoverable.
custom_css = """
/* ===== FORCE DARK GLOBAL ===== */
:root {
color-scheme: dark !important;
}
html, body, #root, .gradio-container {
background-color: #0f172a !important;
color: #e5e7eb !important;
margin: 0;
padding: 0;
}
/* Hilangkan efek light mode */
* {
background-color: transparent;
}
/* Container utama */
.gradio-container {
background-color: #0f172a !important;
max-width: 100% !important;
}
/* Wrapper */
.block-container {
max-width: 1200px;
margin: auto;
padding: 20px;
background-color: #0f172a !important;
}
/* ===== CHATBOT ===== */
[data-testid="chatbot"],
[data-testid="chatbot"] *,
.gr-chatbot,
.gr-chatbot * {
background-color: #111827 !important;
}
/* Scroll area */
.overflow-y-auto {
background-color: #111827 !important;
}
/* Bubble */
.message.bot {
background-color: #1f2937 !important;
color: #e5e7eb !important;
border-radius: 12px;
}
.message.user {
background-color: #2563eb !important;
color: white !important;
border-radius: 12px;
}
/* Textbox */
textarea {
background-color: #1f2937 !important;
color: white !important;
border-radius: 12px !important;
}
/* Input wrapper */
.gr-textbox,
.gr-input-container {
background-color: #0f172a !important;
}
/* Label */
.gr-textbox > label {
color: #cbd5e1 !important;
}
/* Button */
button {
background: linear-gradient(135deg, #2563eb, #1d4ed8) !important;
color: white !important;
border-radius: 10px !important;
padding: 10px 16px;
}
/* Text */
h1, h2, h3, p, label {
color: #e5e7eb !important;
}
/* Header */
#hero {
text-align: center;
margin-bottom: 20px;
}
#hero p {
color: #9ca3af;
}
/* Hilangkan outline */
*:focus {
outline: none !important;
}
/* ===== HILANGKAN HEADER GRADIO ===== */
footer,
.gradio-container .footer,
#footer,
.built-with,
a[href*="gradio.app"],
button[aria-label="Use via API"],
button[aria-label="Settings"] {
display: none !important;
}
"""
# =========================
# UI (FORCE DARK THEME)
# =========================
# NOTE(review): the nesting below (which components sit inside the Row) is
# reconstructed; the captured source had lost its indentation.
# NOTE(review): chatbot_fn emits {"role", "content"} dicts; on Gradio versions
# where gr.Chatbot does not default to the messages format this component
# would need type="messages" - confirm against the pinned Gradio version.
dark_theme = gr.themes.Base(primary_hue="blue", neutral_hue="slate")

with gr.Blocks(css=custom_css, theme=dark_theme) as demo:
    # Header banner.
    gr.HTML("""
<div id="hero">
<h1>🏛️ Chatbot Layanan Informasi DCKTRP DKI Jakarta</h1>
<p>
Chatbot yang dirancang untuk membantu pengguna mendapatkan
informasi seputar bangunan gedung, tata ruang, dan regulasi Dinas
Cipta Karya dan Tata Ruang DKI Jakarta.
</p>
</div>
""")

    # Conversation view, per-session history, and the question box.
    chatbot = gr.Chatbot(height=500)
    state = gr.State([])
    user_input = gr.Textbox(
        lines=2,
        label="Masukkan Pertanyaan",
        placeholder="Tanyakan sesuatu...",
    )

    with gr.Row():
        send_btn = gr.Button("🚀 Kirim")
        clear_btn = gr.Button("🧹 Bersihkan")

    # Clickable sample questions that fill the question box.
    gr.Markdown("### 💡 Contoh Pertanyaan")
    sample_questions = (
        "Apa itu bangunan gedung?",
        "Apa syarat mendirikan bangunan?",
        "Apa fungsi Persetujuan Bangunan Gedung?",
        "Apa pengertian tata ruang?",
        "Berapa lama proses Persetujuan Bangunan Gedung?",
    )
    gr.Examples(
        examples=[[question] for question in sample_questions],
        inputs=user_input,
    )

    # EVENTS
    # The send button and pressing Enter in the textbox run the same chain:
    # answer the question, then copy the displayed chat back into `state`.
    for register in (send_btn.click, user_input.submit):
        register(
            fn=chatbot_fn,
            inputs=[user_input, state],
            outputs=[user_input, chatbot],
        ).then(
            fn=lambda history: history,
            inputs=chatbot,
            outputs=state,
        )

    # Clearing resets the chat view and textbox first, then the stored history.
    clear_btn.click(
        fn=clear_chat,
        outputs=[chatbot, user_input],
    ).then(
        fn=lambda: [],
        outputs=state,
    )
# =========================
# LAUNCH
# =========================
# Runs at import time (no __main__ guard): serve on port 7860, listening on
# all network interfaces.
demo.launch(
    server_port=7860,
    server_name="0.0.0.0",
)