"""FAQ chatbot that answers from a JSONL Q&A file using TF-IDF + fuzzy matching.

The module loads the Q&A data, builds a character n-gram TF-IDF index over
every question phrasing, and exposes the lookup through a Gradio chat UI.
"""

import json

import gradio as gr
import numpy as np
from rapidfuzz import fuzz
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

DATA_PATH = "data/iplm_qna.jsonl"


def load_qa(path):
    """Load Q&A records from a JSON Lines file.

    Each non-blank line is parsed as a JSON object from which the keys
    ``question``, ``q_variants``, ``answer`` and ``source`` are read. The main
    question and every variant become separate entries that share the same
    answer and source, so each phrasing can be matched on its own.

    Args:
        path: Path of the UTF-8 encoded ``.jsonl`` file.

    Returns:
        A list of dicts with the keys ``question``, ``answer`` and ``source``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    qa_list = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            # Blank lines (e.g. a trailing newline) are not records.
            if not line.strip():
                continue
            obj = json.loads(line)
            answer = (obj.get("answer") or "").strip()
            source = (obj.get("source") or "").strip()
            variants = obj.get("q_variants") or []
            if isinstance(variants, str):
                # A single variant given as a bare string instead of a list.
                variants = [variants]
            for phrasing in [obj.get("question") or "", *variants]:
                # Non-string entries cannot be vectorized; skip them.
                if not isinstance(phrasing, str):
                    continue
                phrasing = phrasing.strip()
                if phrasing:
                    qa_list.append(
                        {"question": phrasing, "answer": answer, "source": source}
                    )
    return qa_list


qa_data = load_qa(DATA_PATH)
questions = [d["question"] for d in qa_data]
if not questions:
    # Fail early with a clear message instead of an opaque vectorizer error.
    raise ValueError(f"No questions found in {DATA_PATH!r}")

# Character n-grams (3 to 5 characters) over lowercased text.
vectorizer = TfidfVectorizer(analyzer="char_wb", ngram_range=(3, 5), lowercase=True)
X = vectorizer.fit_transform(questions)


def search_answer(query, alpha=0.6, threshold=55):
    """Return the best-matching answer for ``query``, or suggestions if unsure.

    The 50 questions with the highest TF-IDF similarity are re-scored with a
    weighted blend of the min-max scaled similarity (0-100) and the RapidFuzz
    ``fuzz.ratio`` between the lowercased query and question.

    Args:
        query: The user's question. ``None``, empty or whitespace-only input
            yields a prompt asking the user to type a question.
        alpha: Weight of the scaled TF-IDF score; ``1 - alpha`` is the weight
            of the fuzzy ratio.
        threshold: Minimum blended score required to answer directly.

    Returns:
        The answer text (followed by a source line when a source is present),
        or a fallback message listing up to three candidate questions with
        their scores when the best score is below ``threshold``.
    """
    if not query or not query.strip():
        return "Silakan ketik pertanyaan."
    query = query.strip()

    qv = vectorizer.transform([query])
    cos = linear_kernel(qv, X).ravel()
    # NOTE(review): min-max scaling gives the top cosine hit a scaled score of
    # ~100 whenever the scores are not all equal, so with the defaults its
    # blended score is at least alpha * 100 = 60 and the threshold of 55 only
    # rejects queries whose cosine scores are all identical. Confirm whether an
    # absolute scale (100 * cos) with a re-tuned threshold is what is wanted.
    cos_scaled = 100 * (cos - cos.min()) / (cos.max() - cos.min() + 1e-12)

    top_idx = np.argsort(-cos)[:50]
    query_lower = query.lower()
    hybrid_scores = [
        (
            j,
            alpha * cos_scaled[j]
            + (1 - alpha) * fuzz.ratio(query_lower, questions[j].lower()),
        )
        for j in top_idx
    ]
    hybrid_scores.sort(key=lambda pair: pair[1], reverse=True)

    best_idx, best_score = hybrid_scores[0]
    if best_score < threshold:
        suggestions = "\n".join(
            f"- {qa_data[i]['question']} (skor {round(s,1)})"
            for i, s in hybrid_scores[:3]
        )
        return f"❓ Maaf, saya belum yakin.\nMungkin maksud Anda salah satu dari ini:\n{suggestions}"

    best = qa_data[best_idx]
    src = best.get("source", "")
    src_line = f"\n\n📚 Sumber: {src}" if src else ""
    return best["answer"] + src_line


def chatbot_response(message, history):
    """Return the reply for ``message``; ``history`` is accepted but not used."""
    return search_answer(message)


with gr.Blocks(title="IPLM FAQ Chatbot (Non‑LLM)") as demo:
    gr.Markdown("## 🤖 Chatbot IPLM (Non‑LLM)\nChatbot ini menjawab berdasarkan data Q&A IPLM resmi.")
    chat = gr.Chatbot(
        height=420,
        bubble_full_width=False,
        show_copy_button=True,
        show_label=False,
    )
    with gr.Row():
        user_input = gr.Textbox(
            label="Ketik pertanyaan…",
            placeholder="Contoh: Apa itu IPLM?",
            lines=2,
            scale=8,
        )
        send_btn = gr.Button("💬 Kirim", scale=1)

    def on_submit(msg, chat_history):
        """Append the (message, reply) pair to the chat and clear the textbox."""
        reply = chatbot_response(msg, chat_history)
        # The history may be None before the first message; treat it as empty.
        chat_history = (chat_history or []) + [(msg, reply)]
        return "", chat_history

    send_btn.click(on_submit, inputs=[user_input, chat], outputs=[user_input, chat])
    user_input.submit(on_submit, inputs=[user_input, chat], outputs=[user_input, chat])
    gr.Markdown("---\nDikembangkan dengan 💡 TF‑IDF + Fuzzy Matching (tanpa LLM).")

if __name__ == "__main__":
    # Only start the web server when run as a script, not when imported.
    demo.launch()