# IPLM FAQ Chatbot — TF-IDF + fuzzy matching over a JSONL Q&A dataset (no LLM).
| import json, numpy as np, gradio as gr | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import linear_kernel | |
| from rapidfuzz import fuzz | |
| DATA_PATH = "data/iplm_qna.jsonl" | |
def load_qa(path):
    """Load Q&A entries from a JSONL file.

    Each line is a JSON object with keys ``question``, ``answer``,
    ``source`` and optionally ``q_variants`` (alternative phrasings of
    the same question). Every non-empty phrasing (main question plus
    variants) becomes its own entry so the retriever can match any of
    them against a user query.

    Args:
        path: Path to the ``.jsonl`` dataset file.

    Returns:
        list[dict]: Entries of the form
        ``{"question": str, "answer": str, "source": str}``.
    """
    qa_list = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # A stray blank line would otherwise raise JSONDecodeError
                # from json.loads("").
                continue
            obj = json.loads(line)
            q = (obj.get("question") or "").strip()
            qvars = obj.get("q_variants") or []
            ans = (obj.get("answer") or "").strip()
            src = (obj.get("source") or "").strip()
            for qq in [q, *qvars]:
                # Normalize variants the same way as the main question
                # (the original only stripped the main question).
                qq = qq.strip() if isinstance(qq, str) else ""
                if qq:
                    qa_list.append({"question": qq, "answer": ans, "source": src})
    return qa_list
# --- Build the retrieval index once, at import time ---
# Flatten every question phrasing (main + variants) with its answer/source.
qa_data = load_qa(DATA_PATH)
questions = [d["question"] for d in qa_data]
# Character n-grams within word boundaries (3-5 chars) make the TF-IDF
# matching tolerant to typos and affix variation, which plain word tokens
# would miss.
vectorizer = TfidfVectorizer(analyzer="char_wb", ngram_range=(3,5), lowercase=True)
# X: TF-IDF matrix, one row per stored question phrasing.
X = vectorizer.fit_transform(questions)
def search_answer(query, alpha=0.6, threshold=55):
    """Return the best-matching stored answer for *query*.

    Ranking is a hybrid score on a 0-100 scale: a min-max scaled TF-IDF
    cosine similarity blended with a RapidFuzz ratio, weighted by
    *alpha* (TF-IDF share). When the winning score falls below
    *threshold*, the top 3 candidate questions are suggested instead of
    answering.
    """
    # Guard: empty or whitespace-only input.
    if not query.strip():
        return "Silakan ketik pertanyaan."

    sims = linear_kernel(vectorizer.transform([query]), X).ravel()
    # Min-max scale to 0..100; the epsilon keeps the division safe when
    # all similarities are equal.
    spread = sims.max() - sims.min() + 1e-12
    scaled = 100 * (sims - sims.min()) / spread

    # Blend TF-IDF and fuzzy scores for the 50 strongest TF-IDF candidates.
    query_lc = query.lower()
    candidates = np.argsort(-sims)[:50]
    ranked = sorted(
        (
            (j, alpha * scaled[j] + (1 - alpha) * fuzz.ratio(query_lc, questions[j].lower()))
            for j in candidates
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )

    best_idx, best_score = ranked[0]
    if best_score < threshold:
        # Low confidence: offer the closest questions rather than guessing.
        suggestions = "\n".join(
            f"- {qa_data[i]['question']} (skor {round(s,1)})" for i, s in ranked[:3]
        )
        return f"❓ Maaf, saya belum yakin.\nMungkin maksud Anda salah satu dari ini:\n{suggestions}"

    entry = qa_data[best_idx]
    src = entry.get("source", "")
    src_line = f"\n\n📚 Sumber: {src}" if src else ""
    return entry['answer'] + src_line
def chatbot_response(message, history):
    """Gradio chat callback: answer *message* via retrieval.

    *history* is accepted only to match the chat-callback signature and
    is deliberately unused — every query is answered statelessly.
    """
    return search_answer(message)
# --- Gradio UI: chat window, input row, and event wiring ---
with gr.Blocks(title="IPLM FAQ Chatbot (Non‑LLM)") as demo:
    gr.Markdown("## 🤖 Chatbot IPLM (Non‑LLM)\nChatbot ini menjawab berdasarkan data Q&A IPLM resmi.")
    # NOTE(review): history is kept as (user, bot) tuples below; newer Gradio
    # versions prefer the "messages" format — confirm against the pinned
    # gradio version.
    chat = gr.Chatbot(height=420, bubble_full_width=False, show_copy_button=True, show_label=False)
    with gr.Row():
        user_input = gr.Textbox(label="Ketik pertanyaan…", placeholder="Contoh: Apa itu IPLM?", lines=2, scale=8)
        send_btn = gr.Button("💬 Kirim", scale=1)

    def on_submit(msg, chat_history):
        # Append the (user, bot) turn and clear the textbox ("" output).
        reply = chatbot_response(msg, chat_history)
        chat_history = chat_history + [(msg, reply)]
        return "", chat_history

    # Both the send button and pressing Enter in the textbox submit.
    send_btn.click(on_submit, inputs=[user_input, chat], outputs=[user_input, chat])
    user_input.submit(on_submit, inputs=[user_input, chat], outputs=[user_input, chat])

    gr.Markdown("---\nDikembangkan dengan 💡 TF‑IDF + Fuzzy Matching (tanpa LLM).")

demo.launch()