irhamni commited on
Commit
11c9fbc
·
verified ·
1 Parent(s): 4ef236a

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +17 -13
  2. app.py +67 -0
  3. requirements.txt +4 -0
  4. runtime.txt +1 -0
README.md CHANGED
@@ -1,13 +1,17 @@
1
- ---
2
- title: Faq Chatbot
3
- emoji: 📊
4
- colorFrom: blue
5
- colorTo: gray
6
- sdk: gradio
7
- sdk_version: 5.49.1
8
- app_file: app.py
9
- pinned: false
10
- license: lgpl-2.1
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
1
+
2
+ # IPLM FAQ Chatbot (Non‑LLM)
3
+
4
+ A simple FAQ chatbot for IPLM using TF‑IDF + fuzzy matching. Built with **Gradio** (no LLM).
5
+
6
+ ## How to run locally
7
+ ```bash
8
+ pip install -r requirements.txt
9
+ python app.py
10
+ ```
11
+
12
+ ## Dataset
13
+ Put your JSONL under `data/iplm_qna.jsonl` with keys:
14
+ - `question`: main question
15
+ - `q_variants`: list of alternative phrasings (optional)
16
+ - `answer`: the short authoritative answer
17
+ - `source`: source string (optional)
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ import json, numpy as np, gradio as gr
3
+ from sklearn.feature_extraction.text import TfidfVectorizer
4
+ from sklearn.metrics.pairwise import linear_kernel
5
+ from rapidfuzz import fuzz
6
+
7
+ DATA_PATH = "data/iplm_qna.jsonl"
8
+
9
def load_qa(path):
    """Load Q&A records from a JSONL file and flatten question variants.

    Each non-empty line must be a JSON object with keys:
      - "question": the canonical question text
      - "q_variants": optional list of alternative phrasings
      - "answer": the answer text
      - "source": optional source/citation string

    Every non-empty phrasing (the canonical question plus each variant)
    becomes its own entry so the retriever can match any of them directly;
    all phrasings of a record share the same answer and source.

    Args:
        path: Path to the JSONL data file.

    Returns:
        list[dict]: entries of the form
        {"question": str, "answer": str, "source": str}.
    """
    qa_list = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                # Tolerate blank/trailing lines instead of crashing with
                # json.JSONDecodeError on an empty string.
                continue
            obj = json.loads(line)
            q = (obj.get("question") or "").strip()
            qvars = obj.get("q_variants") or []
            ans = (obj.get("answer") or "").strip()
            src = (obj.get("source") or "").strip()
            # One retrievable entry per phrasing; drop empty phrasings.
            for qq in [q] + qvars:
                if qq:
                    qa_list.append({"question": qq, "answer": ans, "source": src})
    return qa_list
23
+
24
# Build the in-memory retrieval index once at import time.
qa_data = load_qa(DATA_PATH)
# Flat list of every question phrasing, aligned index-for-index with qa_data.
questions = [d["question"] for d in qa_data]
# Character n-grams (3-5 chars, word-boundary aware) tolerate typos and
# morphological variation better than whole-word tokens.
vectorizer = TfidfVectorizer(analyzer="char_wb", ngram_range=(3,5), lowercase=True)
# TF-IDF matrix of all indexed questions; queried by search_answer().
X = vectorizer.fit_transform(questions)
28
+
29
def search_answer(query, alpha=0.6, threshold=55):
    """Hybrid FAQ retrieval: TF-IDF cosine blended with a fuzzy-match ratio.

    The query is scored against every indexed question; the cosine similarity
    (min-max rescaled to 0..100 so it is comparable with fuzz.ratio) is
    blended with RapidFuzz's ratio using weight *alpha*.  Returns the best
    answer, or — when the best blended score is below *threshold* — a
    fallback message listing the top-3 candidate questions.
    """
    if not query.strip():
        return "Silakan ketik pertanyaan."

    # Cosine similarity of the query against the prebuilt TF-IDF matrix.
    query_vec = vectorizer.transform([query])
    cosine = linear_kernel(query_vec, X).ravel()
    # Rescale to 0..100; epsilon guards against a zero spread.
    spread = cosine.max() - cosine.min() + 1e-12
    cosine_pct = 100 * (cosine - cosine.min()) / spread

    # Blend both signals for the 50 strongest cosine candidates only.
    lowered = query.lower()
    candidates = np.argsort(-cosine)[:50]
    ranked = sorted(
        (
            (j, alpha * cosine_pct[j] + (1 - alpha) * fuzz.ratio(lowered, questions[j].lower()))
            for j in candidates
        ),
        key=lambda pair: pair[1],
        reverse=True,
    )

    top_idx, top_score = ranked[0]
    if top_score < threshold:
        # Not confident enough: offer the best three matches as suggestions.
        suggestions = "\n".join(
            f"- {qa_data[i]['question']} (skor {round(s,1)})" for i, s in ranked[:3]
        )
        return f"❓ Maaf, saya belum yakin.\nMungkin maksud Anda salah satu dari ini:\n{suggestions}"

    hit = qa_data[top_idx]
    src = hit.get("source", "")
    src_line = f"\n\n📚 Sumber: {src}" if src else ""
    return hit['answer'] + src_line
50
+
51
def chatbot_response(message, history):
    """Adapter between the chat UI callback signature and the retriever.

    The conversation *history* is accepted for interface compatibility but
    deliberately ignored: every question is answered statelessly.
    """
    reply = search_answer(message)
    return reply
53
+
54
# --- Gradio UI ---------------------------------------------------------------
with gr.Blocks(title="IPLM FAQ Chatbot (Non‑LLM)") as demo:
    gr.Markdown("## 🤖 Chatbot IPLM (Non‑LLM)\nChatbot ini menjawab berdasarkan data Q&A IPLM resmi.")
    # Chat transcript display; history is a list of (user, bot) string pairs
    # — presumably the gradio "tuples" Chatbot format, TODO confirm against
    # the pinned gradio version.
    chat = gr.Chatbot(height=420, bubble_full_width=False, show_copy_button=True, show_label=False)
    with gr.Row():
        user_input = gr.Textbox(label="Ketik pertanyaan…", placeholder="Contoh: Apa itu IPLM?", lines=2, scale=8)
        send_btn = gr.Button("💬 Kirim", scale=1)
    def on_submit(msg, chat_history):
        # Answer the message, append the (question, answer) pair to the
        # transcript, and clear the input box (first return value "").
        reply = chatbot_response(msg, chat_history)
        chat_history = chat_history + [(msg, reply)]
        return "", chat_history
    # Both the button click and pressing Enter in the textbox trigger the
    # same handler, reading/writing the same components.
    send_btn.click(on_submit, inputs=[user_input, chat], outputs=[user_input, chat])
    user_input.submit(on_submit, inputs=[user_input, chat], outputs=[user_input, chat])
    gr.Markdown("---\nDikembangkan dengan 💡 TF‑IDF + Fuzzy Matching (tanpa LLM).")
# Start the Gradio server (blocking call). NOTE(review): the diff rendering
# lost indentation — this line is assumed to sit at module level, after the
# Blocks context; launching inside the `with` would behave the same here.
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio>=4.0.0,<5
2
+ scikit-learn>=1.2
3
+ rapidfuzz>=3.0
4
+ numpy>=1.23
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.10