Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- README.md +17 -13
- app.py +67 -0
- requirements.txt +4 -0
- runtime.txt +1 -0
README.md
CHANGED
|
@@ -1,13 +1,17 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
# IPLM FAQ Chatbot (Non‑LLM)
|
| 3 |
+
|
| 4 |
+
A simple FAQ chatbot for IPLM using TF‑IDF + fuzzy matching. Built with **Gradio** (no LLM).
|
| 5 |
+
|
| 6 |
+
## How to run locally
|
| 7 |
+
```bash
|
| 8 |
+
pip install -r requirements.txt
|
| 9 |
+
python app.py
|
| 10 |
+
```
|
| 11 |
+
|
| 12 |
+
## Dataset
|
| 13 |
+
Put your JSONL under `data/iplm_qna.jsonl`, one JSON object per line, with keys:
|
| 14 |
+
- `question`: main question
|
| 15 |
+
- `q_variants`: list of alternative phrasings (optional)
|
| 16 |
+
- `answer`: the short authoritative answer
|
| 17 |
+
- `source`: source string (optional)
|
app.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
|
| 2 |
+
import json, numpy as np, gradio as gr
|
| 3 |
+
from sklearn.feature_extraction.text import TfidfVectorizer
|
| 4 |
+
from sklearn.metrics.pairwise import linear_kernel
|
| 5 |
+
from rapidfuzz import fuzz
|
| 6 |
+
|
| 7 |
+
DATA_PATH = "data/iplm_qna.jsonl"
|
| 8 |
+
|
| 9 |
+
def load_qa(path):
    """Load Q&A pairs from a JSONL file into a flat list of dicts.

    Each input line is a JSON object with keys:
      - ``question``: canonical question text
      - ``q_variants``: optional list of alternative phrasings
      - ``answer``: the answer text
      - ``source``: optional source string

    Every non-empty question variant becomes its own entry (sharing the
    same answer/source) so retrieval can match any phrasing.

    Blank or whitespace-only lines are skipped — the original crashed
    with ``json.JSONDecodeError`` on a trailing newline in the data file.
    """
    qa_list = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue  # tolerate blank lines / trailing newline
            obj = json.loads(line)
            q = (obj.get("question") or "").strip()
            qvars = obj.get("q_variants") or []
            ans = (obj.get("answer") or "").strip()
            src = (obj.get("source") or "").strip()
            for qq in [q] + qvars:
                if qq:
                    qa_list.append({"question": qq, "answer": ans, "source": src})
    return qa_list
|
| 23 |
+
|
| 24 |
+
# Build the retrieval index once at import time.
qa_data = load_qa(DATA_PATH)  # flat list of {"question", "answer", "source"}
questions = [d["question"] for d in qa_data]
# char_wb 3-5-grams: character n-grams restricted to word boundaries —
# presumably chosen for tolerance to typos/partial words; verify fit for data.
vectorizer = TfidfVectorizer(analyzer="char_wb", ngram_range=(3,5), lowercase=True)
X = vectorizer.fit_transform(questions)  # TF-IDF matrix over all question variants
|
| 28 |
+
|
| 29 |
+
def search_answer(query, alpha=0.6, threshold=55):
    """Return the best-matching FAQ answer for ``query``.

    Scoring is a hybrid on a 0-100 scale:
        score = alpha * (cosine * 100) + (1 - alpha) * fuzzy_ratio

    Args:
        query: user question (any string; blank input gets a prompt back).
        alpha: weight of the TF-IDF cosine part vs. the fuzzy part.
        threshold: minimum hybrid score; below it, the top-3 candidate
            questions are returned as suggestions instead of an answer.

    Bug fix: the original min-max-normalized the cosine scores *per query*,
    which forced the best cosine candidate to a scaled score of 100 no
    matter how weak the actual match was — so ``best_score >= alpha*100``
    and the threshold fallback was effectively unreachable. TF-IDF rows
    are l2-normalized, so ``linear_kernel`` already yields cosine in
    [0, 1]; scaling by 100 keeps the threshold meaningful.
    """
    if not query.strip():
        return "Silakan ketik pertanyaan."
    qv = vectorizer.transform([query])
    cos = linear_kernel(qv, X).ravel()
    cos_scaled = 100 * cos  # absolute cosine on the same 0-100 scale as fuzz.ratio
    idx = np.argsort(-cos)[:50]  # only re-rank the top cosine candidates
    q_lower = query.lower()  # hoisted: invariant across the loop
    hybrid_scores = []
    for j in idx:
        fz = fuzz.ratio(q_lower, questions[j].lower())
        score = alpha * cos_scaled[j] + (1 - alpha) * fz
        hybrid_scores.append((j, score))
    hybrid_scores.sort(key=lambda x: -x[1])
    best_idx, best_score = hybrid_scores[0]
    best = qa_data[best_idx]
    if best_score < threshold:
        # Low confidence: show the top-3 candidates instead of guessing.
        suggestions = "\n".join([f"- {qa_data[i]['question']} (skor {round(s,1)})" for i, s in hybrid_scores[:3]])
        return f"❓ Maaf, saya belum yakin.\nMungkin maksud Anda salah satu dari ini:\n{suggestions}"
    src = best.get("source", "")
    src_line = f"\n\n📚 Sumber: {src}" if src else ""
    return best['answer'] + src_line
|
| 50 |
+
|
| 51 |
+
def chatbot_response(message, history):
    """Chat callback adapter: answers are stateless FAQ lookups, so the
    conversation ``history`` is accepted for the callback signature but
    deliberately ignored."""
    del history  # unused — retrieval does not depend on prior turns
    return search_answer(message)
|
| 53 |
+
|
| 54 |
+
# --- Gradio UI wiring (module-level script; demo.launch() starts the app) ---
with gr.Blocks(title="IPLM FAQ Chatbot (Non‑LLM)") as demo:
    gr.Markdown("## 🤖 Chatbot IPLM (Non‑LLM)\nChatbot ini menjawab berdasarkan data Q&A IPLM resmi.")
    # Tuple-style chat history: a list of (user_message, bot_reply) pairs.
    chat = gr.Chatbot(height=420, bubble_full_width=False, show_copy_button=True, show_label=False)
    with gr.Row():
        user_input = gr.Textbox(label="Ketik pertanyaan…", placeholder="Contoh: Apa itu IPLM?", lines=2, scale=8)
        send_btn = gr.Button("💬 Kirim", scale=1)
    def on_submit(msg, chat_history):
        # Answer the question, append the new (question, reply) pair, and
        # clear the textbox (the "" maps to user_input in the outputs list).
        reply = chatbot_response(msg, chat_history)
        chat_history = chat_history + [(msg, reply)]
        return "", chat_history
    # Both clicking the button and pressing Enter submit the question.
    send_btn.click(on_submit, inputs=[user_input, chat], outputs=[user_input, chat])
    user_input.submit(on_submit, inputs=[user_input, chat], outputs=[user_input, chat])
    gr.Markdown("---\nDikembangkan dengan 💡 TF‑IDF + Fuzzy Matching (tanpa LLM).")
demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0,<5
|
| 2 |
+
scikit-learn>=1.2
|
| 3 |
+
rapidfuzz>=3.0
|
| 4 |
+
numpy>=1.23
|
runtime.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
python-3.10
|