File size: 2,926 Bytes
11c9fbc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68

import json, numpy as np, gradio as gr
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
from rapidfuzz import fuzz

DATA_PATH = "data/iplm_qna.jsonl"

def load_qa(path):
    """Load Q&A records from a JSONL file into a flat, searchable list.

    Each line is a JSON object with keys "question", "q_variants"
    (optional list of paraphrased questions), "answer", and "source".
    The main question and every variant each become one entry sharing
    the same answer/source, so any phrasing can be matched directly.

    Args:
        path: Path to the JSONL data file.

    Returns:
        list[dict]: entries with keys "question", "answer", "source".

    Raises:
        FileNotFoundError: if `path` does not exist.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    qa_list = []
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            # Tolerate blank lines (common in hand-edited JSONL files);
            # json.loads("") would raise otherwise.
            if not line:
                continue
            obj = json.loads(line)
            q = (obj.get("question") or "").strip()
            qvars = obj.get("q_variants") or []
            ans = (obj.get("answer") or "").strip()
            src = (obj.get("source") or "").strip()
            for qq in [q] + qvars:
                if qq:
                    qa_list.append({"question": qq, "answer": ans, "source": src})
    return qa_list

# Build the retrieval index at import time: flatten the JSONL data into
# (question, answer, source) entries, then fit a character-n-gram TF-IDF
# index over all question strings.  Row i of the matrix X corresponds to
# questions[i] and qa_data[i] (same list order), which search_answer
# relies on when mapping similarity scores back to answers.
qa_data = load_qa(DATA_PATH)
questions = [d["question"] for d in qa_data]
# char_wb 3..5-grams: n-grams are taken within word boundaries — presumably
# chosen for typo tolerance in user queries (TODO confirm with the author).
vectorizer = TfidfVectorizer(analyzer="char_wb", ngram_range=(3,5), lowercase=True)
X = vectorizer.fit_transform(questions)

def search_answer(query, alpha=0.6, threshold=55):
    """Retrieve the best-matching answer for a free-text `query`.

    Scoring is a hybrid on a 0-100 scale:
        score = alpha * (cosine * 100) + (1 - alpha) * fuzz.ratio
    where cosine is TF-IDF char-n-gram similarity against the indexed
    questions and fuzz.ratio is a case-insensitive fuzzy string match.
    If even the best score is below `threshold`, the top-3 candidate
    questions are returned as suggestions instead of an answer.

    Args:
        query: user question (free text); blank input gets a prompt back.
        alpha: weight of the cosine component, in [0, 1].
        threshold: minimum hybrid score (0-100) to accept the best match.

    Returns:
        str: the answer (plus a source line when available), or a
        "not sure" message listing the top-3 suggestions.
    """
    if not query.strip():
        return "Silakan ketik pertanyaan."
    qv = vectorizer.transform([query])
    cos = linear_kernel(qv, X).ravel()
    # BUG FIX: the original min-max scaled `cos` per query, which forces
    # the best candidate's cosine component to ~100 for ANY query — so
    # with alpha=0.6 the top hybrid score was always >= 60 > threshold
    # and the "not sure" fallback branch was unreachable.  TF-IDF vectors
    # are non-negative, so the raw cosine already lies in [0, 1]; scale
    # it absolutely instead to make `threshold` meaningful.
    cos_scaled = cos * 100.0
    # Rerank only the 50 best cosine candidates with the fuzzy component
    # (fuzz.ratio over the whole corpus would be needlessly slow).
    idx = np.argsort(-cos)[:50]
    query_lc = query.lower()
    hybrid_scores = [
        (j, alpha * cos_scaled[j] + (1 - alpha) * fuzz.ratio(query_lc, questions[j].lower()))
        for j in idx
    ]
    hybrid_scores.sort(key=lambda pair: -pair[1])
    best_idx, best_score = hybrid_scores[0]
    if best_score < threshold:
        suggestions = "\n".join(
            f"- {qa_data[i]['question']} (skor {round(s,1)})" for i, s in hybrid_scores[:3]
        )
        return f"❓ Maaf, saya belum yakin.\nMungkin maksud Anda salah satu dari ini:\n{suggestions}"
    best = qa_data[best_idx]
    src = best.get("source", "")
    src_line = f"\n\n📚 Sumber: {src}" if src else ""
    return best['answer'] + src_line

def chatbot_response(message, history):
    """Chat-callback adapter: answer `message` via retrieval.

    `history` is part of the chat callback signature but is ignored —
    each lookup is stateless.
    """
    del history  # unused; retrieval does not depend on prior turns
    return search_answer(message)

# --- Gradio UI: wire the retrieval pipeline into a simple chat page. ---
with gr.Blocks(title="IPLM FAQ Chatbot (Non‑LLM)") as demo:
    gr.Markdown("## 🤖 Chatbot IPLM (Non‑LLM)\nChatbot ini menjawab berdasarkan data Q&A IPLM resmi.")
    chat = gr.Chatbot(height=420, bubble_full_width=False, show_copy_button=True, show_label=False)
    with gr.Row():
        user_input = gr.Textbox(label="Ketik pertanyaan…", placeholder="Contoh: Apa itu IPLM?", lines=2, scale=8)
        send_btn = gr.Button("💬 Kirim", scale=1)

    def on_submit(msg, chat_history):
        """Answer `msg`, append the (question, answer) turn, clear the box."""
        reply = chatbot_response(msg, chat_history)
        return "", chat_history + [(msg, reply)]

    # Both the button click and pressing Enter in the textbox trigger
    # the same handler with identical wiring.
    for trigger in (send_btn.click, user_input.submit):
        trigger(on_submit, inputs=[user_input, chat], outputs=[user_input, chat])

    gr.Markdown("---\nDikembangkan dengan 💡 TF‑IDF + Fuzzy Matching (tanpa LLM).")
demo.launch()