Lyon28 committed on
Commit
3ae8926
·
verified ·
1 Parent(s): d646643

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +122 -0
app.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pickle
3
+ from rank_bm25 import BM25Okapi
4
+ from sklearn.feature_extraction.text import TfidfVectorizer
5
+ from sklearn.metrics.pairwise import cosine_similarity
6
+ from difflib import SequenceMatcher
7
+ import numpy as np
8
+ import random
9
+
10
# Load the pre-built retrieval artifacts once, at import time.
print("🤖 Loading model...")

# NOTE(security): pickle.load() can execute arbitrary code while
# deserializing. Only ship a 'chatbot_caca.pkl' that was produced by a
# trusted build of this project.
with open('chatbot_caca.pkl', 'rb') as f:
    data = pickle.load(f)

# Module-level state shared by the scoring helpers below.
qa_pairs = data['qa_pairs']          # entries are read via their 'question' key
bm25 = data['bm25']                  # queried through .get_scores(tokens)
tfidf = data['tfidf']                # queried through .transform([text])
tfidf_matrix = data['tfidf_matrix']  # compared against the query vector
answers = data['answers']            # indexed by the winning candidate index

print(f"✅ Loaded {len(qa_pairs)} QA pairs")
22
+
23
+
24
def preprocess(text: str) -> str:
    """Normalize text for matching: lowercase it and trim surrounding whitespace.

    Args:
        text: Raw text (a user message or a stored question).

    Returns:
        The lowercased text with leading and trailing whitespace removed.
    """
    return text.lower().strip()
26
+
27
+
28
def get_bm25_score(user_input, top_k=3):
    """Return the ``top_k`` highest-scoring BM25 candidates for ``user_input``.

    The input is normalized with ``preprocess`` and split on whitespace, then
    scored with the module-level ``bm25`` object.

    Args:
        user_input: Raw user message.
        top_k: Maximum number of candidates to return.

    Returns:
        A list of ``(index, score)`` tuples, ordered from highest to lowest
        score, as plain ``int`` / ``float`` values. Empty when ``top_k`` is
        not positive.
    """
    if top_k <= 0:
        # Without this guard, ``[-0:]`` below would select the whole array
        # instead of nothing.
        return []

    tokenized_query = preprocess(user_input).split()
    scores = bm25.get_scores(tokenized_query)
    # argsort is ascending: take the last top_k positions and reverse them.
    top_indices = np.argsort(scores)[-top_k:][::-1]
    return [(int(idx), float(scores[idx])) for idx in top_indices]
33
+
34
+
35
def get_tfidf_score(user_input, top_k=3):
    """Return the ``top_k`` candidates most similar to ``user_input`` by TF-IDF.

    The input is normalized with ``preprocess``, vectorized with the
    module-level ``tfidf`` object and compared against ``tfidf_matrix`` using
    cosine similarity.

    Args:
        user_input: Raw user message.
        top_k: Maximum number of candidates to return.

    Returns:
        A list of ``(index, similarity)`` tuples, ordered from highest to
        lowest similarity, as plain ``int`` / ``float`` values. Empty when
        ``top_k`` is not positive.
    """
    if top_k <= 0:
        # Without this guard, ``[-0:]`` below would select the whole array
        # instead of nothing.
        return []

    user_vector = tfidf.transform([preprocess(user_input)])
    # One query row in, so take row 0 of the similarity matrix.
    similarities = cosine_similarity(user_vector, tfidf_matrix)[0]
    # argsort is ascending: take the last top_k positions and reverse them.
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    return [(int(idx), float(similarities[idx])) for idx in top_indices]
40
+
41
+
42
def get_fuzzy_score(user_input, candidate_idx):
    """Return how closely ``user_input`` resembles one stored question.

    Both strings are normalized with ``preprocess`` and then compared with
    ``difflib.SequenceMatcher``.

    Args:
        user_input: Raw user message.
        candidate_idx: Position of the candidate in the module-level
            ``qa_pairs``.

    Returns:
        The ``SequenceMatcher.ratio()`` of the two normalized strings, a
        float between 0 and 1.
    """
    question = qa_pairs[candidate_idx]['question']
    matcher = SequenceMatcher(None, preprocess(user_input), preprocess(question))
    return matcher.ratio()
45
+
46
+
47
def fallback_response(confidence=0.0):
    """Pick a random canned reply for when no stored answer is good enough.

    Args:
        confidence: Final match score of the best candidate. A score strictly
            above 0.15 selects a "not quite sure, please rephrase" reply;
            anything else selects an "I don't know" reply.

    Returns:
        One reply string chosen at random from the selected pool.
    """
    # Above this score there was a partial match, so ask the user to rephrase.
    partial_match_cutoff = 0.15

    if confidence > partial_match_cutoff:
        responses = [
            "hmm kayaknya aku tau sih maksudmu, tapi ga terlalu yakin... coba tanya dengan kata lain? 🤔",
            "aku nangkep sedikit sih, tapi ga confident buat jawab. bisa diperjelas ga?",
        ]
    else:
        responses = [
            "waduh, pertanyaan ini di luar kemampuanku nih. Lyon-nya kurang ngajarin kayaknya 🙄",
            "jujur aja ya, aku ga ngerti maksudmu 😂 coba tanya yang lain deh",
            "kayaknya pertanyaan ini terlalu advanced buat AI bernama Caca Kecil 😅",
            "hmm aku belum tau jawabannya nih. Lyon-nya lagi males update dataset kayaknya 😀",
            "maaf belum bisa jawab yang itu. tapi aku usahain belajar ya! *semangat meski nama ngaco*",
        ]
    return random.choice(responses)
62
+
63
+
64
# Share of each signal in the final score (the three weights sum to 1.0).
_BM25_WEIGHT = 0.4
_TFIDF_WEIGHT = 0.5
_FUZZY_WEIGHT = 0.1
# Raw BM25 scores are divided by this value and capped at 1.0.
_BM25_SCALE = 20
# Minimum final score required to return a stored answer.
_ANSWER_THRESHOLD = 0.25


def chat(message, history):
    """Chat function for Gradio: answer ``message`` from the stored QA pairs.

    The top three BM25 candidates and the top three TF-IDF candidates are
    merged into one weighted score per candidate. The best candidate then
    receives a small bonus based on its fuzzy string similarity to the
    message. Its answer is returned if the final score reaches the
    threshold; otherwise a canned fallback reply is returned.

    Args:
        message: The user's latest message.
        history: Earlier conversation turns passed in by Gradio; not used.

    Returns:
        The matched answer, or a fallback reply string.
    """
    if not isinstance(message, str) or not message.strip():
        # Nothing to match against, so skip scoring entirely.
        return fallback_response(0.0)

    # Get scores
    bm25_results = get_bm25_score(message, top_k=3)
    tfidf_results = get_tfidf_score(message, top_k=3)

    # Combine scores; a candidate found by both retrievers gets both shares.
    combined_scores = {}

    for idx, score in bm25_results:
        normalized_score = min(score / _BM25_SCALE, 1.0)
        combined_scores[idx] = combined_scores.get(idx, 0) + (normalized_score * _BM25_WEIGHT)

    for idx, score in tfidf_results:
        combined_scores[idx] = combined_scores.get(idx, 0) + (score * _TFIDF_WEIGHT)

    if not combined_scores:
        return fallback_response(0.0)

    best_idx = max(combined_scores, key=combined_scores.get)
    best_score = combined_scores[best_idx]

    # Fuzzy bonus: applied only to the candidate that already won on
    # BM25 + TF-IDF, so it cannot change which candidate is picked.
    fuzzy_score = get_fuzzy_score(message, best_idx)
    final_score = best_score + (fuzzy_score * _FUZZY_WEIGHT)

    if final_score >= _ANSWER_THRESHOLD:
        return answers[best_idx]
    return fallback_response(final_score)
97
+
98
+
99
# Create Gradio interface
# The description lines are kept flush-left inside the string; their original
# indentation is not recoverable here, and leading spaces could change how the
# text is rendered -- TODO confirm against the deployed app.
demo = gr.ChatInterface(
    fn=chat,
    title="💬 Chatbot Caca",
    description="""
Chatbot berbasis retrieval (BM25 + TF-IDF) untuk QA Bahasa Indonesia.

**Fun fact:** AI ini namanya Caca Kecil karena creator-nya (Lyon) punya selera penamaan yang... unik 😂

Model size: 2.83 MB | QA pairs: 3,500+ | No LLM needed!
""",
    examples=[
        "siapa nama kamu?",
        "ceritakan tentang dirimu",
        "siapa itu Lyon?",
        "kenapa namamu Caca?",
        "kamu bisa apa?",
    ],
    theme="soft",
    chatbot=gr.Chatbot(height=400),
)

# Start the web UI only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()