Lyon28 committed on
Commit
21b8ac8
·
verified ·
1 Parent(s): edb4be6

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -122
app.py DELETED
@@ -1,122 +0,0 @@
1
- import gradio as gr
2
- import pickle
3
- from rank_bm25 import BM25Okapi
4
- from sklearn.feature_extraction.text import TfidfVectorizer
5
- from sklearn.metrics.pairwise import cosine_similarity
6
- from difflib import SequenceMatcher
7
- import numpy as np
8
- import random
9
-
10
# Load the pre-built retrieval artifacts once, at import time.
print("🤖 Loading model...")
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only ship a chatbot_caca.pkl that was produced by a trusted build step.
with open('chatbot_caca.pkl', 'rb') as f:
    data = pickle.load(f)

# Unpack the bundle into module-level names used by the scoring helpers.
qa_pairs = data['qa_pairs']          # indexable; entries expose a 'question' key
bm25 = data['bm25']                  # object providing get_scores(tokens)
tfidf = data['tfidf']                # fitted vectorizer providing transform(texts)
tfidf_matrix = data['tfidf_matrix']  # matrix the query vector is compared against
answers = data['answers']            # indexable by the same candidate index

print(f"✅ Loaded {len(qa_pairs)} QA pairs")
22
-
23
-
24
def preprocess(text):
    """Normalize *text* for matching: lowercase it, then trim surrounding whitespace."""
    lowered = text.lower()
    return lowered.strip()
26
-
27
-
28
def get_bm25_score(user_input, top_k=3):
    """Return the ``top_k`` best BM25 matches for *user_input*.

    Args:
        user_input: Raw user text; it is normalized with ``preprocess`` and
            split on whitespace before scoring.
        top_k: Maximum number of candidates to return.

    Returns:
        A list of ``(index, score)`` tuples ordered from highest to lowest
        score. The list is empty when ``top_k`` is not positive.
    """
    # A slice of [-0:] would select the WHOLE array, so a non-positive
    # top_k must be handled explicitly instead of falling through.
    if top_k <= 0:
        return []

    tokenized_query = preprocess(user_input).split()
    scores = bm25.get_scores(tokenized_query)
    # argsort is ascending: take the last top_k entries and reverse them.
    top_indices = np.argsort(scores)[-top_k:][::-1]
    # Convert NumPy scalars to built-in types for the callers.
    return [(int(idx), float(scores[idx])) for idx in top_indices]
33
-
34
-
35
def get_tfidf_score(user_input, top_k=3):
    """Return the ``top_k`` best TF-IDF cosine-similarity matches for *user_input*.

    Args:
        user_input: Raw user text; it is normalized with ``preprocess`` before
            being vectorized.
        top_k: Maximum number of candidates to return.

    Returns:
        A list of ``(index, similarity)`` tuples ordered from highest to
        lowest similarity. The list is empty when ``top_k`` is not positive.
    """
    # A slice of [-0:] would select the WHOLE array, so a non-positive
    # top_k must be handled explicitly instead of falling through.
    if top_k <= 0:
        return []

    user_vector = tfidf.transform([preprocess(user_input)])
    # One query row in, so row 0 holds the similarity to every stored entry.
    similarities = cosine_similarity(user_vector, tfidf_matrix)[0]
    # argsort is ascending: take the last top_k entries and reverse them.
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    # Convert NumPy scalars to built-in types for the callers.
    return [(int(idx), float(similarities[idx])) for idx in top_indices]
40
-
41
-
42
def get_fuzzy_score(user_input, candidate_idx):
    """Return the character-level similarity ratio between the user's text
    and the stored question at ``candidate_idx``.

    Both strings are normalized with ``preprocess`` before comparison.
    """
    stored_question = qa_pairs[candidate_idx]['question']
    normalized_input = preprocess(user_input)
    normalized_question = preprocess(stored_question)
    matcher = SequenceMatcher(None, normalized_input, normalized_question)
    return matcher.ratio()
45
-
46
-
47
def fallback_response(confidence=0.0):
    """Pick a random canned reply for when no stored answer is good enough.

    Args:
        confidence: Final match score of the best candidate. Values strictly
            above 0.15 select the "partially understood" replies; anything
            else selects the "no idea" replies.

    Returns:
        One reply string chosen at random from the selected group.
    """
    if confidence > 0.15:
        # There was a weak match: ask the user to rephrase.
        responses = [
            "hmm kayaknya aku tau sih maksudmu, tapi ga terlalu yakin... coba tanya dengan kata lain? 🤔",
            "aku nangkep sedikit sih, tapi ga confident buat jawab. bisa diperjelas ga?",
        ]
    else:
        # Nothing matched: admit it.
        responses = [
            "waduh, pertanyaan ini di luar kemampuanku nih. Lyon-nya kurang ngajarin kayaknya 🙄",
            "jujur aja ya, aku ga ngerti maksudmu 😂 coba tanya yang lain deh",
            "kayaknya pertanyaan ini terlalu advanced buat AI bernama Caca Kecil 😅",
            "hmm aku belum tau jawabannya nih. Lyon-nya lagi males update dataset kayaknya 😤",
            "maaf belum bisa jawab yang itu. tapi aku usahain belajar ya! *semangat meski nama ngaco*",
        ]
    return random.choice(responses)
62
-
63
-
64
def chat(message, history):
    """Answer one user message for the Gradio chat interface.

    BM25 and TF-IDF candidates are merged into one weighted score per
    candidate index, the best candidate receives a small fuzzy-match bonus,
    and its stored answer is returned if the final score reaches the
    threshold. Otherwise a canned fallback reply is returned.

    Args:
        message: The user's latest message.
        history: Previous conversation turns. Unused here; it is accepted
            because the function is passed as ``fn`` to ``gr.ChatInterface``.

    Returns:
        The reply text: a stored answer or a fallback message.
    """
    # Nothing to score for a missing or blank message; this also avoids
    # calling string methods on None further down.
    if not message or not message.strip():
        return fallback_response(0.0)

    bm25_weight = 0.4
    tfidf_weight = 0.5
    fuzzy_weight = 0.1
    bm25_scale = 20      # raw BM25 scores are divided by this and capped at 1.0
    threshold = 0.25     # minimum final score for returning a stored answer

    # Get scores
    bm25_results = get_bm25_score(message, top_k=3)
    tfidf_results = get_tfidf_score(message, top_k=3)

    # Combine scores: a candidate found by both retrievers accumulates both parts.
    combined_scores = {}

    for idx, score in bm25_results:
        normalized_score = min(score / bm25_scale, 1.0)
        combined_scores[idx] = combined_scores.get(idx, 0) + (normalized_score * bm25_weight)

    for idx, score in tfidf_results:
        combined_scores[idx] = combined_scores.get(idx, 0) + (score * tfidf_weight)

    if not combined_scores:
        return fallback_response(0.0)

    best_idx = max(combined_scores, key=combined_scores.get)
    best_score = combined_scores[best_idx]

    # Fuzzy bonus: only the already-selected best candidate is compared.
    fuzzy_score = get_fuzzy_score(message, best_idx)
    final_score = best_score + (fuzzy_score * fuzzy_weight)

    if final_score >= threshold:
        return answers[best_idx]
    return fallback_response(final_score)
97
-
98
-
99
# Create the Gradio chat interface; `chat` is called for every user message.
demo = gr.ChatInterface(
    fn=chat,
    title="💬 Chatbot Caca",
    # Description lines stay flush-left inside the literal so the text is
    # passed without leading indentation.
    description="""
Chatbot berbasis retrieval (BM25 + TF-IDF) untuk QA Bahasa Indonesia.

**Fun fact:** AI ini namanya Caca Kecil karena creator-nya (Lyon) punya selera penamaan yang... unik 😂

Model size: 2.83 MB | QA pairs: 3,500+ | No LLM needed!
""",
    examples=[
        "siapa nama kamu?",
        "ceritakan tentang dirimu",
        "siapa itu Lyon?",
        "kenapa namamu Caca?",
        "kamu bisa apa?",
    ],
    theme="soft",
    chatbot=gr.Chatbot(height=400),
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()