Caca-Chatbot / app.py
Lyon28's picture
Create app.py
3ae8926 verified
raw
history blame
3.82 kB
import gradio as gr
import pickle
from rank_bm25 import BM25Okapi
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from difflib import SequenceMatcher
import numpy as np
import random
# Load the pre-built retrieval artifacts once, at import time.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# this is only safe as long as chatbot_caca.pkl is a trusted, bundled artifact.
# NOTE(review): the emoji in the two status messages look mis-encoded in this
# copy of the file; they are left untouched because they are runtime output.
print("πŸ€– Loading model...")
with open('chatbot_caca.pkl', 'rb') as f:
    data = pickle.load(f)

# Pull the five entries the rest of the module relies on, in the same order
# the original looked them up (a missing key raises KeyError).
qa_pairs, bm25, tfidf, tfidf_matrix, answers = (
    data[key]
    for key in ('qa_pairs', 'bm25', 'tfidf', 'tfidf_matrix', 'answers')
)
print(f"βœ… Loaded {len(qa_pairs)} QA pairs")
def preprocess(text):
    """Return ``text`` lower-cased with leading/trailing whitespace removed."""
    lowered = text.lower()
    return lowered.strip()
def get_bm25_score(user_input, top_k=3):
    """Rank entries of the BM25 index against ``user_input``.

    The input is normalized with ``preprocess`` and split on whitespace
    before being scored by the module-level ``bm25`` object.

    Args:
        user_input: Raw user message.
        top_k: Maximum number of candidates to return. Zero or a negative
            value returns an empty list.

    Returns:
        A list of ``(index, score)`` pairs ordered from highest to lowest
        score, where ``index`` is a position in the array returned by
        ``bm25.get_scores``.
    """
    # Without this guard, ``[-0:]`` would select *every* score and a negative
    # top_k would slice from the wrong end.
    if top_k <= 0:
        return []
    tokenized_query = preprocess(user_input).split()
    scores = bm25.get_scores(tokenized_query)
    # argsort is ascending: take the last top_k positions, then reverse so
    # the best match comes first.
    top_indices = np.argsort(scores)[-top_k:][::-1]
    return [(idx, scores[idx]) for idx in top_indices]
def get_tfidf_score(user_input, top_k=3):
    """Rank rows of ``tfidf_matrix`` by cosine similarity to ``user_input``.

    The input is normalized with ``preprocess`` and vectorized with the
    module-level ``tfidf`` object before being compared to ``tfidf_matrix``.

    Args:
        user_input: Raw user message.
        top_k: Maximum number of candidates to return. Zero or a negative
            value returns an empty list.

    Returns:
        A list of ``(index, similarity)`` pairs ordered from highest to
        lowest similarity, where ``index`` is a row of ``tfidf_matrix``.
    """
    # Without this guard, ``[-0:]`` would select *every* row and a negative
    # top_k would slice from the wrong end.
    if top_k <= 0:
        return []
    user_vector = tfidf.transform([preprocess(user_input)])
    # One query row in, so take row 0 of the similarity matrix.
    similarities = cosine_similarity(user_vector, tfidf_matrix)[0]
    # argsort is ascending: take the last top_k positions, then reverse so
    # the best match comes first.
    top_indices = np.argsort(similarities)[-top_k:][::-1]
    return [(idx, similarities[idx]) for idx in top_indices]
def get_fuzzy_score(user_input, candidate_idx):
    """Return the character-level similarity ratio between the user's text
    and the stored question at ``candidate_idx``.

    Both strings go through ``preprocess`` first; the result is
    ``difflib.SequenceMatcher(...).ratio()``.
    """
    stored_question = qa_pairs[candidate_idx]['question']
    matcher = SequenceMatcher(
        None,
        preprocess(user_input),
        preprocess(stored_question),
    )
    return matcher.ratio()
def fallback_response(confidence=0.0):
if confidence > 0.15:
responses = [
"hmm kayaknya aku tau sih maksudmu, tapi ga terlalu yakin... coba tanya dengan kata lain? πŸ€”",
"aku nangkep sedikit sih, tapi ga confident buat jawab. bisa diperjelas ga?",
]
else:
responses = [
"waduh, pertanyaan ini di luar kemampuanku nih. Lyon-nya kurang ngajarin kayaknya πŸ™„",
"jujur aja ya, aku ga ngerti maksudmu πŸ˜‚ coba tanya yang lain deh",
"kayaknya pertanyaan ini terlalu advanced buat AI bernama Caca Kecil πŸ˜…",
"hmm aku belum tau jawabannya nih. Lyon-nya lagi males update dataset kayaknya 😀",
"maaf belum bisa jawab yang itu. tapi aku usahain belajar ya! *semangat meski nama ngaco*",
]
return random.choice(responses)
# Scoring knobs used by chat(); values are unchanged from the original inline
# literals.
_TOP_K = 3                # candidates requested from each retriever
_BM25_SCALE = 20          # raw BM25 score that maps to a normalized 1.0
_BM25_WEIGHT = 0.4        # share of the normalized BM25 score
_TFIDF_WEIGHT = 0.5       # share of the TF-IDF cosine similarity
_FUZZY_WEIGHT = 0.1       # share of the SequenceMatcher ratio (best hit only)
_ANSWER_THRESHOLD = 0.25  # minimum final score required to return an answer


def chat(message, history):
    """Gradio chat callback: return the best stored answer or a fallback.

    BM25 and TF-IDF candidates are merged into one weighted score per
    candidate index; the highest-scoring candidate then receives a small
    fuzzy-match bonus and is returned only if it reaches the threshold.

    Args:
        message: The user's message.
        history: Conversation history supplied by the caller; not used.

    Returns:
        ``answers[best_idx]`` when the final score is at least 0.25,
        otherwise a string from ``fallback_response``.
    """
    # Nothing to score for a blank message; a non-string would otherwise
    # raise inside preprocess().
    if not isinstance(message, str) or not message.strip():
        return fallback_response(0.0)

    combined_scores = {}
    for idx, score in get_bm25_score(message, top_k=_TOP_K):
        # Clamp to [0, 1]: the original only capped the top end, so a
        # negative BM25 score could subtract from the combined score.
        normalized_score = min(max(score / _BM25_SCALE, 0.0), 1.0)
        combined_scores[idx] = (
            combined_scores.get(idx, 0) + normalized_score * _BM25_WEIGHT
        )
    for idx, score in get_tfidf_score(message, top_k=_TOP_K):
        combined_scores[idx] = combined_scores.get(idx, 0) + score * _TFIDF_WEIGHT

    if not combined_scores:
        return fallback_response(0.0)

    best_idx = max(combined_scores, key=combined_scores.get)
    # The fuzzy bonus is computed for the winning candidate only.
    fuzzy_score = get_fuzzy_score(message, best_idx)
    final_score = combined_scores[best_idx] + fuzzy_score * _FUZZY_WEIGHT

    if final_score >= _ANSWER_THRESHOLD:
        return answers[best_idx]
    return fallback_response(final_score)
# Build the Gradio chat UI around chat().
# NOTE(review): the emoji in the title and description look mis-encoded in
# this copy of the file; they are user-facing strings and are left untouched.
demo = gr.ChatInterface(
    fn=chat,
    chatbot=gr.Chatbot(height=400),
    theme="soft",
    title="πŸ’¬ Chatbot Caca",
    # Explicit per-line literals; concatenated they equal the original
    # triple-quoted text, including its leading and trailing newline.
    description=(
        "\n"
        "Chatbot berbasis retrieval (BM25 + TF-IDF) untuk QA Bahasa Indonesia.\n"
        "**Fun fact:** AI ini namanya Caca Kecil karena creator-nya (Lyon) punya selera penamaan yang... unik πŸ˜‚\n"
        "Model size: 2.83 MB | QA pairs: 3,500+ | No LLM needed!\n"
    ),
    examples=[
        "siapa nama kamu?",
        "ceritakan tentang dirimu",
        "siapa itu Lyon?",
        "kenapa namamu Caca?",
        "kamu bisa apa?",
    ],
)

# Start the app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()