Lyon28
/

Caca-Chatbot

@@ -1,122 +0,0 @@
-import gradio as gr
-import pickle
-from rank_bm25 import BM25Okapi
-from sklearn.feature_extraction.text import TfidfVectorizer
-from sklearn.metrics.pairwise import cosine_similarity
-from difflib import SequenceMatcher
-import numpy as np
-import random
# Load the pre-built retrieval bundle from disk.
# NOTE(security): pickle.load can run arbitrary code embedded in the file, so
# 'chatbot_caca.pkl' must only ever come from a trusted source.
print("🤖 Loading model...")
with open('chatbot_caca.pkl', 'rb') as f:
    data = pickle.load(f)

# Pull every required entry out of the bundle in one statement; a missing key
# raises KeyError here, at start-up, rather than later during a chat turn.
qa_pairs, bm25, tfidf, tfidf_matrix, answers = (
    data[key]
    for key in ('qa_pairs', 'bm25', 'tfidf', 'tfidf_matrix', 'answers')
)
print(f"✅ Loaded {len(qa_pairs)} QA pairs")
def preprocess(text):
    """Return *text* lower-cased with leading/trailing whitespace removed."""
    lowered = text.lower()
    return lowered.strip()
def get_bm25_score(user_input, top_k=3):
    """Rank the corpus against *user_input* using the loaded BM25 index.

    Args:
        user_input: Raw user message; it is normalised with ``preprocess``
            and split on whitespace before scoring.
        top_k: Maximum number of candidates to return.

    Returns:
        A list of ``(index, score)`` tuples ordered from highest to lowest
        score, where ``index`` is a position in the array returned by
        ``bm25.get_scores``.
    """
    tokenized_query = preprocess(user_input).split()
    scores = bm25.get_scores(tokenized_query)
    # Reverse before truncating: slicing with ``[-top_k:]`` would become
    # ``[-0:]`` when top_k == 0 and select every document instead of none.
    top_indices = np.argsort(scores)[::-1][:top_k]
    return [(idx, scores[idx]) for idx in top_indices]
def get_tfidf_score(user_input, top_k=3):
    """Rank the corpus against *user_input* by TF-IDF cosine similarity.

    Args:
        user_input: Raw user message; it is normalised with ``preprocess``
            and vectorised with the loaded ``tfidf`` transformer.
        top_k: Maximum number of candidates to return.

    Returns:
        A list of ``(index, similarity)`` tuples ordered from highest to
        lowest similarity, where ``index`` is a row of ``tfidf_matrix``.
    """
    user_vector = tfidf.transform([preprocess(user_input)])
    # cosine_similarity returns a (1, n_rows) matrix; keep the single row.
    similarities = cosine_similarity(user_vector, tfidf_matrix)[0]
    # Reverse before truncating: slicing with ``[-top_k:]`` would become
    # ``[-0:]`` when top_k == 0 and select every document instead of none.
    top_indices = np.argsort(similarities)[::-1][:top_k]
    return [(idx, similarities[idx]) for idx in top_indices]
def get_fuzzy_score(user_input, candidate_idx):
    """Return the character-level similarity ratio between the user's
    message and the stored question at *candidate_idx*.

    Both strings are normalised with ``preprocess`` before comparison; the
    result is ``difflib.SequenceMatcher.ratio()``, a float in [0, 1].
    """
    stored_question = preprocess(qa_pairs[candidate_idx]['question'])
    query = preprocess(user_input)
    matcher = SequenceMatcher(None, query, stored_question)
    return matcher.ratio()
def fallback_response(confidence=0.0):
    """Pick a random canned reply for when no answer is confident enough.

    Args:
        confidence: Final retrieval score of the best candidate. Values
            strictly above 0.15 select the "partly understood" replies;
            anything else selects the "did not understand" replies.

    Returns:
        One reply string chosen with ``random.choice``.
    """
    if confidence > 0.15:
        # A near miss: ask the user to rephrase.
        return random.choice([
            "hmm kayaknya aku tau sih maksudmu, tapi ga terlalu yakin... coba tanya dengan kata lain? 🤔",
            "aku nangkep sedikit sih, tapi ga confident buat jawab. bisa diperjelas ga?",
        ])

    # Nothing relevant was found: admit it.
    return random.choice([
        "waduh, pertanyaan ini di luar kemampuanku nih. Lyon-nya kurang ngajarin kayaknya 🙄",
        "jujur aja ya, aku ga ngerti maksudmu 😂 coba tanya yang lain deh",
        "kayaknya pertanyaan ini terlalu advanced buat AI bernama Caca Kecil 😅",
        "hmm aku belum tau jawabannya nih. Lyon-nya lagi males update dataset kayaknya 😤",
        "maaf belum bisa jawab yang itu. tapi aku usahain belajar ya! *semangat meski nama ngaco*",
    ])
def chat(message, history):
    """Answer *message* by blended retrieval; Gradio chat callback.

    The top-3 BM25 hits (score divided by 20, capped at 1.0, weight 0.4) and
    the top-3 TF-IDF hits (weight 0.5) are summed per candidate index. The
    best candidate then receives a fuzzy string-match bonus (weight 0.1).
    If the final score reaches 0.25 the stored answer is returned, otherwise
    a canned fallback reply.

    Args:
        message: The user's latest message.
        history: Conversation history supplied by Gradio; not used here.

    Returns:
        The reply string.
    """
    lexical_hits = get_bm25_score(message, top_k=3)
    vector_hits = get_tfidf_score(message, top_k=3)

    # Accumulate weighted scores per candidate; BM25 hits are inserted first
    # so that ties are resolved in the same order as the retrieval lists.
    blended = {}
    for doc_id, raw in lexical_hits:
        capped = min(raw / 20, 1.0)
        blended[doc_id] = blended.get(doc_id, 0) + (capped * 0.4)
    for doc_id, similarity in vector_hits:
        blended[doc_id] = blended.get(doc_id, 0) + (similarity * 0.5)

    if not blended:
        return fallback_response(0.0)

    # First maximal entry wins, matching dict iteration order.
    winner, winner_score = max(blended.items(), key=lambda pair: pair[1])

    # Small bonus for questions that look textually close to the message.
    bonus = get_fuzzy_score(message, winner) * 0.1
    final_score = winner_score + bonus

    minimum_confidence = 0.25
    if final_score >= minimum_confidence:
        return answers[winner]
    return fallback_response(final_score)
# Build the Gradio chat UI from named pieces, then wire them together.
_DESCRIPTION = """
    Chatbot berbasis retrieval (BM25 + TF-IDF) untuk QA Bahasa Indonesia.
    **Fun fact:** AI ini namanya Caca Kecil karena creator-nya (Lyon) punya selera penamaan yang... unik 😂
    Model size: 2.83 MB | QA pairs: 3,500+ | No LLM needed!
    """

# Sample prompts offered to the user in the interface.
_EXAMPLES = [
    "siapa nama kamu?",
    "ceritakan tentang dirimu",
    "siapa itu Lyon?",
    "kenapa namamu Caca?",
    "kamu bisa apa?",
]

demo = gr.ChatInterface(
    fn=chat,
    title="💬 Chatbot Caca",
    description=_DESCRIPTION,
    examples=_EXAMPLES,
    theme="soft",
    chatbot=gr.Chatbot(height=400),
)

# Only start the web server when executed as a script, not on import.
if __name__ == "__main__":
    demo.launch()