|
|
|
|
|
"""app.ipynb |
|
|
|
|
|
Automatically generated by Colab. |
|
|
|
|
|
Original file is located at |
|
|
https://colab.research.google.com/drive/1aiG3WNZzfqKvIQLQ8wVnGMoPiXIWvmNZ |
|
|
""" |
|
|
|
|
|
import pandas as pd |
|
|
from sentence_transformers import SentenceTransformer, util |
|
|
import gradio as gr |
|
|
import numpy as np |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Path of the FAQ dataset (CSV with user_query / chatbot_response columns)
# loaded once at startup.
CSV_FILE_NAME = "perpustakaan_faq (2).csv"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Multilingual sentence-embedding model, chosen for Indonesian-language
# semantic similarity ("Indo-friendly").
MODEL_NAME = 'paraphrase-multilingual-mpnet-base-v2'

try:
    print(f"Mengunduh dan memuat model Indo-friendly Sentence Transformer: {MODEL_NAME}...")
    model = SentenceTransformer(MODEL_NAME)
    print("Model berhasil dimuat.")
except Exception as e:
    # Any failure (typically a missing network connection) triggers a fallback
    # to a smaller, English-centric model so the app can still start.
    print(f"Error memuat model: {e}. Pastikan koneksi internet.")
    MODEL_NAME = 'all-MiniLM-L6-v2'
    print(f"Mencoba fallback ke model: {MODEL_NAME}")
    # NOTE(review): if this fallback load also fails, the exception propagates
    # and the script aborts — there is no third option.
    model = SentenceTransformer(MODEL_NAME)
|
|
|
|
|
|
|
|
|
|
|
def load_and_preprocess_data(csv_data_path):
    """Load the FAQ CSV and precompute embeddings for every question.

    Renames the raw CSV columns to 'Question'/'Answer', strips surrounding
    whitespace, and encodes all questions with the module-level model.

    Returns:
        (DataFrame, tensor) on success, or (None, None) if loading or
        encoding fails for any reason.
    """
    try:
        faq_df = pd.read_csv(csv_data_path)
        print(f"Data berhasil dimuat. Total {len(faq_df)} FAQ ditemukan.")

        # Normalize the schema and clean up stray whitespace in both columns.
        faq_df = faq_df.rename(columns={'user_query': 'Question', 'chatbot_response': 'Answer'})
        for column in ('Question', 'Answer'):
            faq_df[column] = faq_df[column].astype(str).str.strip()

        # Encode every FAQ question once up front; queries are compared
        # against this tensor at chat time.
        print("Mulai menghasilkan embeddings untuk pertanyaan FAQ dengan model Indo-friendly...")
        question_embeddings = model.encode(faq_df['Question'].tolist(), convert_to_tensor=True)
        print("Embeddings selesai dibuat.")

        return faq_df, question_embeddings
    except Exception as e:
        print(f"Error saat memproses data: {e}")
        return None, None
|
|
|
|
|
|
|
|
# Build the FAQ corpus and its embeddings once at startup; both are read as
# module-level globals by library_chatbot().
df_faq, corpus_embeddings = load_and_preprocess_data(CSV_FILE_NAME)

# Fail fast: without the FAQ data the chatbot has nothing to answer from.
if df_faq is None:
    raise Exception("Gagal memuat atau memproses data FAQ. Chatbot tidak dapat dijalankan.")
|
|
|
|
|
|
|
|
|
|
|
def library_chatbot(user_input, top_k=1, score_threshold=0.6):
    """Answer a library question via semantic search over the FAQ corpus.

    Embeds the user's question, retrieves the most similar stored FAQ
    question, and returns its answer when the similarity is high enough.

    Args:
        user_input: Free-text question from the user (may be None/empty).
        top_k: Number of candidate matches to retrieve; only the best is used.
        score_threshold: Minimum cosine-similarity score to trust a match.

    Returns:
        The matching FAQ answer, or a polite fallback message when the input
        is blank or no FAQ entry is similar enough.
    """
    # Fix: the original `if not user_input` let whitespace-only strings
    # through to the encoder; normalize with strip() first. `None` is still
    # caught by the first (short-circuiting) check.
    if not user_input or not user_input.strip():
        return "Silakan ajukan pertanyaan terkait perpustakaan!"

    # Embed the query and search the precomputed FAQ question embeddings.
    query_embedding = model.encode(user_input, convert_to_tensor=True)
    hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=top_k)

    # One query was submitted, so hits[0] is its ranked result list;
    # semantic_search returns results sorted by decreasing score.
    best_hit = hits[0][0]
    score = best_hit['score']

    if score >= score_threshold:
        # Confident match: return the stored answer for that FAQ row.
        index = best_hit['corpus_id']
        return df_faq.iloc[index]['Answer']

    return (
        "Mohon maaf, saya tidak menemukan jawaban yang cocok untuk pertanyaan Anda "
        "dalam data FAQ yang tersedia. Coba tanyakan dengan kata kunci yang berbeda "
        "atau hubungi staf perpustakaan secara langsung."
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sample prompts surfaced in the Gradio UI so users can try the bot with one
# click; each should map onto an entry in the FAQ CSV.
example_questions = [
    "Saya mau pinjam buku, bagaimana prosedurnya?",
    "Apa syarat buat kartu anggota perpustakaan?",
    "Jam berapa perpustakaan ini mulai buka?",
    "Saya ingin menggunakan internet, apakah tersedia wifi gratis?",
    "Kalau telat mengembalikan, dendanya berapa?",
]
|
|
|
|
|
|
|
|
# Wire the chatbot function into a simple Gradio text-in / text-out UI.
# Gradio passes only the textbox value, so library_chatbot's top_k and
# score_threshold keep their defaults here.
iface = gr.Interface(
    fn=library_chatbot,
    inputs=gr.Textbox(lines=2, placeholder="Ketik pertanyaan Anda tentang perpustakaan..."),
    outputs=gr.Textbox(label="Jawaban Chatbot"),
    title="📚 Chatbot FAQ Perpustakaan (IndoBERT-Friendly)",
    description=(
        f"Chatbot ini menggunakan model Semantic Search ({MODEL_NAME}) untuk menjawab pertanyaan Anda "
        "berdasarkan data FAQ perpustakaan. Tanyakan hal-hal terkait peminjaman, keanggotaan, atau layanan."
    ),
    examples=example_questions
)

# Start the web server; blocks until the app is stopped.
iface.launch()