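# app.py - SmartLib book recommender (Gradio app).
# Hosting-page residue kept as a comment: uploader "rafhiromadoni",
# commit ca59236 (verified), message "Update app.py".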
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
# ==========================================
# 1. VERSI RINGAN: LOAD DATA (HANYA 1000 BUKU)
# ==========================================
def load_data(
    books_url="https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv",
    limit=1000,
):
    """Load the book catalogue used by the recommender.

    Args:
        books_url: Anything ``pandas.read_csv`` accepts (URL, path or
            file-like object). Defaults to the goodbooks-10k ``books.csv``.
        limit: Maximum number of rows to keep, counted after untitled books
            are dropped. ``None`` keeps every row. The default of 1000 keeps
            the Gradio dropdowns responsive.

    Returns:
        A DataFrame with the columns ``book_id``, ``original_title``,
        ``authors``, ``average_rating`` and ``image_url``, a fresh 0..n-1
        index, and missing values replaced by empty strings.
    """
    df = pd.read_csv(books_url, on_bad_lines='skip')
    columns = ['book_id', 'original_title', 'authors', 'average_rating', 'image_url']
    df = df[columns].fillna('')

    # A title made only of whitespace is as unusable in the UI as a missing one.
    has_title = df['original_title'].astype(str).str.strip() != ''
    df = df[has_title].reset_index(drop=True)

    if limit is not None:
        df = df.head(limit)

    # Return an independent frame: the caller adds columns to the result.
    return df.copy()
df_books = load_data()

# ==========================================
# 2. NLP: TF-IDF & SIMILARITY MATRIX
# ==========================================
# Each book is described by its title followed by its author string.
df_books['content'] = df_books['original_title'] + " " + df_books['authors']
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_books['content'])
# Pairwise similarity between every pair of books (one row/column per book).
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Dropdown choices for titles; duplicates are collapsed so each title is
# offered once (lookups by title use the first matching row anyway).
book_list = sorted(set(df_books['original_title']))

# Dropdown choices for authors: the first-listed author of each book.
first_authors = df_books['authors'].str.split(',').str[0].str.strip()
first_authors = first_authors[first_authors != '']
# Cap the list at 100 entries to keep the dropdown light. Keep the authors
# with the most books rather than the first 100 alphabetically, so widely
# represented authors (including the preferred default below) stay selectable.
author_list = sorted(first_authors.value_counts().head(100).index)

# Preferred defaults, falling back to the first choice (or None when empty).
if "The Hunger Games" in book_list:
    default_book = "The Hunger Games"
else:
    default_book = book_list[0] if book_list else None

if "J.K. Rowling" in author_list:
    default_author = "J.K. Rowling"
else:
    default_author = author_list[0] if author_list else None
# ==========================================
# 3. RENDER HTML GALERI (Menampilkan hasil rekomendasi)
# ==========================================
def render_gallery(df_subset):
    """Render a set of books as an HTML card gallery.

    Args:
        df_subset: DataFrame whose rows provide ``image_url``,
            ``original_title``, ``authors`` and ``average_rating``.

    Returns:
        An HTML string: a flex container with one card per row, or a red
        warning paragraph when ``df_subset`` is empty.
    """
    # Local import so this function is self-contained.
    from html import escape

    if df_subset.empty:
        return "<p style='color:red;'>⚠️ Tidak ada rekomendasi.</p>"

    cards = []
    for _, book in df_subset.iterrows():
        # The values come from an external CSV; escape them so quotes or
        # angle brackets in a title/author/URL cannot break out of the markup.
        image_url = escape(str(book['image_url']), quote=True)
        title = escape(str(book['original_title']), quote=True)
        authors = escape(str(book['authors']), quote=True)
        rating = escape(str(book['average_rating']), quote=True)
        cards.append(
            "\n"
            "<div style='width: 150px; background: white; border: 1px solid #ddd; border-radius: 8px; padding: 10px; text-align: center;'>\n"
            f"<img src='{image_url}' style='width: 100px; height: 150px; object-fit: cover; border-radius: 4px; margin-bottom: 8px;' onerror=\"this.src='https://via.placeholder.com/100x150?text=No+Cover'\">\n"
            f"<h4 style='font-size: 13px; margin: 0 0 4px 0; height: 35px; overflow: hidden;'>{title}</h4>\n"
            f"<p style='font-size: 11px; color: gray; margin: 0 0 6px 0; height: 15px; overflow: hidden;'>{authors}</p>\n"
            f"<span style='background: #eef2f5; font-size: 11px; padding: 3px 6px; border-radius: 10px;'>⭐ {rating}</span>\n"
            "</div>\n"
        )

    return (
        "<div style='display: flex; gap: 15px; flex-wrap: wrap; justify-content: center; margin-top: 10px;'>"
        + "".join(cards)
        + "</div>"
    )
# ==========================================
# 4. FUNGSI LOGIKA (Ringan & Cepat)
# ==========================================
def rec_tab1_nlp(book_title):
    """Recommend the five books most similar to a single chosen title.

    Args:
        book_title: Title to look up in ``df_books['original_title']``; the
            first matching row is used.

    Returns:
        Gallery HTML for the five highest-scoring other books, an empty
        string when no title is given, or a warning string when the title
        cannot be found.
    """
    if not book_title:
        return ""

    try:
        idx = df_books.index[df_books['original_title'] == book_title].tolist()[0]
    except (IndexError, TypeError, ValueError):
        # No matching row, or a value that cannot be compared with titles.
        return "⚠️ Buku tidak valid."

    # Stable descending order: ties keep their original row order.
    ranked = np.argsort(-np.asarray(cosine_sim[idx]), kind="stable")
    # Drop the query book explicitly instead of assuming it ranks first;
    # a tie with an identical entry or an all-zero vector breaks that assumption.
    rec_indices = [int(i) for i in ranked if i != idx][:5]
    return render_gallery(df_books.iloc[rec_indices])
def rec_tab2_multi(selected_books):
    """Recommend five books from the combined profile of several titles.

    Args:
        selected_books: Iterable of titles; titles not present in
            ``book_list`` are ignored.

    Returns:
        Gallery HTML for the five best-scoring books that were not selected,
        or a warning string when nothing (valid) was selected.
    """
    if not selected_books:
        return "⚠️ Pilih minimal 1 buku."

    # Resolve each known title to the first row carrying it.
    indices = []
    for title in selected_books:
        if title in book_list:
            matching_rows = df_books.index[df_books['original_title'] == title].tolist()
            indices.append(matching_rows[0])

    if not indices:
        return "⚠️ Buku tidak valid."

    # Average the TF-IDF rows of the selection; np.asarray converts the
    # result of the sparse mean into a plain array before it reaches sklearn.
    profile = np.asarray(tfidf_matrix[indices].mean(axis=0))
    similarities = cosine_similarity(profile, tfidf_matrix)[0]
    ranked = sorted(enumerate(similarities), key=lambda pair: pair[1], reverse=True)

    # Keep the ranking order but leave out the books the user already chose.
    rec_indices = []
    for position, _ in ranked:
        if position not in indices:
            rec_indices.append(position)
    rec_indices = rec_indices[:5]

    return render_gallery(df_books.iloc[rec_indices])
def rec_tab3_coldstart(author_name):
    """Show the five best-rated books by a given author.

    Args:
        author_name: Text searched for (case-insensitively) inside the
            ``authors`` column of ``df_books``.

    Returns:
        Gallery HTML for up to five matching books ordered by
        ``average_rating`` descending, or an empty string when no author
        is given.
    """
    if not author_name:
        return ""

    # regex=False: an author name is literal text. Without it, characters
    # such as "." in "J.K. Rowling" act as regex wildcards, and names with
    # "(" or "+" could fail to compile as a pattern.
    by_author = df_books['authors'].str.contains(
        author_name, case=False, na=False, regex=False
    )
    top_rated = (
        df_books[by_author]
        .sort_values(by='average_rating', ascending=False)
        .head(5)
    )
    return render_gallery(top_rated)
# ==========================================
# 5. UI GRADIO (Tanpa event listener otomatis / app.load)
# ==========================================
with gr.Blocks(title="SmartLib AI") as app:
    gr.Markdown("### 📚 SmartLib: AI Recommender ")

    with gr.Tabs():
        # Tab 1: recommendations from a single book.
        with gr.TabItem("1. NLP (Satu Buku)"):
            in_tab1 = gr.Dropdown(
                choices=book_list,
                value=default_book,
                label="Ketik Judul Buku",
                interactive=True,
            )
            btn_tab1 = gr.Button("Rekomendasikan", variant="primary")
            out_tab1 = gr.HTML()
            btn_tab1.click(fn=rec_tab1_nlp, inputs=in_tab1, outputs=out_tab1)

        # Tab 2: recommendations from several selected books.
        with gr.TabItem("2. Pola Pembaca (Multi-Buku)"):
            in_tab2 = gr.Dropdown(
                choices=book_list,
                multiselect=True,
                label="Ketik & Pilih Beberapa Buku",
                interactive=True,
            )
            btn_tab2 = gr.Button("Analisis Pola", variant="primary")
            out_tab2 = gr.HTML()
            btn_tab2.click(fn=rec_tab2_multi, inputs=in_tab2, outputs=out_tab2)

        # Tab 3: top-rated books for a chosen author.
        with gr.TabItem("3. Pengguna Baru"):
            in_tab3 = gr.Dropdown(
                choices=author_list,
                value=default_author,
                label="Ketik Penulis",
                interactive=True,
            )
            btn_tab3 = gr.Button("Buku Terbaik", variant="primary")
            out_tab3 = gr.HTML()
            btn_tab3.click(fn=rec_tab3_coldstart, inputs=in_tab3, outputs=out_tab3)

# Results are produced only on button clicks; nothing runs on page load.
app.launch()