import html

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
# ==========================================
# 1. LIGHTWEIGHT VERSION: LOAD DATA (1000 BOOKS ONLY)
# ==========================================
def load_data(
    books_url="https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv",
    limit=1000,
):
    """Load the book catalogue and return a trimmed, independent DataFrame.

    Args:
        books_url: Location of the books CSV. Anything ``pandas.read_csv``
            accepts works (URL, local path, or file-like object). Defaults
            to the goodbooks-10k ``books.csv`` on GitHub.
        limit: Maximum number of books to keep, counted from the top of the
            file after untitled rows are dropped. ``None`` keeps every row.
            The default of 1000 keeps the Gradio UI from lagging/freezing.

    Returns:
        A DataFrame with the columns ``book_id``, ``original_title``,
        ``authors``, ``average_rating`` and ``image_url``, a fresh 0..n-1
        index, missing cells replaced by ``''`` and no empty titles.
    """
    df = pd.read_csv(books_url, on_bad_lines='skip')
    # Missing cells become '' so the text columns can be concatenated later.
    df = df[['book_id', 'original_title', 'authors', 'average_rating', 'image_url']].fillna('')
    df = df[df['original_title'] != ''].reset_index(drop=True)
    if limit is not None:
        df = df.head(limit)
    # Return a real copy, not a slice of the full catalogue: callers add
    # columns to the result, which would otherwise trigger pandas'
    # SettingWithCopyWarning.
    return df.copy()
# Load the trimmed catalogue once at import time; everything below indexes into it.
df_books = load_data()

# ==========================================
# 2. NLP: TF-IDF & SIMILARITY MATRIX
# ==========================================
# Each book's text "document" is its title followed by its author string.
df_books['content'] = df_books['original_title'] + " " + df_books['authors']
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_books['content'])

# Pairwise similarity of every book against every other book. With the
# catalogue capped by load_data (1000 books by default) this matrix is small.
cosine_sim = cosine_similarity(tfidf_matrix)

# Dropdown choices: all titles of the loaded books, alphabetically.
book_list = sorted(df_books['original_title'].tolist())

# Author choices: the first name listed for each book, de-duplicated, sorted,
# and cut to 100 entries so the dropdown stays light.
author_list = sorted({a.split(',')[0].strip() for a in df_books['authors']})[:100]

# Preselected dropdown values, falling back to the first available entry.
if "The Hunger Games" in book_list:
    default_book = "The Hunger Games"
else:
    default_book = book_list[0]

if "J.K. Rowling" in author_list:
    default_author = "J.K. Rowling"
else:
    default_author = author_list[0]
# ==========================================
# 3. RENDER HTML GALLERY (displays the recommendation results)
# ==========================================
def render_gallery(df_subset):
    """Render a set of books as an HTML fragment for a ``gr.HTML`` output.

    Args:
        df_subset: DataFrame with ``original_title``, ``authors`` and
            ``average_rating`` columns. One entry is rendered per row, in
            row order.

    Returns:
        An HTML string: a short warning paragraph when ``df_subset`` is
        empty, otherwise one ``<div>`` per book (title, authors, rating)
        wrapped in a container ``<div>``.
    """
    # NOTE(review): the original string literals here were unterminated
    # (markup appears to have been lost), so the tags below are a minimal
    # reconstruction -- restyle as needed. `image_url` is loaded by
    # load_data but was not referenced here; confirm whether covers should
    # be shown.
    if df_subset.empty:
        return "<p>⚠️ Tidak ada rekomendasi.</p>"

    # Catalogue text comes from an external CSV, so escape it before it is
    # embedded in HTML. Build the pieces in a list and join once instead of
    # growing a string with +=.
    cards = [
        "<div>"
        f"<b>{html.escape(str(title))}</b><br>"
        f"{html.escape(str(authors))}<br>"
        f"⭐ {html.escape(str(rating))}"
        "</div>"
        for title, authors, rating in zip(
            df_subset['original_title'],
            df_subset['authors'],
            df_subset['average_rating'],
        )
    ]
    return "<div>" + "".join(cards) + "</div>"
# ==========================================
# 4. LOGIC FUNCTIONS (light & fast)
# ==========================================
def rec_tab1_nlp(book_title):
    """Recommend the five books most similar to a single chosen title.

    Args:
        book_title: Exact ``original_title`` of a book in ``df_books``.

    Returns:
        Gallery HTML from ``render_gallery`` for the five highest-scoring
        other books; ``""`` when no title is given; a warning string when
        the title is not in the catalogue.
    """
    if not book_title:
        return ""

    # Positional row numbers, because cosine_sim and .iloc are positional.
    matches = np.flatnonzero((df_books['original_title'] == book_title).to_numpy())
    if matches.size == 0:
        return "⚠️ Buku tidak valid."
    idx = int(matches[0])

    # Rank all books by similarity, highest first; the stable sort keeps
    # catalogue order among ties.
    similarity = np.asarray(cosine_sim[idx])
    ranked = np.argsort(-similarity, kind="stable")
    # Drop the queried book explicitly instead of assuming it is ranked
    # first: with tied scores (or an all-zero TF-IDF row) it may not be.
    top = ranked[ranked != idx][:5]
    return render_gallery(df_books.iloc[top])
def rec_tab2_multi(selected_books):
    """Recommend five books matching the combined profile of several titles.

    The TF-IDF vectors of the selected books are averaged into one profile
    vector, and the catalogue is ranked by cosine similarity to it.

    Args:
        selected_books: List of ``original_title`` values chosen by the
            user. Titles not present in ``df_books`` are ignored.

    Returns:
        Gallery HTML from ``render_gallery`` for the five best-scoring
        books that were not selected, or a warning string when nothing is
        selected or none of the titles is in the catalogue.
    """
    if not selected_books:
        return "⚠️ Pilih minimal 1 buku."

    titles = df_books['original_title']
    # One vectorised pass instead of a list scan per selected title.
    chosen = titles.isin(selected_books).to_numpy()
    if not chosen.any():
        return "⚠️ Buku tidak valid."

    # Build the profile from the first catalogue row of each selected title
    # (same weighting as before when a title occurs more than once).
    profile_rows = np.flatnonzero(chosen & ~titles.duplicated().to_numpy())
    # Sparse .mean() may return np.matrix or a 1-D array depending on the
    # sparse container; force a plain (1, n_features) ndarray for sklearn.
    mean_vector = np.asarray(tfidf_matrix[profile_rows].mean(axis=0)).reshape(1, -1)

    scores = cosine_similarity(mean_vector, tfidf_matrix)[0]
    # Highest score first; the stable sort keeps catalogue order among ties.
    ranked = np.argsort(-scores, kind="stable")
    # Exclude every row carrying a selected title, so a duplicate entry of a
    # selected book is never recommended back to the user.
    rec_indices = ranked[~chosen[ranked]][:5]
    return render_gallery(df_books.iloc[rec_indices])
def rec_tab3_coldstart(author_name):
    """Return the five highest-rated books by an author (new-user tab).

    Args:
        author_name: Author name to look for; matched case-insensitively as
            a plain substring of each book's ``authors`` field.

    Returns:
        Gallery HTML from ``render_gallery`` for up to five matching books
        sorted by ``average_rating`` descending, or ``""`` when no author
        is given.
    """
    if not author_name:
        return ""

    # regex=False: the name is literal text. As a regex, the dots in
    # "J.K. Rowling" would match any character, and names containing
    # characters such as "(" or "+" could raise re.error.
    by_author = df_books['authors'].str.contains(
        author_name, case=False, na=False, regex=False
    )
    top_rated = (
        df_books[by_author]
        .sort_values(by='average_rating', ascending=False)
        .head(5)
    )
    return render_gallery(top_rated)
# ==========================================
# 5. GRADIO UI (no automatic event listeners / app.load)
# ==========================================
# Three-tab interface. No load-time event is registered; each recommender
# runs only when the button in its tab is clicked.
with gr.Blocks(title="SmartLib AI") as app:
    gr.Markdown("### 📚 SmartLib: AI Recommender ")

    with gr.Tabs():
        # Tab 1: recommendations from a single book title.
        with gr.TabItem("1. NLP (Satu Buku)"):
            in_tab1 = gr.Dropdown(
                label="Ketik Judul Buku",
                choices=book_list,
                value=default_book,
                interactive=True,
            )
            btn_tab1 = gr.Button("Rekomendasikan", variant="primary")
            out_tab1 = gr.HTML()
            btn_tab1.click(fn=rec_tab1_nlp, inputs=in_tab1, outputs=out_tab1)

        # Tab 2: recommendations from several selected books.
        with gr.TabItem("2. Pola Pembaca (Multi-Buku)"):
            in_tab2 = gr.Dropdown(
                label="Ketik & Pilih Beberapa Buku",
                choices=book_list,
                multiselect=True,
                interactive=True,
            )
            btn_tab2 = gr.Button("Analisis Pola", variant="primary")
            out_tab2 = gr.HTML()
            btn_tab2.click(fn=rec_tab2_multi, inputs=in_tab2, outputs=out_tab2)

        # Tab 3: top-rated books by a chosen author, for new users.
        with gr.TabItem("3. Pengguna Baru"):
            in_tab3 = gr.Dropdown(
                label="Ketik Penulis",
                choices=author_list,
                value=default_author,
                interactive=True,
            )
            btn_tab3 = gr.Button("Buku Terbaik", variant="primary")
            out_tab3 = gr.HTML()
            btn_tab3.click(fn=rec_tab3_coldstart, inputs=in_tab3, outputs=out_tab3)

app.launch()