Spaces:
Sleeping
Sleeping
File size: 5,648 Bytes
dbf3dd6 c4cc939 95b8d85 c4cc939 95b8d85 c4cc939 230a87d dbf3dd6 95b8d85 dbf3dd6 c4cc939 95b8d85 c4cc939 95b8d85 c4cc939 95b8d85 c4cc939 95b8d85 230a87d dbf3dd6 95b8d85 dbf3dd6 c4cc939 95b8d85 c4cc939 95b8d85 c4cc939 95b8d85 c4cc939 95b8d85 230a87d dbf3dd6 95b8d85 dbf3dd6 c4cc939 95b8d85 c4cc939 95b8d85 c4cc939 95b8d85 2535b4c 95b8d85 c4cc939 95b8d85 c4cc939 95b8d85 c4cc939 95b8d85 c4cc939 95b8d85 dbf3dd6 95b8d85 dbf3dd6 c4cc939 ca59236 c4cc939 95b8d85 c4cc939 95b8d85 c4cc939 95b8d85 c4cc939 95b8d85 c4cc939 95b8d85 c4cc939 95b8d85 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import gradio as gr
# ==========================================
# 1. VERSI RINGAN: LOAD DATA (HANYA 1000 BUKU)
# ==========================================
def load_data(
    books_url="https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv",
    limit=1000,
):
    """Load the book catalogue and trim it to a UI-friendly size.

    Args:
        books_url: URL or local path of a CSV file that has at least the
            columns ``book_id``, ``original_title``, ``authors``,
            ``average_rating`` and ``image_url``.
        limit: Maximum number of books to keep (first rows of the file after
            filtering). Pass ``None`` to keep every book.

    Returns:
        A DataFrame with the five columns above, missing values replaced by
        empty strings, rows without an ``original_title`` removed, and a
        fresh 0-based index.
    """
    df = pd.read_csv(books_url, on_bad_lines='skip')
    df = df[['book_id', 'original_title', 'authors', 'average_rating', 'image_url']].fillna('')
    # Books without a title cannot be offered in the dropdowns.
    df = df[df['original_title'] != ''].reset_index(drop=True)
    if limit is not None:
        # Keep the catalogue small so the Gradio UI does not lag or freeze.
        # Copy so the trimmed frame is independent of the full table.
        df = df.head(limit).copy()
    return df
df_books = load_data()

# ==========================================
# 2. NLP: TF-IDF & SIMILARITY MATRIX
# ==========================================
# Each book is described by its title followed by its author string.
df_books['content'] = df_books['original_title'] + " " + df_books['authors']
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_books['content'])

# Book-to-book cosine similarity; with the catalogue capped at about 1000
# books this square matrix is light on RAM.
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Dropdown choices, taken from the loaded books.
book_list = sorted(df_books['original_title'])

# Only the first listed author of each book is used, and the alphabetical
# list is cut to 100 names to keep the dropdown light.
author_list = sorted({names.split(',')[0].strip() for names in df_books['authors']})[:100]

# Preselect a well-known entry when it is available, else the first choice.
if "The Hunger Games" in book_list:
    default_book = "The Hunger Games"
else:
    default_book = book_list[0]

if "J.K. Rowling" in author_list:
    default_author = "J.K. Rowling"
else:
    default_author = author_list[0]
# ==========================================
# 3. RENDER HTML GALERI (Menampilkan hasil rekomendasi)
# ==========================================
def render_gallery(df_subset):
    """Render recommended books as an HTML card gallery.

    Args:
        df_subset: DataFrame whose rows provide ``image_url``,
            ``original_title``, ``authors`` and ``average_rating``.

    Returns:
        An HTML string with one card per row, or a red warning paragraph
        when ``df_subset`` is empty.
    """
    # Local import keeps this function self-contained.
    from html import escape

    if df_subset.empty:
        return "<p style='color:red;'>⚠️ Tidak ada rekomendasi.</p>"

    cards = []
    for _, book in df_subset.iterrows():
        # Catalogue text is external data: escape it so characters such as
        # <, & or quotes cannot break the markup or inject HTML.
        image_url = escape(str(book['image_url']))
        title = escape(str(book['original_title']))
        authors = escape(str(book['authors']))
        rating = escape(str(book['average_rating']))
        cards.append(f"""
        <div style='width: 150px; background: white; border: 1px solid #ddd; border-radius: 8px; padding: 10px; text-align: center;'>
        <img src='{image_url}' style='width: 100px; height: 150px; object-fit: cover; border-radius: 4px; margin-bottom: 8px;' onerror="this.src='https://via.placeholder.com/100x150?text=No+Cover'">
        <h4 style='font-size: 13px; margin: 0 0 4px 0; height: 35px; overflow: hidden;'>{title}</h4>
        <p style='font-size: 11px; color: gray; margin: 0 0 6px 0; height: 15px; overflow: hidden;'>{authors}</p>
        <span style='background: #eef2f5; font-size: 11px; padding: 3px 6px; border-radius: 10px;'>⭐ {rating}</span>
        </div>
        """)

    # Join once instead of growing a string inside the loop.
    return (
        "<div style='display: flex; gap: 15px; flex-wrap: wrap; justify-content: center; margin-top: 10px;'>"
        + "".join(cards)
        + "</div>"
    )
# ==========================================
# 4. FUNGSI LOGIKA (Ringan & Cepat)
# ==========================================
def rec_tab1_nlp(book_title):
    """Recommend the five books most similar to one chosen title.

    Args:
        book_title: Title selected in the dropdown; may be empty or None.

    Returns:
        Gallery HTML for the top five matches, an empty string when no
        title is given, or a warning message when the title is unknown.
    """
    if not book_title:
        return ""

    matches = df_books.index[df_books['original_title'] == book_title].tolist()
    if not matches:
        return "⚠️ Buku tidak valid."
    idx = matches[0]

    ranked = sorted(enumerate(cosine_sim[idx]), key=lambda pair: pair[1], reverse=True)
    # Drop the queried book by index rather than assuming it ranks first:
    # ties or an all-zero vector can place another book at the top.
    top_five = [position for position, _ in ranked if position != idx][:5]
    return render_gallery(df_books.iloc[top_five])
def rec_tab2_multi(selected_books):
    """Recommend five books that match the average profile of several titles.

    Args:
        selected_books: Titles picked in the multi-select dropdown; may be
            empty or None.

    Returns:
        Gallery HTML for the five highest-ranked books that were not
        selected, or a warning message when nothing valid was selected.
    """
    if not selected_books:
        return "⚠️ Pilih minimal 1 buku."

    # Resolve each known title to the first row carrying it.
    indices = []
    for title in selected_books:
        if title in book_list:
            rows = df_books.index[df_books['original_title'] == title].tolist()
            indices.append(rows[0])
    if not indices:
        return "⚠️ Buku tidak valid."

    # The sparse mean is converted with np.asarray before being passed to
    # cosine_similarity.
    mean_vector = np.asarray(tfidf_matrix[indices].mean(axis=0))
    similarities = cosine_similarity(mean_vector, tfidf_matrix)[0]
    ranked = sorted(enumerate(similarities), key=lambda pair: pair[1], reverse=True)

    unseen = []
    for position, _ in ranked:
        if position not in indices:
            unseen.append(position)
    return render_gallery(df_books.iloc[unseen[:5]])
def rec_tab3_coldstart(author_name):
    """Return the five best-rated books by a given author.

    Args:
        author_name: Author chosen in the dropdown; may be empty or None.

    Returns:
        Gallery HTML for up to five books whose author string contains
        ``author_name`` (case-insensitive), sorted by ``average_rating``
        descending, or an empty string when no author is given.
    """
    if not author_name:
        return ""

    # regex=False: match the name literally. Otherwise the dots in
    # "J.K. Rowling" match any character, and names containing characters
    # like "(" or "+" could raise or match the wrong rows.
    by_author = df_books['authors'].str.contains(
        author_name, case=False, na=False, regex=False
    )
    top_rated = df_books[by_author].sort_values(by='average_rating', ascending=False).head(5)
    return render_gallery(top_rated)
# ==========================================
# 5. UI GRADIO (Tanpa event listener otomatis / app.load)
# ==========================================
# Build the three-tab interface. Each tab runs its recommender only when its
# button is clicked.
with gr.Blocks(title="SmartLib AI") as app:
    gr.Markdown("### 📚 SmartLib: AI Recommender ")
    with gr.Tabs():
        # TAB 1: recommendations from a single book
        with gr.TabItem("1. NLP (Satu Buku)"):
            in_tab1 = gr.Dropdown(choices=book_list, value=default_book, label="Ketik Judul Buku", interactive=True)
            btn_tab1 = gr.Button("Rekomendasikan", variant="primary")
            out_tab1 = gr.HTML()
            btn_tab1.click(rec_tab1_nlp, inputs=in_tab1, outputs=out_tab1)

        # TAB 2: recommendations from several books
        with gr.TabItem("2. Pola Pembaca (Multi-Buku)"):
            in_tab2 = gr.Dropdown(choices=book_list, multiselect=True, label="Ketik & Pilih Beberapa Buku", interactive=True)
            btn_tab2 = gr.Button("Analisis Pola", variant="primary")
            out_tab2 = gr.HTML()
            btn_tab2.click(rec_tab2_multi, inputs=in_tab2, outputs=out_tab2)

        # TAB 3: best-rated books of an author, for new users
        with gr.TabItem("3. Pengguna Baru"):
            in_tab3 = gr.Dropdown(choices=author_list, value=default_author, label="Ketik Penulis", interactive=True)
            btn_tab3 = gr.Button("Buku Terbaik", variant="primary")
            out_tab3 = gr.HTML()
            btn_tab3.click(rec_tab3_coldstart, inputs=in_tab3, outputs=out_tab3)

# Start the web server (the stray trailing "|" after this call was removed).
app.launch()