Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files- app.py +89 -0
- requirements.txt +3 -0
app.py
ADDED
|
@@ -0,0 +1,89 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import html
from functools import lru_cache

import gradio as gr
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
|
| 6 |
+
|
| 7 |
+
# 1. LOAD DATA FROM GITHUB (fetched automatically, no CSV upload needed)
BOOKS_URL = "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv"
BOOK_COLUMNS = ['book_id', 'original_title', 'authors', 'average_rating', 'image_url']


# NOTE: the original decorator was ``@gr.utils.custom_cache``; that attribute
# does not appear to exist in Gradio, so the standard-library cache is used.
@lru_cache(maxsize=1)
def load_data(source=BOOKS_URL, max_books=3000):
    """Load the book catalogue and return a cleaned, size-capped DataFrame.

    Args:
        source: Anything ``pandas.read_csv`` accepts (URL, path, open
            buffer). Must be hashable because the result is memoised.
            Defaults to the goodbooks-10k ``books.csv`` on GitHub.
        max_books: Maximum number of rows to keep (default 3000) so the
            similarity matrix built from the result stays small.

    Returns:
        A DataFrame with the columns in ``BOOK_COLUMNS``, missing values
        replaced by ``''``, rows without an ``original_title`` removed, and
        a fresh 0..n-1 index. Repeated calls with the same arguments return
        the same cached object, so in-place changes are shared.
    """
    raw = pd.read_csv(source, on_bad_lines='skip')

    # Keep only the columns the app uses and blank out missing values.
    books = raw[BOOK_COLUMNS].fillna('')
    books = books[books['original_title'] != '']

    # Cap first, then reset the index: reset_index returns a new frame, so
    # callers can add columns without pandas' SettingWithCopyWarning (which
    # a bare ``.head()`` slice would trigger).
    return books.head(max_books).reset_index(drop=True)
|
| 19 |
+
|
| 20 |
+
df_books = load_data()

# 2. NLP: CONTENT-BASED FILTERING (TF-IDF)
# Title and author are joined into a single text "document" per book; this
# is the only text the vectorizer sees.
df_books['content'] = df_books['original_title'].str.cat(df_books['authors'], sep=" ")
tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_books['content'])

# Book-to-book cosine similarity. With the second argument omitted,
# scikit-learn compares the matrix against itself.
cosine_sim = cosine_similarity(tfidf_matrix)
|
| 30 |
+
|
| 31 |
+
def recommend_books(book_title, top_n=5):
    """Build an HTML gallery of the books most similar to *book_title*.

    The title is looked up in the module-level ``df_books`` frame; the
    matching row of the precomputed ``cosine_sim`` matrix is then used to
    rank every other book.

    Args:
        book_title: Exact ``original_title`` of the reference book. If
            several rows share the title, the first one is used.
        top_n: Maximum number of recommendations to render (default 5).

    Returns:
        An HTML string: a flex container with one card per recommended
        book, or a red warning paragraph when the title is not found.
    """
    not_found = "<p style='color:red;'>⚠️ Buku tidak ditemukan.</p>"
    if not book_title:
        return not_found

    # Positional row numbers of the matching titles (cosine_sim is indexed
    # by position, not by index label).
    hits = np.flatnonzero((df_books['original_title'] == book_title).to_numpy())
    if hits.size == 0:
        return not_found
    idx = int(hits[0])

    # Highest score first; a stable sort keeps tied books in row order.
    scores = np.asarray(cosine_sim[idx], dtype=float)
    ranked = np.argsort(-scores, kind='stable')

    # Exclude the query by position instead of assuming it sits at rank 0:
    # a duplicate entry with the same score and a lower row number would
    # otherwise push the query itself into its own recommendations.
    book_indices = ranked[ranked != idx][:max(top_n, 0)]

    # 3. UI/UX: inline HTML & CSS for the gallery view
    cards = []
    for i in book_indices:
        book = df_books.iloc[i]
        # Values come from an external CSV, so escape them before placing
        # them in HTML text or attribute values.
        title = html.escape(str(book['original_title']))
        authors = html.escape(str(book['authors']))
        image_url = html.escape(str(book['image_url']))
        # ``this.onerror=null`` stops the fallback from re-firing forever
        # if the placeholder image itself fails to load.
        cards.append(f"""
        <div style='width: 160px; background: white; border: 1px solid #e0e0e0; border-radius: 12px; padding: 15px; text-align: center; box-shadow: 0 4px 6px rgba(0,0,0,0.05); transition: transform 0.2s;'>
            <img src='{image_url}' style='width: 120px; height: 170px; object-fit: cover; border-radius: 6px; margin-bottom: 12px; box-shadow: 0 2px 4px rgba(0,0,0,0.1);' onerror="this.onerror=null;this.src='https://via.placeholder.com/120x170?text=No+Cover'">
            <h4 style='font-size: 14px; color: #333; margin: 0 0 5px 0; display: -webkit-box; -webkit-line-clamp: 2; -webkit-box-orient: vertical; overflow: hidden; height: 38px;' title="{title}">{title}</h4>
            <p style='font-size: 12px; color: #7f8c8d; margin: 0 0 8px 0; white-space: nowrap; overflow: hidden; text-overflow: ellipsis;'>{authors}</p>
            <div style='background: #f1f8ff; color: #0366d6; padding: 4px 8px; border-radius: 20px; display: inline-block; font-size: 12px; font-weight: bold;'>
                ⭐ {book['average_rating']}
            </div>
        </div>
        """)

    return (
        "<div style='display: flex; gap: 20px; flex-wrap: wrap; justify-content: center; margin-top: 20px;'>"
        + "".join(cards)
        + "</div>"
    )
|
| 63 |
+
|
| 64 |
+
# Titles offered in the dropdown: de-duplicated and sorted alphabetically.
book_list = sorted(set(df_books['original_title'].tolist()))

# Preferred initial selection. The dropdown value must be one of its
# choices, so fall back to the first available title (or nothing) when the
# preferred one is not in the loaded data.
DEFAULT_BOOK = "Harry Potter and the Sorcerer's Stone"
if DEFAULT_BOOK in book_list:
    initial_book = DEFAULT_BOOK
else:
    initial_book = book_list[0] if book_list else None

# --- GRADIO UI ---
with gr.Blocks(theme=gr.themes.Soft(), title="SmartLib AI") as app:
    gr.Markdown("<h1 style='text-align: center; color: #2c3e50;'>📚 SmartLib: AI-Powered Recommender</h1>")
    gr.Markdown("<p style='text-align: center; font-size: 16px; color: #7f8c8d;'>Sistem rekomendasi buku cerdas berbasis <b>Machine Learning (NLP TF-IDF)</b>. AI akan menganalisis konteks judul dan gaya penulis untuk menemukan bacaan yang paling sesuai dengan selera Anda.</p>")

    with gr.Row():
        with gr.Column(scale=3):
            in_book = gr.Dropdown(choices=book_list, value=initial_book,
                                  label="🔍 Ketik atau Pilih Buku Favorit Anda", interactive=True)
        with gr.Column(scale=1):
            btn = gr.Button("✨ Berikan Rekomendasi", variant="primary")

    gr.Markdown("### 🎯 Rekomendasi Teratas Untuk Anda:")
    out_html = gr.HTML()

    # Recommendations refresh on a button click or when the dropdown changes.
    btn.click(recommend_books, inputs=in_book, outputs=out_html)
    in_book.change(recommend_books, inputs=in_book, outputs=out_html)

    # Render recommendations for the initial selection when the page opens.
    app.load(recommend_books, inputs=in_book, outputs=out_html)

app.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
pandas
|
| 2 |
+
scikit-learn
|
| 3 |
+
gradio
|