Spaces:
Sleeping
Sleeping
| import pandas as pd | |
| import numpy as np | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.metrics.pairwise import cosine_similarity | |
| import gradio as gr | |
| # ========================================== | |
| # 1. VERSI RINGAN: LOAD DATA (HANYA 1000 BUKU) | |
| # ========================================== | |
_DEFAULT_BOOKS_URL = "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv"


def load_data(books_url=_DEFAULT_BOOKS_URL, max_books=1000):
    """Load the book catalogue and return a trimmed, cleaned DataFrame.

    Args:
        books_url: Path or URL of a CSV file that contains at least the
            columns ``book_id``, ``original_title``, ``authors``,
            ``average_rating`` and ``image_url``. Defaults to the
            goodbooks-10k ``books.csv`` file.
        max_books: Maximum number of rows to keep (the first rows of the
            file). Pass ``None`` to keep every row. The default of 1000
            keeps the Gradio UI responsive.

    Returns:
        A new DataFrame with the five columns above, missing values
        replaced by ``''``, rows without a title removed, and a fresh
        0..n-1 index.
    """
    df = pd.read_csv(books_url, on_bad_lines='skip')
    df = df[['book_id', 'original_title', 'authors', 'average_rating', 'image_url']].fillna('')
    df = df[df['original_title'] != ''].reset_index(drop=True)
    if max_books is not None:
        df = df.head(max_books)
    # Return an independent frame: callers add columns to the result, and
    # assigning into a slice of another frame triggers SettingWithCopyWarning.
    return df.copy()
# Load the catalogue once at import time; everything below derives from it.
df_books = load_data()

# ==========================================
# 2. NLP: TF-IDF & SIMILARITY MATRIX
# ==========================================
# Each book is described by its title followed by its author string.
df_books['content'] = df_books['original_title'] + " " + df_books['authors']

tfidf = TfidfVectorizer(stop_words='english')
tfidf_matrix = tfidf.fit_transform(df_books['content'])

# Book-to-book cosine similarity (one row and one column per book).
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Dropdown choices, taken from the loaded books.
book_list = sorted(df_books['original_title'].tolist())

# Keep only the first listed author of each book, de-duplicated, and cap the
# alphabetically sorted result at 100 names so the dropdown stays light.
author_list = sorted({a.split(',')[0].strip() for a in df_books['authors']})[:100]

# Preferred defaults, falling back to the first choice when they are absent.
if "The Hunger Games" in book_list:
    default_book = "The Hunger Games"
else:
    default_book = book_list[0]

if "J.K. Rowling" in author_list:
    default_author = "J.K. Rowling"
else:
    default_author = author_list[0]
| # ========================================== | |
| # 3. RENDER HTML GALERI (Menampilkan hasil rekomendasi) | |
| # ========================================== | |
def render_gallery(df_subset):
    """Render the given books as an HTML card gallery.

    Args:
        df_subset: DataFrame with the columns ``image_url``,
            ``original_title``, ``authors`` and ``average_rating``.

    Returns:
        An HTML string with one card per row, or a short warning paragraph
        when ``df_subset`` is empty.
    """
    # Imported locally so this function carries its own dependency.
    from html import escape

    if df_subset.empty:
        return "<p style='color:red;'>⚠️ Tidak ada rekomendasi.</p>"

    cards = []
    for _, book in df_subset.iterrows():
        # The values come from an external CSV file, so escape them before
        # placing them in markup; quotes are escaped too because image_url
        # ends up inside an attribute.
        image_url = escape(str(book['image_url']))
        title = escape(str(book['original_title']))
        authors = escape(str(book['authors']))
        rating = escape(str(book['average_rating']))
        # `this.onerror=null` stops the handler from firing again (and
        # looping forever) if the placeholder image fails to load as well.
        cards.append(f"""
<div style='width: 150px; background: white; border: 1px solid #ddd; border-radius: 8px; padding: 10px; text-align: center;'>
<img src='{image_url}' style='width: 100px; height: 150px; object-fit: cover; border-radius: 4px; margin-bottom: 8px;' onerror="this.onerror=null;this.src='https://via.placeholder.com/100x150?text=No+Cover'">
<h4 style='font-size: 13px; margin: 0 0 4px 0; height: 35px; overflow: hidden;'>{title}</h4>
<p style='font-size: 11px; color: gray; margin: 0 0 6px 0; height: 15px; overflow: hidden;'>{authors}</p>
<span style='background: #eef2f5; font-size: 11px; padding: 3px 6px; border-radius: 10px;'>⭐ {rating}</span>
</div>
""")

    return (
        "<div style='display: flex; gap: 15px; flex-wrap: wrap; justify-content: center; margin-top: 10px;'>"
        + "".join(cards)
        + "</div>"
    )
| # ========================================== | |
| # 4. FUNGSI LOGIKA (Ringan & Cepat) | |
| # ========================================== | |
def rec_tab1_nlp(book_title):
    """Recommend the five books most similar to one selected title.

    Args:
        book_title: Title chosen in the dropdown; compared for exact
            equality against ``df_books['original_title']``.

    Returns:
        ``""`` when no title is given, a warning string when the title is
        not found, otherwise the gallery HTML for the five most similar
        books.
    """
    if not book_title:
        return ""

    # Row positions (not index labels) of the matching books, because both
    # `cosine_sim` and `.iloc` below are positional.
    matches = np.flatnonzero((df_books['original_title'] == book_title).to_numpy())
    if matches.size == 0:
        return "⚠️ Buku tidak valid."
    idx = int(matches[0])

    # Descending similarity; the stable sort keeps ties in row order.
    ranked = np.argsort(-cosine_sim[idx], kind="stable")
    # Drop the query book explicitly instead of assuming it sorts first:
    # a tied score from a lower-numbered row, or an all-zero vector, would
    # otherwise leave the selected book in its own recommendations.
    top = [int(i) for i in ranked if i != idx][:5]
    return render_gallery(df_books.iloc[top])
def rec_tab2_multi(selected_books):
    """Recommend five books that resemble a set of selected titles.

    Args:
        selected_books: Titles picked in the multi-select dropdown.

    Returns:
        A warning string when nothing (or nothing valid) is selected,
        otherwise the gallery HTML for the five books closest to the
        averaged TF-IDF vector of the selection, excluding the selected
        books themselves.
    """
    if not selected_books:
        return "⚠️ Pilih minimal 1 buku."

    # First matching row of every selected title that is a known choice.
    indices = []
    for title in selected_books:
        if title in book_list:
            matching_rows = df_books.index[df_books['original_title'] == title].tolist()
            indices.append(matching_rows[0])
    if not indices:
        return "⚠️ Buku tidak valid."

    # The sparse mean is an np.matrix; convert it to a plain ndarray before
    # handing it to scikit-learn.
    profile = np.asarray(tfidf_matrix[indices].mean(axis=0))
    similarity = cosine_similarity(profile, tfidf_matrix)[0]
    ranked = sorted(enumerate(similarity), key=lambda pair: pair[1], reverse=True)

    # Take the five best-scoring books that were not part of the selection.
    rec_indices = []
    for position, _score in ranked:
        if position in indices:
            continue
        rec_indices.append(position)
        if len(rec_indices) == 5:
            break
    return render_gallery(df_books.iloc[rec_indices])
def rec_tab3_coldstart(author_name):
    """Show the five best-rated books by a chosen author.

    Args:
        author_name: Author picked in the dropdown; matched as a
            case-insensitive substring of the ``authors`` column.

    Returns:
        ``""`` when no author is given, otherwise the gallery HTML for up
        to five matching books ordered by ``average_rating`` descending.
    """
    if not author_name:
        return ""

    # regex=False: the name is literal text, not a pattern. Treated as a
    # regex, the dots in "J.K. Rowling" match any character, and names
    # containing characters such as "(" or "+" can raise or mis-match.
    by_author = df_books['authors'].str.contains(
        author_name, case=False, na=False, regex=False
    )
    best_rated = df_books[by_author].sort_values(by='average_rating', ascending=False).head(5)
    return render_gallery(best_rated)
| # ========================================== | |
| # 5. UI GRADIO (Tanpa event listener otomatis / app.load) | |
| # ========================================== | |
# Three tabs, each pairing one dropdown and one button with its recommender.
# Recommendations are computed only when the user clicks a button.
with gr.Blocks(title="SmartLib AI") as app:
    gr.Markdown("### 📚 SmartLib: AI Recommender ")

    with gr.Tabs():
        # Tab 1: recommendations from a single book.
        with gr.TabItem("1. NLP (Satu Buku)"):
            in_tab1 = gr.Dropdown(
                choices=book_list,
                value=default_book,
                label="Ketik Judul Buku",
                interactive=True,
            )
            btn_tab1 = gr.Button("Rekomendasikan", variant="primary")
            out_tab1 = gr.HTML()
            btn_tab1.click(fn=rec_tab1_nlp, inputs=in_tab1, outputs=out_tab1)

        # Tab 2: recommendations from several selected books.
        with gr.TabItem("2. Pola Pembaca (Multi-Buku)"):
            in_tab2 = gr.Dropdown(
                choices=book_list,
                multiselect=True,
                label="Ketik & Pilih Beberapa Buku",
                interactive=True,
            )
            btn_tab2 = gr.Button("Analisis Pola", variant="primary")
            out_tab2 = gr.HTML()
            btn_tab2.click(fn=rec_tab2_multi, inputs=in_tab2, outputs=out_tab2)

        # Tab 3: best-rated books of a chosen author, for new users.
        with gr.TabItem("3. Pengguna Baru"):
            in_tab3 = gr.Dropdown(
                choices=author_list,
                value=default_author,
                label="Ketik Penulis",
                interactive=True,
            )
            btn_tab3 = gr.Button("Buku Terbaik", variant="primary")
            out_tab3 = gr.HTML()
            btn_tab3.click(fn=rec_tab3_coldstart, inputs=in_tab3, outputs=out_tab3)

app.launch()