import pandas as pd import numpy as np from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity import gradio as gr # ========================================== # 1. VERSI RINGAN: LOAD DATA (HANYA 1000 BUKU) # ========================================== def load_data(): books_url = "https://raw.githubusercontent.com/zygmuntz/goodbooks-10k/master/books.csv" df = pd.read_csv(books_url, on_bad_lines='skip') df = df[['book_id', 'original_title', 'authors', 'average_rating', 'image_url']].fillna('') df = df[df['original_title'] != ''].reset_index(drop=True) # PANGKAS MENJADI 1000 BUKU agar UI Gradio tidak lag / freeze df = df.head(1000) return df df_books = load_data() # ========================================== # 2. NLP: TF-IDF & MATRIKS SIMILARITAS # ========================================== df_books['content'] = df_books['original_title'] + " " + df_books['authors'] tfidf = TfidfVectorizer(stop_words='english') tfidf_matrix = tfidf.fit_transform(df_books['content']) # Matriks ukuran 1000x1000 sangat ringan untuk memori RAM cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix) # Siapkan list dropdown (diambil dari 1000 buku tersebut) book_list = sorted(df_books['original_title'].tolist()) # Batasi penulis unik hanya 100 orang agar dropdown ringan author_list = sorted(list(set([a.split(',')[0].strip() for a in df_books['authors']])))[:100] default_book = "The Hunger Games" if "The Hunger Games" in book_list else book_list[0] default_author = "J.K. Rowling" if "J.K. Rowling" in author_list else author_list[0] # ========================================== # 3. RENDER HTML GALERI (Menampilkan hasil rekomendasi) # ========================================== def render_gallery(df_subset): if df_subset.empty: return "

⚠️ Tidak ada rekomendasi.

" html = "
" for _, book in df_subset.iterrows(): html += f"""

{book['original_title']}

{book['authors']}

⭐ {book['average_rating']}
""" html += "
" return html # ========================================== # 4. FUNGSI LOGIKA (Ringan & Cepat) # ========================================== def rec_tab1_nlp(book_title): if not book_title: return "" try: idx = df_books.index[df_books['original_title'] == book_title].tolist()[0] except: return "⚠️ Buku tidak valid." scores = sorted(list(enumerate(cosine_sim[idx])), key=lambda x: x[1], reverse=True)[1:6] return render_gallery(df_books.iloc[[i[0] for i in scores]]) def rec_tab2_multi(selected_books): if not selected_books: return "⚠️ Pilih minimal 1 buku." indices = [df_books.index[df_books['original_title'] == t].tolist()[0] for t in selected_books if t in book_list] if not indices: return "⚠️ Buku tidak valid." # np.asarray mencegah error sklearn mean_vector = np.asarray(tfidf_matrix[indices].mean(axis=0)) scores = sorted(list(enumerate(cosine_similarity(mean_vector, tfidf_matrix)[0])), key=lambda x: x[1], reverse=True) rec_indices = [i for i, _ in scores if i not in indices][:5] return render_gallery(df_books.iloc[rec_indices]) def rec_tab3_coldstart(author_name): if not author_name: return "" df_filtered = df_books[df_books['authors'].str.contains(author_name, case=False, na=False)] return render_gallery(df_filtered.sort_values(by='average_rating', ascending=False).head(5)) # ========================================== # 5. UI GRADIO (Tanpa event listener otomatis / app.load) # ========================================== with gr.Blocks(title="SmartLib AI") as app: gr.Markdown("### 📚 SmartLib: AI Recommender ") with gr.Tabs(): # TAB 1 with gr.TabItem("1. NLP (Satu Buku)"): in_tab1 = gr.Dropdown(choices=book_list, value=default_book, label="Ketik Judul Buku", interactive=True) btn_tab1 = gr.Button("Rekomendasikan", variant="primary") out_tab1 = gr.HTML() btn_tab1.click(rec_tab1_nlp, inputs=in_tab1, outputs=out_tab1) # TAB 2 with gr.TabItem("2. Pola Pembaca (Multi-Buku)"): in_tab2 = gr.Dropdown(choices=book_list, multiselect=True, label="Ketik & Pilih Beberapa Buku", interactive=True) btn_tab2 = gr.Button("Analisis Pola", variant="primary") out_tab2 = gr.HTML() btn_tab2.click(rec_tab2_multi, inputs=in_tab2, outputs=out_tab2) # TAB 3 with gr.TabItem("3. Pengguna Baru"): in_tab3 = gr.Dropdown(choices=author_list, value=default_author, label="Ketik Penulis", interactive=True) btn_tab3 = gr.Button("Buku Terbaik", variant="primary") out_tab3 = gr.HTML() btn_tab3.click(rec_tab3_coldstart, inputs=in_tab3, outputs=out_tab3) app.launch()