Spaces:
Sleeping
Sleeping
| import torch | |
| from sentence_transformers import SentenceTransformer | |
| from datasets import load_dataset | |
| from sentence_transformers.util import cos_sim | |
| import gradio as gr | |
# Choose the compute device for the embedding model. Apple's MPS backend is
# probed first, then CUDA; the CPU is the fallback when neither is reported.
if torch.backends.mps.is_available():
    _backend, _notice = "mps", "MPS device is available. Using M1/M2 GPU!"
elif torch.cuda.is_available():
    _backend, _notice = "cuda", "CUDA device is available. Using NVIDIA GPU!"
else:
    _backend, _notice = "cpu", "No GPU available. Falling back to CPU."

device = torch.device(_backend)
print(_notice)
# --- 1. Load the sentence-embedding model and the book dataset ---
model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(model_name, device=device)

print("Loading dataset...")
ds = load_dataset("pszemraj/goodreads-bookgenres", "default")
df = ds['train'].to_pandas()

# Echo the column names so a schema mismatch is visible in the logs.
print("Available columns:", df.columns.tolist())

# Rows without a description cannot be encoded, so discard them up front.
df = df.dropna(subset=['Description'])
print("Dataset loaded and cleaned. Head of DataFrame:")
print(df.head())

# --- 2. Generate Book Embeddings ---
print("Generating book embeddings...")

# One batched encode call over every description; the resulting tensor's row
# order follows the positional row order of ``df``.
book_descriptions = df['Description'].tolist()
book_embeddings = model.encode(
    book_descriptions,
    convert_to_tensor=True,
    show_progress_bar=True,
)
print("Embeddings generated.")
# --- 3. Define Recommendation Function ---
def recommend_books(query, top_k=5):
    """Return the books whose descriptions are most similar to ``query``.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against the precomputed ``book_embeddings``.

    Args:
        query: Free-text search string (a single string, as supplied by the
            Gradio textbox).
        top_k: Maximum number of books to return. Values larger than the
            number of available books are clamped; values below zero are
            treated as zero.

    Returns:
        A pandas DataFrame with the ``'Book'`` and ``'Description'`` columns
        of the best matches, ordered from most to least similar. It is always
        a DataFrame, including when ``top_k`` is 1 or 0.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)

    # cos_sim returns a 2-D score matrix; a single query produces one row.
    # Index that row explicitly instead of calling squeeze(), which would also
    # collapse the result dimension when top_k == 1 and turn the index list
    # into a bare int.
    scores = cos_sim(query_embedding, book_embeddings)[0]

    # torch.topk raises when k exceeds the number of candidates, so clamp it.
    k = max(0, min(int(top_k), scores.shape[0]))
    best_positions = torch.topk(scores, k=k).indices

    # The indices may live on a GPU; bring them to the CPU and convert them to
    # plain Python ints before using them as positional row selectors.
    row_positions = best_positions.cpu().tolist()
    return df.iloc[row_positions][['Book', 'Description']]
# --- 4. Build the Gradio interface and launch it ---
print("Launching Gradio interface...")

# Input widget: a two-line free-text box for the search query.
query_box = gr.Textbox(lines=2, placeholder="Enter a book topic, genre, or title...")

# Output widget: a table whose headers match the columns recommend_books returns.
results_table = gr.Dataframe(headers=["Book", "Description"])

# Sample queries offered in the UI.
example_queries = [
    "A thrilling detective story",
    "A heartwarming novel about friendship",
    "Science fiction about space travel",
]

gr_interface = gr.Interface(
    fn=recommend_books,
    inputs=query_box,
    outputs=results_table,
    title="Book Recommendation System",
    description="Get book recommendations based on your query.",
    examples=example_queries,
)

# share=True asks Gradio to also create a public share link.
gr_interface.launch(share=True)