# disha81's picture
# Update app.py
# 75596a7 verified
import torch
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
from sentence_transformers.util import cos_sim
import gradio as gr
# --- Select the compute device: Apple MPS first, then CUDA, then CPU ---
# Older PyTorch releases have no `torch.backends.mps` attribute at all, so it
# is looked up defensively instead of being accessed unconditionally.
_mps_backend = getattr(torch.backends, "mps", None)
if _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
    print("MPS device is available. Using M1/M2 GPU!")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print("CUDA device is available. Using NVIDIA GPU!")
else:
    # No accelerator was detected; everything below runs on the CPU.
    device = torch.device("cpu")
    print("No GPU available. Falling back to CPU.")
# Sentence-embedding model used for both the book descriptions and the user
# queries; it is placed on the device chosen above.
model_name = "all-MiniLM-L6-v2"
model = SentenceTransformer(
    model_name,
    device=device,
)
# --- 1. Load the dataset and clean it ---
print("Loading dataset...")
ds = load_dataset("pszemraj/goodreads-bookgenres", "default")
train_split = ds["train"]
df = train_split.to_pandas()

# Show the column names so the ones used below ('Book', 'Description') can be
# checked against what the dataset actually provides.
print("Available columns:", df.columns.tolist())

# A row without a description cannot be embedded, so discard it up front.
df.dropna(subset=["Description"], inplace=True)

print("Dataset loaded and cleaned. Head of DataFrame:")
print(df.head())
# --- 2. Generate Book Embeddings ---
print("Generating book embeddings...")

# All descriptions are handed to the model in a single encode() call; the
# result is kept as a tensor so it can be compared against query embeddings.
book_descriptions = df["Description"].tolist()
book_embeddings = model.encode(
    book_descriptions,
    convert_to_tensor=True,
    show_progress_bar=True,
)

print("Embeddings generated.")
# --- 3. Define Recommendation Function ---
def recommend_books(query, top_k=5):
    """Return the books whose descriptions are most similar to *query*.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against the precomputed ``book_embeddings``.

    Args:
        query: Free-text search string (topic, genre, title, ...).
        top_k: Maximum number of books to return. Values larger than the
            number of embedded books are clamped to that number.

    Returns:
        A DataFrame with the columns ``'Book'`` and ``'Description'``, one row
        per recommended book, best match first. The frame is empty when the
        query is blank or ``top_k`` is not positive.
    """
    columns = ['Book', 'Description']
    requested = int(top_k)

    # A blank query carries no information to match on, and a non-positive
    # top_k asks for nothing: answer both with an empty result rather than
    # running the model.
    blank_query = query is None or (isinstance(query, str) and not query.strip())
    if blank_query or requested <= 0:
        return df.iloc[0:0][columns]

    query_embedding = model.encode(query, convert_to_tensor=True)

    # Cosine similarity between the query and every book embedding; the last
    # dimension of the result has one score per book.
    cosine_scores = cos_sim(query_embedding, book_embeddings)

    # torch.topk raises when k exceeds the size of the dimension it selects
    # from, so never request more results than there are books.
    k = min(requested, cosine_scores.shape[-1])

    # Take the first (only) row of indices instead of squeeze(): squeeze()
    # would turn a k == 1 result into a 0-d tensor, and the lookup below would
    # then return a Series instead of a DataFrame.
    top_indices = torch.topk(cosine_scores, k=k).indices[0]

    # pandas needs plain Python integers, so move the indices off the
    # accelerator and convert them to a list before the positional lookup.
    positions = top_indices.cpu().tolist()
    return df.iloc[positions][columns]
# --- 4. Define Gradio Interface and Launch ---
print("Launching Gradio interface...")

# Input: a two-line text box for the free-text query.
query_input = gr.Textbox(
    lines=2,
    placeholder="Enter a book topic, genre, or title...",
)

# Output: a table whose headers match the columns recommend_books returns.
results_table = gr.Dataframe(headers=["Book", "Description"])

# Sample queries offered as examples in the interface.
example_queries = [
    "A thrilling detective story",
    "A heartwarming novel about friendship",
    "Science fiction about space travel",
]

gr_interface = gr.Interface(
    fn=recommend_books,
    inputs=query_input,
    outputs=results_table,
    title="Book Recommendation System",
    description="Get book recommendations based on your query.",
    examples=example_queries,
)

gr_interface.launch(share=True)