Spaces:

disha81
/

bookRecommendationSystem

Sleeping

App Files Files Community

bookRecommendationSystem / app.py

disha81

Update app.py

75596a7 verified 5 months ago

raw

history blame contribute delete

2.88 kB

	import torch
	from sentence_transformers import SentenceTransformer
	from datasets import load_dataset
	from sentence_transformers.util import cos_sim
	import gradio as gr

	# --- Select the compute device ---
	# Preference order: Apple MPS, then CUDA, then plain CPU.
	# torch.backends.mps is absent from PyTorch releases that predate MPS
	# support, so look it up defensively instead of letting the attribute
	# access raise at startup.
	_mps_backend = getattr(torch.backends, "mps", None)
	if _mps_backend is not None and _mps_backend.is_available():
	    device = torch.device("mps")
	    print("MPS device is available. Using M1/M2 GPU!")
	elif torch.cuda.is_available():
	    device = torch.device("cuda")
	    print("CUDA device is available. Using NVIDIA GPU!")
	else:
	    device = torch.device("cpu")
	    print("No GPU available. Falling back to CPU.")


	# --- Load the sentence-embedding model onto the selected device ---
	model_name = "all-MiniLM-L6-v2"
	model = SentenceTransformer(model_name, device=device)

	# --- Load the dataset and convert its train split to a DataFrame ---
	print("Loading dataset...")
	ds = load_dataset("pszemraj/goodreads-bookgenres", "default")
	df = ds["train"].to_pandas()
	# Log the column names so a schema mismatch is visible in the output.
	print("Available columns:", list(df.columns))

	# Rows without a description cannot be encoded, so discard them.
	df = df.dropna(subset=["Description"])
	print("Dataset loaded and cleaned. Head of DataFrame:")
	print(df.head())

	# --- Generate book embeddings ---
	print("Generating book embeddings...")
	# Encode every description in a single call; the list order matches the
	# row order of df, which recommend_books relies on for positional lookup.
	book_descriptions = df["Description"].to_list()
	book_embeddings = model.encode(
	    book_descriptions,
	    show_progress_bar=True,
	    convert_to_tensor=True,
	)
	print("Embeddings generated.")

	# --- 3. Define Recommendation Function ---

	def recommend_books(query, top_k=5):
	    """Return the books whose descriptions are most similar to ``query``.

	    The query is embedded with the module-level ``model`` and scored by
	    cosine similarity against the precomputed ``book_embeddings``.

	    Args:
	        query: Free-text search string.
	        top_k: Maximum number of books to return. Values larger than the
	            number of embedded books are clamped; values below zero are
	            treated as zero.

	    Returns:
	        A DataFrame holding the ``Book`` and ``Description`` columns of the
	        best-scoring rows of ``df``. A DataFrame is returned even when
	        ``top_k`` is 1.
	    """
	    query_embedding = model.encode(query, convert_to_tensor=True)
	    # cos_sim yields one row of scores per query; keep the single query's
	    # row so the scores are always one-dimensional.
	    cosine_scores = cos_sim(query_embedding, book_embeddings)[0]
	    # torch.topk raises when k exceeds the number of candidates, so clamp it.
	    k = max(0, min(int(top_k), cosine_scores.shape[0]))
	    top_k_indices = torch.topk(cosine_scores, k=k).indices
	    # Move the indices to the CPU and convert them to a plain list of ints.
	    # Keeping a list (never a bare int) makes iloc return a DataFrame even
	    # for a single match.
	    positions = top_k_indices.cpu().tolist()
	    # iloc is positional, matching the order the descriptions were encoded in.
	    return df.iloc[positions][['Book', 'Description']]


	# --- Build the Gradio interface and launch it ---
	print("Launching Gradio interface...")

	# Input: a two-line text box for the free-text query.
	_query_box = gr.Textbox(
	    lines=2,
	    placeholder="Enter a book topic, genre, or title...",
	)
	# Output: a table with the two columns returned by recommend_books.
	_results_table = gr.Dataframe(headers=["Book", "Description"])
	# Sample queries offered as examples in the UI.
	_example_queries = [
	    "A thrilling detective story",
	    "A heartwarming novel about friendship",
	    "Science fiction about space travel",
	]

	gr_interface = gr.Interface(
	    fn=recommend_books,
	    inputs=_query_box,
	    outputs=_results_table,
	    examples=_example_queries,
	    title="Book Recommendation System",
	    description="Get book recommendations based on your query.",
	)

	gr_interface.launch(share=True)