Spaces:

Felix273
/

Book-Recommender-System

Sleeping

github-actions[bot]

Auto-deploy from GitHub Actions

9a5c160 5 months ago

4.09 kB

	import pandas as pd
	import numpy as np
	import gradio as gr

	from langchain_community.document_loaders import TextLoader
	from langchain_openai import OpenAIEmbeddings
	from langchain_text_splitters import CharacterTextSplitter
	from langchain_chroma import Chroma

	# =======================
	# Read the book catalogue and derive cover image paths
	# =======================
	books = pd.read_csv("data/books_with_emotions.csv")

	# Rows without a thumbnail fall back to the placeholder image file; every
	# other row gets "&fife=w800" appended to its thumbnail value
	# (presumably a width hint for the image host -- TODO confirm).
	books["large_thumbnail"] = np.where(
	    books["thumbnail"].isna(),
	    "cover-not-found.jpg",
	    books["thumbnail"] + "&fife=w800",
	)

	# =======================
	# Build the Chroma vector store
	# =======================
	raw_documents = TextLoader("data/tagged_description.txt").load()

	# NOTE(review): chunk_size=1 with a newline separator presumably yields one
	# document per line of the file -- confirm against the splitter's behaviour.
	# The retrieval code expects each document to start with an ISBN.
	text_splitter = CharacterTextSplitter(
	    separator="\n",
	    chunk_size=1,
	    chunk_overlap=0,
	)
	documents = text_splitter.split_documents(raw_documents)

	# Embed the split documents with OpenAI embeddings and index them.
	db_books = Chroma.from_documents(documents, OpenAIEmbeddings())

	# =======================
	# Semantic retrieval logic
	# =======================
	def retrieve_semantic_recommendations(
	    query: str,
	    category: str = "All",
	    tone: str = "All",
	    initial_top_k: int = 50,
	    final_top_k: int = 16,
	) -> pd.DataFrame:
	    """Return book recommendations for a free-text query.

	    The query is matched against the vector store, the matching catalogue
	    rows are put back into the order the store returned them, and the result
	    is optionally narrowed by category and re-sorted by an emotion score.

	    Args:
	        query: Free-text description of the desired book.
	        category: Value of ``simple_categories`` to keep, or ``"All"`` to
	            skip category filtering.
	        tone: One of ``"Happy"``, ``"Surprising"``, ``"Angry"``,
	            ``"Suspenseful"``, ``"Sad"`` to sort by the matching emotion
	            column; any other value keeps the retrieval order.
	        initial_top_k: Number of documents requested from the vector store.
	        final_top_k: Maximum number of rows returned.

	    Returns:
	        A slice of the ``books`` frame with at most ``final_top_k`` rows.
	        An empty frame (same columns) is returned for a blank query.
	    """
	    # A blank query carries nothing to match against, so return no
	    # recommendations instead of querying the embedding backend.
	    if not query or not query.strip():
	        return books.head(0)

	    recs = db_books.similarity_search(query, k=initial_top_k)

	    # Map each ISBN to the position at which the store first returned it
	    # (assumed to be most-similar first). Documents whose first token is not
	    # an integer are skipped rather than failing the whole request.
	    similarity_rank = {}
	    for rec in recs:
	        tokens = rec.page_content.strip('"').split()
	        try:
	            isbn = int(tokens[0])
	        except (IndexError, ValueError):
	            continue
	        similarity_rank.setdefault(isbn, len(similarity_rank))

	    # Select the matching catalogue rows by ISBN.
	    book_recs = books[books["isbn13"].isin(list(similarity_rank))]

	    # Boolean-mask selection keeps catalogue order, which would throw the
	    # retrieval ranking away; restore it so the final head() keeps the best
	    # matches rather than whichever rows come first in the CSV.
	    retrieval_order = (
	        book_recs["isbn13"].map(similarity_rank).to_numpy().argsort(kind="stable")
	    )
	    book_recs = book_recs.iloc[retrieval_order].head(initial_top_k)

	    # Filter by category.
	    if category != "All":
	        book_recs = book_recs[book_recs["simple_categories"] == category]

	    # Sort by the emotion score that corresponds to the requested tone.
	    tone_sort_map = {
	        "Happy": "joy",
	        "Surprising": "surprise",
	        "Angry": "anger",
	        "Suspenseful": "fear",
	        "Sad": "sadness",
	    }
	    emotion_column = tone_sort_map.get(tone)
	    if emotion_column is not None:
	        book_recs = book_recs.sort_values(by=emotion_column, ascending=False)

	    return book_recs.head(final_top_k)

	# =======================
	# Recommendation formatting
	# =======================
	def _truncate_words(text, max_words=30):
	    """Return the first ``max_words`` words of ``text``, adding "..." only if cut."""
	    # Missing cells arrive as non-string values (e.g. NaN); treat as empty.
	    if not isinstance(text, str):
	        return ""
	    words = text.split()
	    if len(words) <= max_words:
	        return " ".join(words)
	    return " ".join(words[:max_words]) + "..."


	def _format_authors(raw_authors):
	    """Turn a ';'-separated author string into a readable English list."""
	    # Missing or blank cells would otherwise fail on ``.split``.
	    if not isinstance(raw_authors, str) or not raw_authors.strip():
	        return "Unknown author"
	    authors = raw_authors.split(";")
	    if len(authors) == 1:
	        return authors[0]
	    if len(authors) == 2:
	        return f"{authors[0]} and {authors[1]}"
	    return f"{', '.join(authors[:-1])}, and {authors[-1]}"


	def recommend_books(query: str, category: str, tone: str):
	    """Build gallery entries for the books recommended for ``query``.

	    Args:
	        query: Free-text description of the desired book.
	        category: Category filter passed through to the retrieval step.
	        tone: Emotional tone passed through to the retrieval step.

	    Returns:
	        A list of ``(image, caption)`` tuples, one per recommended book,
	        where ``image`` is the row's ``large_thumbnail`` value and the
	        caption has the form ``"<title> by <authors>: <description>"``.
	    """
	    recommendations = retrieve_semantic_recommendations(query, category, tone)
	    results = []

	    for _, row in recommendations.iterrows():
	        caption = f"{row['title']} by {_format_authors(row['authors'])}"

	        # Only add the separator when there is a description to show.
	        truncated_description = _truncate_words(row["description"])
	        if truncated_description:
	            caption = f"{caption}: {truncated_description}"

	        results.append((row["large_thumbnail"], caption))

	    return results

	# =======================
	# Gradio dashboard
	# =======================
	# Dropdown choices: "All" disables the corresponding filter/sort.
	tones = ["All", "Happy", "Surprising", "Angry", "Suspenseful", "Sad"]
	categories = ["All"] + sorted(books["simple_categories"].unique())

	# NOTE(review): the nesting below was reconstructed (query controls inside
	# the Row, results underneath) -- confirm against the deployed layout.
	with gr.Blocks() as dashboard:
	    gr.Markdown("# 📚 Semantic Book Recommender")

	    # Query controls.
	    with gr.Row():
	        user_query = gr.Textbox(
	            placeholder="e.g., A story about forgiveness",
	            label="Please enter a description of a book:",
	        )
	        category_dropdown = gr.Dropdown(
	            label="Select a category:",
	            choices=categories,
	            value="All",
	        )
	        tone_dropdown = gr.Dropdown(
	            label="Select an emotional tone:",
	            choices=tones,
	            value="All",
	        )
	        submit_button = gr.Button("Find recommendations")

	    # Results area.
	    gr.Markdown("## 🧠 Recommendations")
	    output = gr.Gallery(label="Recommended books", columns=8, rows=2)

	    # Clicking the button feeds the three controls to recommend_books and
	    # shows its return value in the gallery.
	    submit_button.click(
	        fn=recommend_books,
	        inputs=[user_query, category_dropdown, tone_dropdown],
	        outputs=output,
	    )

	# Start the app only when this file is run as a script.
	if __name__ == "__main__":
	    dashboard.launch()