Spaces:
Sleeping
Sleeping
File size: 4,094 Bytes
779489a 267ad08 779489a 633b888 779489a 9a5c160 779489a | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 | import pandas as pd
import numpy as np
import gradio as gr
from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma
# =======================
# Load and preprocess books
# =======================
# Book metadata, including the per-emotion score columns used for tone sorting.
books = pd.read_csv("data/books_with_emotions.csv")

# Rows without a thumbnail URL fall back to a local placeholder image; the
# others get "&fife=w800" appended to their URL (presumably a request for a
# larger cover image -- confirm against the image host).
books["large_thumbnail"] = np.where(
    books["thumbnail"].isna(),
    "cover-not-found.jpg",
    books["thumbnail"] + "&fife=w800",
)
# =======================
# Prepare Chroma vector DB
# =======================
# Load the tagged descriptions file as LangChain documents.
raw_documents = TextLoader("data/tagged_description.txt").load()

# Split on newlines with no overlap. NOTE(review): chunk_size=1 presumably
# forces every line to become its own chunk -- confirm against the splitter.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1,
    chunk_overlap=0,
)
documents = text_splitter.split_documents(raw_documents)

# Embed every chunk with OpenAI embeddings and index them in Chroma.
db_books = Chroma.from_documents(
    documents=documents,
    embedding=OpenAIEmbeddings(),
)
# =======================
# Semantic retrieval logic
# =======================
def retrieve_semantic_recommendations(query: str,
                                      category: str = "All",
                                      tone: str = "All",
                                      initial_top_k: int = 50,
                                      final_top_k: int = 16) -> pd.DataFrame:
    """Retrieve recommendations based on semantics, category and emotion.

    The query is run against the ``db_books`` vector store, each hit is mapped
    back to a row of ``books`` through the integer that starts the hit's
    text (matched against the ``isbn13`` column), and the result is optionally
    narrowed by category and re-ordered by emotional tone.

    Args:
        query: Free-text description of the desired book.
        category: Value of ``simple_categories`` to keep; ``"All"`` disables
            the filter.
        tone: "Happy", "Surprising", "Angry", "Suspenseful" or "Sad" sorts by
            the matching emotion column (descending); any other value keeps
            the similarity order.
        initial_top_k: Number of hits requested from the vector store.
        final_top_k: Maximum number of rows returned.

    Returns:
        A DataFrame with at most ``final_top_k`` rows taken from ``books``.
    """
    recs = db_books.similarity_search(query, k=initial_top_k)

    # Record the best (lowest) position of every ISBN in the similarity
    # results. Hits whose text does not start with an integer are skipped
    # instead of aborting the whole request.
    isbn_rank = {}
    for position, rec in enumerate(recs):
        try:
            isbn = int(rec.page_content.strip('"').split()[0])
        except (IndexError, ValueError):
            continue
        isbn_rank.setdefault(isbn, position)

    # Filter books by ISBN. A boolean mask yields rows in CSV order, so the
    # similarity ranking is restored explicitly before anything is truncated.
    book_recs = books[books["isbn13"].isin(list(isbn_rank))]
    book_recs = (
        book_recs.assign(_similarity_rank=book_recs["isbn13"].map(isbn_rank))
        .sort_values("_similarity_rank", kind="stable")
        .drop(columns="_similarity_rank")
        .head(initial_top_k)
    )

    # Filter by category.
    if category != "All":
        book_recs = book_recs[book_recs["simple_categories"] == category]

    # Sort by emotional tone; the stable sort keeps similarity order for ties.
    tone_sort_map = {
        "Happy": "joy",
        "Surprising": "surprise",
        "Angry": "anger",
        "Suspenseful": "fear",
        "Sad": "sadness",
    }
    if tone in tone_sort_map:
        book_recs = book_recs.sort_values(
            by=tone_sort_map[tone], ascending=False, kind="stable"
        )

    return book_recs.head(final_top_k)
# =======================
# Recommendation formatting
# =======================
def recommend_books(query: str, category: str, tone: str):
    """Build gallery entries for the books recommended for a query.

    Args:
        query: Free-text description of the desired book.
        category: Category filter passed to the retrieval step.
        tone: Emotional-tone sort key passed to the retrieval step.

    Returns:
        A list of ``(image, caption)`` tuples, one per recommended book. The
        caption has the form ``"<title> by <authors>: <description>"`` with
        the description cut to its first 30 words.
    """
    recommendations = retrieve_semantic_recommendations(query, category, tone)
    results = []

    for _, row in recommendations.iterrows():
        # Missing CSV cells arrive as float NaN, which has no .split();
        # treat anything that is not a string as absent.
        description = row["description"]
        words = description.split() if isinstance(description, str) else []
        truncated_description = " ".join(words[:30])
        if len(words) > 30:
            # Only mark the text as truncated when words were actually dropped.
            truncated_description += "..."

        authors_field = row["authors"]
        authors = authors_field.split(";") if isinstance(authors_field, str) else []
        if not authors:
            authors_str = "Unknown author"
        elif len(authors) == 1:
            authors_str = authors[0]
        elif len(authors) == 2:
            authors_str = f"{authors[0]} and {authors[1]}"
        else:
            authors_str = f"{', '.join(authors[:-1])}, and {authors[-1]}"

        caption = f"{row['title']} by {authors_str}: {truncated_description}"
        results.append((row["large_thumbnail"], caption))

    return results
# =======================
# Build Gradio dashboard
# =======================
# Dropdown choices. Missing categories are dropped first because sorted()
# cannot compare NaN (a float) with the category strings.
categories = ["All"] + sorted(books["simple_categories"].dropna().unique())
tones = ["All", "Happy", "Surprising", "Angry", "Suspenseful", "Sad"]

with gr.Blocks() as dashboard:
    gr.Markdown("# 📚 Semantic Book Recommender")

    # Query inputs and the submit button.
    # NOTE(review): the source lost its indentation; placing all four
    # controls inside the row is an assumption -- confirm the intended layout.
    with gr.Row():
        user_query = gr.Textbox(
            label="Please enter a description of a book:",
            placeholder="e.g., A story about forgiveness",
        )
        category_dropdown = gr.Dropdown(
            choices=categories,
            label="Select a category:",
            value="All",
        )
        tone_dropdown = gr.Dropdown(
            choices=tones,
            label="Select an emotional tone:",
            value="All",
        )
        submit_button = gr.Button("Find recommendations")

    gr.Markdown("## 🧠 Recommendations")
    output = gr.Gallery(label="Recommended books", columns=8, rows=2)

    # Clicking the button sends the three inputs to recommend_books and shows
    # the returned (image, caption) pairs in the gallery.
    submit_button.click(
        fn=recommend_books,
        inputs=[user_query, category_dropdown, tone_dropdown],
        outputs=output,
    )

if __name__ == "__main__":
    dashboard.launch()
|