Spaces:

Felix273
/

Book-Recommender-System

Sleeping

File size: 4,094 Bytes

import pandas as pd
import numpy as np
import gradio as gr

from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma

# =======================
# Load and preprocess books
# =======================
# Load the book table and derive a "large_thumbnail" column in one chain:
# rows that have a thumbnail URL get "&fife=w800" appended (presumably a
# request for a wider image -- confirm against the image host), and rows
# without one fall back to the placeholder file name.
books = pd.read_csv("data/books_with_emotions.csv").assign(
    large_thumbnail=lambda frame: np.where(
        frame["thumbnail"].notna(),
        frame["thumbnail"] + "&fife=w800",
        "cover-not-found.jpg",
    )
)

# =======================
# Prepare Chroma vector DB
# =======================
# The retrieval code reads the first token of every document as an ISBN, so
# each newline-separated record in the file is expected to end up as its own
# document. Splitting on "\n" with chunk_size=1 presumably keeps the splitter
# from merging neighbouring lines -- confirm against the splitter's docs.
raw_documents = TextLoader("data/tagged_description.txt").load()
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1,
    chunk_overlap=0,
)
documents = text_splitter.split_documents(raw_documents)

# Embed every document with OpenAI embeddings and index them in Chroma.
db_books = Chroma.from_documents(
    documents=documents,
    embedding=OpenAIEmbeddings(),
)

# =======================
# Semantic retrieval logic
# =======================
def retrieve_semantic_recommendations(query: str,
                                      category: str = "All",
                                      tone: str = "All",
                                      initial_top_k: int = 50,
                                      final_top_k: int = 16) -> pd.DataFrame:
    """Return books semantically similar to ``query``, filtered and re-ranked.

    The vector store is asked for the ``initial_top_k`` documents closest to
    ``query``; the ISBN at the start of each document is used to look the
    book up in the module-level ``books`` table.

    Args:
        query: Free-text description of the kind of book wanted.
        category: Value of ``simple_categories`` to keep, or ``"All"`` to
            skip category filtering.
        tone: One of "Happy", "Surprising", "Angry", "Suspenseful", "Sad" to
            sort by the matching emotion column (highest first). Any other
            value, including ``"All"``, leaves the similarity order as is.
        initial_top_k: Number of candidates requested from the vector store.
        final_top_k: Maximum number of rows returned.

    Returns:
        At most ``final_top_k`` rows of ``books``. Rows are ordered by
        similarity to the query unless a tone is given, in which case they
        are ordered by that emotion score with similarity breaking ties.
    """
    recs = db_books.similarity_search(query, k=initial_top_k)

    # Remember the best (lowest) position at which each ISBN was returned, so
    # the similarity ranking can be restored after the DataFrame lookup.
    rank_by_isbn = {}
    for rank, rec in enumerate(recs):
        isbn = int(rec.page_content.strip('"').split()[0])
        rank_by_isbn.setdefault(isbn, rank)

    # Select the matching books by ISBN. A boolean mask returns rows in the
    # table's own order, so reorder them positionally by similarity rank;
    # otherwise the final head() would keep whichever matches happen to come
    # first in the CSV rather than the most relevant ones.
    matches = books[books["isbn13"].isin(list(rank_by_isbn))]
    relevance_order = (
        matches["isbn13"].map(rank_by_isbn).to_numpy().argsort(kind="stable")
    )
    book_recs = matches.iloc[relevance_order].head(initial_top_k)

    # Filter by category.
    if category != "All":
        book_recs = book_recs[book_recs["simple_categories"] == category]

    # Sort by emotional tone. The stable sort keeps similarity order among
    # books with equal emotion scores.
    tone_sort_map = {
        "Happy": "joy",
        "Surprising": "surprise",
        "Angry": "anger",
        "Suspenseful": "fear",
        "Sad": "sadness"
    }
    if tone in tone_sort_map:
        book_recs = book_recs.sort_values(
            by=tone_sort_map[tone], ascending=False, kind="stable"
        )

    return book_recs.head(final_top_k)

# =======================
# Recommendation formatting
# =======================
def _format_authors(authors) -> str:
    """Turn a ';'-separated author field into a human-readable list."""
    # Missing CSV cells arrive as NaN (a float), which has no .split().
    if not isinstance(authors, str):
        return "Unknown author"
    names = [name.strip() for name in authors.split(";") if name.strip()]
    if not names:
        return "Unknown author"
    if len(names) == 1:
        return names[0]
    if len(names) == 2:
        return f"{names[0]} and {names[1]}"
    return f"{', '.join(names[:-1])}, and {names[-1]}"


def _truncate_description(description, max_words: int = 30) -> str:
    """Return the first ``max_words`` words, adding "..." only if text was cut."""
    # Missing CSV cells arrive as NaN (a float), which has no .split().
    if not isinstance(description, str):
        return ""
    words = description.split()
    if len(words) <= max_words:
        return " ".join(words)
    return " ".join(words[:max_words]) + "..."


def recommend_books(query: str, category: str, tone: str):
    """Build gallery items for the books recommended for ``query``.

    Args:
        query: Free-text description of the kind of book wanted.
        category: Category filter passed to
            ``retrieve_semantic_recommendations``.
        tone: Emotional-tone option passed to
            ``retrieve_semantic_recommendations``.

    Returns:
        A list of ``(image, caption)`` tuples, one per recommended book.
        ``image`` is the row's ``large_thumbnail`` value; ``caption`` has the
        form "<title> by <authors>: <description, at most 30 words>".
    """
    recommendations = retrieve_semantic_recommendations(query, category, tone)
    results = []

    for _, row in recommendations.iterrows():
        authors_str = _format_authors(row["authors"])
        truncated_description = _truncate_description(row["description"])

        caption = f"{row['title']} by {authors_str}: {truncated_description}"
        results.append((row["large_thumbnail"], caption))

    return results

# =======================
# Build Gradio dashboard
# =======================
# Dropdown choices. Missing category values are dropped before sorting
# because NaN (a float) cannot be ordered against strings, so sorted() would
# raise TypeError on a column that contains any.
categories = ["All"] + sorted(books["simple_categories"].dropna().unique())
# Every entry except "All" is a key of tone_sort_map in
# retrieve_semantic_recommendations; "All" leaves the results unsorted by tone.
tones = ["All", "Happy", "Surprising", "Angry", "Suspenseful", "Sad"]

# Page layout, top to bottom: title, one row of input controls, a section
# heading, and the results gallery.
with gr.Blocks() as dashboard:
    gr.Markdown("# 📚 Semantic Book Recommender")

    # Input controls: free-text query, category filter, tone filter, and the
    # button that triggers the search.
    with gr.Row():
        user_query = gr.Textbox(
            label="Please enter a description of a book:",
            placeholder="e.g., A story about forgiveness"
        )
        category_dropdown = gr.Dropdown(
            choices=categories,
            label="Select a category:",
            value="All"
        )
        tone_dropdown = gr.Dropdown(
            choices=tones,
            label="Select an emotional tone:",
            value="All"
        )
        submit_button = gr.Button("Find recommendations")

    gr.Markdown("## 🧠 Recommendations")
    # 8 columns x 2 rows gives 16 tiles, the same number as the default
    # final_top_k of retrieve_semantic_recommendations.
    output = gr.Gallery(label="Recommended books", columns=8, rows=2)

    # On click, call recommend_books with the three control values and send
    # the list of (image, caption) tuples it returns to the gallery.
    submit_button.click(fn=recommend_books,
                        inputs=[user_query, category_dropdown, tone_dropdown],
                        outputs=output)

# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    dashboard.launch()