File size: 4,094 Bytes
779489a
 
 
 
 
 
 
 
 
 
 
 
267ad08
779489a
 
 
 
 
 
 
 
 
 
633b888
779489a
 
 
9a5c160
779489a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import pandas as pd
import numpy as np
import gradio as gr

from langchain_community.document_loaders import TextLoader
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import CharacterTextSplitter
from langchain_chroma import Chroma

# =======================
# Load and preprocess books
# =======================
# Read the book catalogue and attach a `large_thumbnail` column in one step.
# Rows with a thumbnail get "&fife=w800" appended to its URL (presumably a
# request for a larger rendition from the image host -- TODO confirm); rows
# without one fall back to the placeholder image file.
books = pd.read_csv("data/books_with_emotions.csv").assign(
    large_thumbnail=lambda frame: np.where(
        frame["thumbnail"].isna(),
        "cover-not-found.jpg",
        frame["thumbnail"] + "&fife=w800",
    )
)

# =======================
# Prepare Chroma vector DB
# =======================
# Splitter that breaks on newlines. chunk_size=1 with no overlap is presumably
# there so that lines are never merged back together, i.e. every line of the
# tagged-description file becomes its own document (TODO confirm against the
# splitter's merge behaviour).
text_splitter = CharacterTextSplitter(separator="\n", chunk_size=1, chunk_overlap=0)
raw_documents = TextLoader("data/tagged_description.txt").load()
documents = text_splitter.split_documents(raw_documents)

# Embed the documents with OpenAI embeddings and index them in a Chroma store.
db_books = Chroma.from_documents(documents=documents, embedding=OpenAIEmbeddings())

# =======================
# Semantic retrieval logic
# =======================
def retrieve_semantic_recommendations(query: str,
                                      category: str = "All",
                                      tone: str = "All",
                                      initial_top_k: int = 50,
                                      final_top_k: int = 16) -> pd.DataFrame:
    """Return book recommendations for a free-text query.

    The vector store is asked for the ``initial_top_k`` documents closest to
    ``query``. The matching rows of ``books`` are kept in that retrieval
    order, optionally narrowed to one category, and optionally re-ordered by
    an emotion score.

    Args:
        query: Free-text description of the kind of book wanted.
        category: Value of the ``simple_categories`` column to keep, or
            ``"All"`` to skip the category filter.
        tone: ``"Happy"``, ``"Surprising"``, ``"Angry"``, ``"Suspenseful"``
            or ``"Sad"`` to sort by the corresponding emotion column
            (highest first); any other value leaves the retrieval order.
        initial_top_k: Number of documents requested from the vector store.
        final_top_k: Maximum number of rows returned.

    Returns:
        A slice of ``books`` with at most ``final_top_k`` rows; empty when the
        query is blank or nothing matches.
    """
    # A blank query has nothing to match on, so skip the embedding call.
    if not query or not query.strip():
        return books.head(0)

    recs = db_books.similarity_search(query, k=initial_top_k)

    # Each stored document is expected to start with the book's ISBN-13,
    # possibly wrapped in double quotes. Record the first (best) position at
    # which every ISBN appears; results are assumed to arrive best match first.
    rank_by_isbn = {}
    for rec in recs:
        tokens = rec.page_content.strip('"').split()
        try:
            isbn = int(tokens[0])
        except (IndexError, ValueError):
            # Blank or malformed chunk: it cannot be mapped to a book.
            continue
        rank_by_isbn.setdefault(isbn, len(rank_by_isbn))

    # isin() yields rows in CSV order, which would discard the similarity
    # ranking; re-order the matches by their retrieval rank.
    matches = books[books["isbn13"].isin(list(rank_by_isbn))]
    ranks = matches["isbn13"].map(rank_by_isbn).to_numpy()
    book_recs = matches.iloc[np.argsort(ranks, kind="stable")].head(initial_top_k)

    # Optional category filter.
    if category != "All":
        book_recs = book_recs[book_recs["simple_categories"] == category]

    # Optional ordering by emotional tone.
    tone_sort_map = {
        "Happy": "joy",
        "Surprising": "surprise",
        "Angry": "anger",
        "Suspenseful": "fear",
        "Sad": "sadness"
    }
    if tone in tone_sort_map:
        # A stable sort keeps the similarity order among equal emotion scores.
        book_recs = book_recs.sort_values(by=tone_sort_map[tone],
                                          ascending=False,
                                          kind="stable")

    return book_recs.head(final_top_k)

# =======================
# Recommendation formatting
# =======================
def recommend_books(query: str, category: str, tone: str):
    """Build the gallery entries for a recommendation request.

    Args:
        query: Free-text description of the kind of book wanted.
        category: Category filter passed to the retrieval step.
        tone: Emotional tone passed to the retrieval step.

    Returns:
        A list of ``(image, caption)`` tuples, one per recommended book. The
        image is the row's ``large_thumbnail`` value and the caption combines
        the title, the authors and the first 30 words of the description.
    """
    recommendations = retrieve_semantic_recommendations(query, category, tone)
    results = []

    for _, row in recommendations.iterrows():
        # A missing CSV cell arrives as NaN (a float), so only split real strings.
        description = row["description"]
        words = description.split() if isinstance(description, str) else []
        truncated_description = " ".join(words[:30])
        if len(words) > 30:
            # Only signal truncation when something was actually cut off.
            truncated_description += "..."

        raw_authors = row["authors"]
        if isinstance(raw_authors, str):
            # Authors are ";"-separated; ignore empty fragments from stray ";".
            authors = [name.strip() for name in raw_authors.split(";") if name.strip()]
        else:
            authors = []

        if not authors:
            byline = ""
        elif len(authors) == 1:
            byline = f" by {authors[0]}"
        elif len(authors) == 2:
            byline = f" by {authors[0]} and {authors[1]}"
        else:
            byline = f" by {', '.join(authors[:-1])}, and {authors[-1]}"

        caption = f"{row['title']}{byline}: {truncated_description}"
        results.append((row["large_thumbnail"], caption))

    return results

# =======================
# Build Gradio dashboard
# =======================
# Choices for the two dropdowns. Missing categories are dropped first: sorted()
# cannot order NaN against strings, and NaN is not a meaningful choice.
categories = ["All"] + sorted(books["simple_categories"].dropna().unique())
tones = ["All", "Happy", "Surprising", "Angry", "Suspenseful", "Sad"]

# Assemble the UI: a title, one row of inputs, then the results gallery.
with gr.Blocks() as dashboard:
    gr.Markdown("# 📚 Semantic Book Recommender")

    with gr.Row():
        user_query = gr.Textbox(placeholder="e.g., A story about forgiveness",
                                label="Please enter a description of a book:")
        category_dropdown = gr.Dropdown(value="All",
                                        choices=categories,
                                        label="Select a category:")
        tone_dropdown = gr.Dropdown(value="All",
                                    choices=tones,
                                    label="Select an emotional tone:")
        submit_button = gr.Button("Find recommendations")

    gr.Markdown("## 🧠 Recommendations")
    output = gr.Gallery(label="Recommended books", rows=2, columns=8)

    # Clicking the button passes the three input values to recommend_books and
    # sends its return value to the gallery.
    submit_button.click(recommend_books,
                        [user_query, category_dropdown, tone_dropdown],
                        output)

# Launch the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    dashboard.launch()