EdwardConstantine committed on
Commit
00b8591
·
verified ·
1 Parent(s): b92aa9a

Upload 6 files

Browse files
README.md CHANGED
@@ -1,13 +1,30 @@
1
- ---
2
- title: Book Recommender
3
- emoji: 🐨
4
- colorFrom: pink
5
- colorTo: blue
6
- sdk: gradio
7
- sdk_version: 6.0.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Semantic Book Recommender
3
+ emoji: 📚
4
+ colorFrom: blue
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 3.43.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ # 📚 Semantic Book Recommender
14
+
15
+ An AI-powered book recommendation system that uses semantic search to find books based on your description.
16
+
17
+ ## Features
18
+
19
+ - 🔍 Semantic search using sentence transformers
20
+ - 🎭 Filter by emotional tone (Happy, Sad, Suspenseful, etc.)
21
+ - 📂 Filter by category
22
+ - 🖼️ Visual book gallery with cover images
23
+
24
+ ## How to Use
25
+
26
+ 1. Describe the type of book you're looking for
27
+ 2. Optionally select a category and emotional tone
28
+ 3. Click "Find Books" to see recommendations
29
+
30
+ Built with LangChain, ChromaDB, and Gradio.
app.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import pandas as pd
3
+ import numpy as np
4
+ import gc
5
+
6
+ # Environment variables (set in HF Spaces settings)
7
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
8
+ HUGGINGFACEHUB_API_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")
9
+
10
+ # -----------------------------
11
+ # LANGCHAIN IMPORTS
12
+ # -----------------------------
13
+ from langchain_huggingface import HuggingFaceEmbeddings
14
+ from langchain_community.document_loaders import TextLoader
15
+ from langchain_text_splitters import CharacterTextSplitter
16
+ from langchain_community.vectorstores import Chroma
17
+
18
+ # Gradio
19
+ import gradio as gr
20
+
21
print("Loading book data...")
# -----------------------------
# LOAD BOOK DATA
# -----------------------------
books = pd.read_csv("books_with_emotions.csv")

# Append the size suffix to every thumbnail URL. Rows with no thumbnail stay
# NaN after the concatenation, so they are pointed at the bundled placeholder.
# NOTE(review): "&fife=w800" presumably requests a wider cover image - confirm.
books["large_thumbnail"] = (books["thumbnail"] + "&fife=w800").fillna(
    "cover-not-found.jpg"
)
33
+
34
# -----------------------------
# BUILD OR REUSE THE SEMANTIC INDEX
# -----------------------------
file_path = "tagged_description.txt"

print("Initializing embeddings model...")
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={'device': 'cpu'},
    encode_kwargs={'normalize_embeddings': True}
)

# Open whatever is already persisted before ingesting anything. Calling
# Chroma.from_documents on every start against the same persist_directory
# adds the same chunks again, so a surviving ./chroma_db would fill up with
# duplicates and similarity_search would return repeated books.
db_books = Chroma(
    persist_directory="./chroma_db",
    embedding_function=embeddings,
)

if db_books.get(limit=1)["ids"]:
    # The store already holds vectors, so skip loading and re-embedding.
    # Delete ./chroma_db to force a rebuild after the description file changes.
    print("Reusing existing vector database...")
else:
    print("Loading documents...")
    loader = TextLoader(file_path, encoding="utf-8")
    raw_documents = loader.load()
    print(f"Loaded {len(raw_documents)} documents")

    # NOTE(review): chunk_size=1 with a newline separator presumably forces
    # one chunk per line of the description file - confirm against the data.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1,
        chunk_overlap=0
    )
    documents = text_splitter.split_documents(raw_documents)

    # Drop the raw text before embedding to keep peak memory down.
    del raw_documents, loader
    gc.collect()

    print("Creating vector database...")
    db_books = Chroma.from_documents(
        documents,
        embedding=embeddings,
        persist_directory="./chroma_db"
    )

    del documents, text_splitter

gc.collect()

print("Application ready!")
75
+
76
# Maps the tone labels offered in the UI to the emotion score column to sort by.
_TONE_TO_EMOTION_COLUMN = {
    "Happy": "joy",
    "Surprising": "surprise",
    "Angry": "anger",
    "Suspenseful": "fear",
    "Sad": "sadness",
}


def retrieve_semantic_recommendations(
    query: str,
    category: str = None,
    tone: str = None,
    initial_top_k: int = 50,
    final_top_k: int = 16,
) -> pd.DataFrame:
    """Return the books most similar to *query*, filtered and tone-sorted.

    Args:
        query: Free-text description of the wanted book.
        category: Value of ``simple_categories`` to keep. ``None``, an empty
            string and ``"All"`` all mean "no category filter".
        tone: One of the keys of ``_TONE_TO_EMOTION_COLUMN``; any other value
            (including ``None`` and ``"All"``) leaves the order untouched.
        initial_top_k: Number of nearest chunks requested from the vector store.
        final_top_k: Maximum number of rows returned.

    Returns:
        A slice of the module-level ``books`` frame, ordered by similarity
        rank, or by the tone's emotion score when a tone is selected.
    """
    recs = db_books.similarity_search(query, k=initial_top_k)

    # Each chunk is expected to start with the book's isbn13. Record the
    # position at which every id first appears so the similarity ranking can
    # be restored after the table lookup below.
    rank_by_isbn = {}
    for rec in recs:
        tokens = rec.page_content.strip('"').split()
        try:
            isbn = int(tokens[0])
        except (IndexError, ValueError):
            # A blank or malformed chunk should not abort the whole request.
            continue
        rank_by_isbn.setdefault(isbn, len(rank_by_isbn))

    # isin() yields rows in the table's own order, so reorder them by rank.
    # Otherwise head() would keep whichever matches come first in the CSV
    # rather than the closest ones.
    book_recs = books[books["isbn13"].isin(list(rank_by_isbn))]
    order = book_recs["isbn13"].map(rank_by_isbn).to_numpy().argsort(kind="stable")
    book_recs = book_recs.iloc[order].head(initial_top_k)

    # The default category is None, which must not be treated as a real
    # category (comparing the column to None would drop every row).
    if category and category != "All":
        book_recs = book_recs[book_recs["simple_categories"] == category]
    book_recs = book_recs.head(final_top_k)

    emotion_column = _TONE_TO_EMOTION_COLUMN.get(tone)
    if emotion_column is not None:
        # Sort into a new frame: book_recs is a slice of the shared table, so
        # an in-place sort would trigger pandas' chained-assignment warning.
        book_recs = book_recs.sort_values(by=emotion_column, ascending=False)

    return book_recs
105
+
106
def _truncate_words(text, limit=30):
    """Return the first *limit* words of *text*, adding "..." only when cut."""
    if not isinstance(text, str):
        # Missing descriptions arrive from pandas as NaN (a float).
        return ""
    words = text.split()
    if len(words) <= limit:
        return " ".join(words)
    return " ".join(words[:limit]) + "..."


def _format_authors(raw):
    """Turn a ';'-separated author field into a readable English list."""
    if not isinstance(raw, str):
        # Missing authors arrive from pandas as NaN (a float).
        return "Unknown author"
    names = raw.split(";")
    if len(names) == 2:
        return f"{names[0]} and {names[1]}"
    if len(names) > 2:
        return f"{', '.join(names[:-1])}, and {names[-1]}"
    return raw


def recommend_books(query: str, category: str, tone: str):
    """Build the gallery entries for a search request.

    Args:
        query: Free-text description typed by the user.
        category: Selected category, or "All".
        tone: Selected emotional tone, or "All".

    Returns:
        A list of ``(image, caption)`` tuples, one per recommended book. An
        empty list is returned when the lookup fails, so the UI callback never
        raises.
    """
    try:
        recommendations = retrieve_semantic_recommendations(query, category, tone)
        results = []

        for _, row in recommendations.iterrows():
            truncated_description = _truncate_words(row["description"])
            authors_str = _format_authors(row["authors"])

            caption = f"{row['title']} by {authors_str}: {truncated_description}"
            results.append((row["large_thumbnail"], caption))

        gc.collect()
        return results
    except Exception as e:
        # Top-level UI boundary: report the failure and show an empty gallery
        # instead of surfacing a traceback to the user.
        print(f"Error: {e}")
        return []
132
+
133
# Drop missing categories first: a NaN among the strings would make sorted()
# compare float with str and raise TypeError at import time.
categories = ["All"] + sorted(books["simple_categories"].dropna().unique())
tones = ["All"] + ["Happy", "Surprising", "Angry", "Suspenseful", "Sad"]

with gr.Blocks(theme=gr.themes.Glass()) as dashboard:
    gr.Markdown("# 📚 Semantic Book Recommender")
    gr.Markdown("Find your next favorite book using AI-powered semantic search!")

    with gr.Row():
        user_query = gr.Textbox(
            label="Describe the book you're looking for:",
            placeholder="e.g., A story about forgiveness and redemption",
            scale=2
        )

    with gr.Row():
        category_dropdown = gr.Dropdown(
            choices=categories,
            label="Category:",
            value="All",
            scale=1
        )
        tone_dropdown = gr.Dropdown(
            choices=tones,
            label="Emotional Tone:",
            value="All",
            scale=1
        )
        submit_button = gr.Button("🔍 Find Books", variant="primary", scale=1)

    gr.Markdown("## 📖 Recommendations")
    output = gr.Gallery(label="Recommended Books", columns=4, rows=4, height="auto")

    # Clicking the button and pressing Enter in the textbox run the same search.
    submit_button.click(
        fn=recommend_books,
        inputs=[user_query, category_dropdown, tone_dropdown],
        outputs=output
    )

    user_query.submit(
        fn=recommend_books,
        inputs=[user_query, category_dropdown, tone_dropdown],
        outputs=output
    )

if __name__ == "__main__":
    dashboard.launch()
books_with_emotions.csv ADDED
The diff for this file is too large to render. See raw diff
 
cover-not-found.jpg ADDED
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pandas==2.3.3
2
+ numpy==1.26.4
3
+ gradio==3.43.0
4
+ langchain-core==0.3.63
5
+ langchain-community==0.3.10
6
+ langchain-huggingface==0.2.0
7
+ langchain-text-splitters==0.3.8
8
+ chromadb==1.3.5
9
+ sentence-transformers==2.6.0
10
+ transformers==4.57.1
11
+ huggingface-hub==0.36.0
12
+ torch
13
+ torchvision
tagged_description.txt ADDED
The diff for this file is too large to render. See raw diff