# app.py
import os
import time
import numpy as np
import pandas as pd
import faiss
import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
# Load variables from a .env file (when present) before any of them are read.
load_dotenv()

# Data artefacts read at start-up.
BOOKS_CSV = "books_with_emotions.csv"
FAISS_INDEX_PATH = "books.index"
ID_MAP_PATH = "id_map.npy"

# Hugging Face credentials and the embedding model used for query vectors.
HF_TOKEN = os.environ.get("HF_TOKEN")
HF_EMBEDDING_MODEL = os.environ.get(
    "HF_EMBEDDING_MODEL",
    "sentence-transformers/all-MiniLM-L6-v2",
)

# Fail at import time instead of on the first search when no token is set.
if HF_TOKEN is None or HF_TOKEN == "":
    raise RuntimeError("HF_TOKEN missing. Set in .env (local) or HF Spaces Secrets.")

client = InferenceClient(provider="hf-inference", api_key=HF_TOKEN)
# -----------------------------
# LOAD DATA
# -----------------------------
# Book metadata. ISBNs are converted to strings, the same type id_map is cast to.
books = pd.read_csv(BOOKS_CSV)
books["isbn13"] = books["isbn13"].astype(str)

# Thumbnail URL with the "&fife=w800" suffix appended; rows that have no
# thumbnail get the placeholder image instead.
books["large_thumbnail"] = (books["thumbnail"] + "&fife=w800").fillna("cover-not-found.jpg")

# Vector index, plus the array used to translate index positions into ISBNs.
index = faiss.read_index(FAISS_INDEX_PATH)
id_map = np.load(ID_MAP_PATH, allow_pickle=True).astype(str)
# -----------------------------
# EMBEDDING
# -----------------------------
def hf_embed_query(text: str, retry=3, sleep_s=2.0) -> np.ndarray:
    """Embed *text* through the Hugging Face client and return a unit-length vector.

    Args:
        text: Query text to embed.
        retry: Maximum number of attempts.
        sleep_s: Base delay in seconds; the wait after attempt ``i`` (1-based)
            is ``sleep_s * i``.

    Returns:
        A ``float32`` array of shape ``(1, dim)``, L2-normalised in place by
        ``faiss.normalize_L2``.

    Raises:
        RuntimeError: If every attempt failed. The last underlying exception
            is attached as ``__cause__``.
    """
    last_err = None
    for attempt in range(retry):
        try:
            out = client.feature_extraction(text, model=HF_EMBEDDING_MODEL)
            arr = np.array(out, dtype=np.float32)
            if arr.ndim == 1:
                # Already a single pooled vector.
                v = arr
            elif arr.ndim == 2:
                # Token-level output -> mean pool over the first axis.
                v = arr.mean(axis=0)
            else:
                # Extra leading axes: flatten them, then mean pool.
                v = arr.reshape(-1, arr.shape[-1]).mean(axis=0)
            v = v.reshape(1, -1).astype(np.float32)
            faiss.normalize_L2(v)
            return v
        except Exception as e:
            last_err = e
            # Back off only when another attempt follows; sleeping after the
            # final failure would just delay the error.
            if attempt + 1 < retry:
                time.sleep(sleep_s * (attempt + 1))
    raise RuntimeError(f"HF query embedding failed: {last_err}") from last_err
# -----------------------------
# RETRIEVAL + FILTERING
# -----------------------------
# Tone label chosen in the UI -> name of the emotion-score column to rank by.
TONE_TO_COL = dict(
    Happy="joy",
    Surprising="surprise",
    Angry="anger",
    Suspenseful="fear",
    Sad="sadness",
)
def retrieve_semantic_recommendations(query: str, category: str, tone: str, initial_top_k=80, final_top_k=16):
    """Return the books that best match *query*, optionally filtered and re-ranked.

    Args:
        query: Free-text description of the wanted book.
        category: Value of ``simple_categories`` to keep; ``"All"`` or an
            empty value disables the filter.
        tone: Key of ``TONE_TO_COL``; ``"All"`` or an empty value keeps the
            retrieval order.
        initial_top_k: Number of neighbours requested from the index.
        final_top_k: Maximum number of rows returned.

    Returns:
        A copy of the matching ``books`` rows with two extra columns:
        ``rank`` (position in the retrieval order) and ``sim`` (the score
        returned by the index).
    """
    qv = hf_embed_query(query)
    scores, idx = index.search(qv, initial_top_k)

    # FAISS pads the result with -1 when it finds fewer than k neighbours.
    # Used as an array index, -1 would silently select the last entry of
    # id_map, so those slots are dropped first.
    found = idx[0] >= 0
    retrieved_isbns = id_map[idx[0][found]].tolist()
    retrieved_scores = scores[0][found].tolist()

    rank_df = pd.DataFrame({
        "isbn13": [str(x) for x in retrieved_isbns],
        "rank": list(range(len(retrieved_isbns))),
        "sim": retrieved_scores,
    })
    recs = (
        books.merge(rank_df, on="isbn13", how="inner")
        .sort_values("rank")
        .copy()
    )

    if category and category != "All":
        recs = recs[recs["simple_categories"] == category]
    recs = recs.head(initial_top_k)

    if tone and tone != "All":
        col = TONE_TO_COL.get(tone)
        # An unknown tone, or a dataset without that column, keeps the retrieval order.
        if col in recs.columns:
            recs = recs.sort_values(by=col, ascending=False)

    return recs.head(final_top_k).copy()
# -----------------------------
# UI HELPERS
# -----------------------------
def format_authors(authors_raw: str) -> str:
    """Turn a ``;``-separated author string into a readable list.

    One name is returned as is, two are joined with "and", and three or more
    are comma-separated with a final ", and". Missing values (``None``, NaN)
    and strings that contain no name yield an empty string.
    """
    # NaN is truthy, so `authors_raw or ""` alone would let it through as "nan".
    if pd.api.types.is_scalar(authors_raw) and pd.isna(authors_raw):
        return ""

    names = [part.strip() for part in str(authors_raw or "").split(";") if part.strip()]
    if not names:
        return ""
    if len(names) == 1:
        # Return the cleaned name, not the raw input, so stray separators
        # and surrounding whitespace are not shown.
        return names[0]
    if len(names) == 2:
        return f"{names[0]} and {names[1]}"
    return f"{', '.join(names[:-1])}, and {names[-1]}"
def truncate(text: str, n_words=28) -> str:
    """Return at most the first *n_words* whitespace-separated words of *text*.

    An ellipsis ("…") is appended only when words were dropped. Runs of
    whitespace collapse to single spaces. Missing values (``None``, NaN)
    yield an empty string.
    """
    # NaN is truthy, so `text or ""` alone would let it through as "nan".
    if pd.api.types.is_scalar(text) and pd.isna(text):
        return ""

    words = str(text or "").split()
    if len(words) <= n_words:
        return " ".join(words)
    return " ".join(words[:n_words]) + "…"
def emotion_chips(row) -> str:
    """Describe the two strongest emotion scores of *row* as ``"name: 0.00"`` text.

    Args:
        row: Mapping-like object (a DataFrame row or a dict) that may hold the
            keys ``joy``, ``surprise``, ``anger``, ``fear`` and ``sadness``.

    Returns:
        The top two scores in descending order, separated by a space, or an
        empty string when no usable score is present.
    """
    scored = []
    for name in ("joy", "surprise", "anger", "fear", "sadness"):
        if name in row and pd.notna(row[name]):
            try:
                scored.append((name, float(row[name])))
            except (TypeError, ValueError):
                # Non-numeric cell: skip this emotion, keep the rest.
                continue

    scored.sort(key=lambda item: item[1], reverse=True)
    return " ".join(f"{name}: {score:.2f}" for name, score in scored[:2])
def cards_html(df: pd.DataFrame) -> str:
    """Render recommendation rows as an HTML grid of book cards.

    The markup uses the class names styled by the stylesheet in this file
    (``grid``, ``card``, ``cover``, ``info``, ``title``, ``authors``,
    ``meta``, ``badge``, ``score``, ``desc``, ``chips``, ``empty``).

    Args:
        df: Rows to render. The columns ``title``, ``authors``,
            ``simple_categories``, ``description``, ``large_thumbnail`` and
            ``sim`` are read when present.

    Returns:
        An HTML string: a placeholder box when *df* is ``None`` or empty,
        otherwise one card per row wrapped in a grid container.
    """
    import html  # Local import: escaping is only needed in this function.

    if df is None or df.empty:
        return (
            "<div class='empty'>"
            "No results found. Try a different query or set filters to All."
            "</div>"
        )

    def text(value) -> str:
        """Return *value* as HTML-escaped text; missing values become ""."""
        if pd.api.types.is_scalar(value) and pd.isna(value):
            return ""
        return html.escape(str(value))

    cards = []
    for _, row in df.iterrows():
        # Dataset text is escaped so characters such as "<" or "&" in a title
        # or description cannot break the markup.
        title = text(row.get("title", ""))
        authors = html.escape(format_authors(row.get("authors", "")))
        cat = text(row.get("simple_categories", "Unknown")) or "Unknown"
        desc = html.escape(truncate(row.get("description", ""), 28))
        img = text(row.get("large_thumbnail", "cover-not-found.jpg")) or "cover-not-found.jpg"
        sim = row.get("sim", None)
        sim_str = f"{float(sim):.3f}" if sim is not None else "—"
        # Produced by emotion_chips; inserted as returned.
        chips = emotion_chips(row)
        cards.append(f"""
<div class="card">
  <div class="cover"><img src="{img}" alt="{title}" loading="lazy"></div>
  <div class="info">
    <div class="title">{title}</div>
    <div class="authors">{authors}</div>
    <div class="meta">
      <span class="badge">{cat}</span>
      <span class="score">Similarity: {sim_str}</span>
    </div>
    <div class="desc">{desc}</div>
    <div class="chips">{chips}</div>
  </div>
</div>
""")
    return f"<div class='grid'>{''.join(cards)}</div>"
# -----------------------------
# MAIN ACTION
# -----------------------------
def run_search(query, category, tone, top_k):
    """Run one search for the UI and return the HTML to display.

    Args:
        query: Text typed by the user.
        category: Selected category filter ("All" disables it).
        tone: Selected emotional tone ("All" disables re-ranking).
        top_k: Requested number of results; converted with ``int``.

    Returns:
        A hint box when *query* is empty or only whitespace, otherwise the
        rendered result cards.
    """
    if not query or not query.strip():
        return "<div class='empty'>Type a short description to get recommendations.</div>"

    recs = retrieve_semantic_recommendations(
        query=query.strip(),
        category=category,
        tone=tone,
        initial_top_k=80,
        final_top_k=int(top_k),
    )
    return cards_html(recs)
# -----------------------------
# FANCY CSS
# -----------------------------
# Stylesheet passed to demo.launch(css=...). It defines the page wrapper and
# hero header, a results grid that goes from 4 columns down to 1 at the
# max-width breakpoints below, the book card and its parts (cover, info,
# title, authors, meta, badge, score, desc, chips/chip), and the dashed
# "empty" box.
CSS = """
:root { --radius: 18px; }
.wrap { max-width: 1200px; margin: 0 auto; }
.hero { padding: 18px 18px 6px 18px; }
.hero h1 { margin: 0; font-size: 28px; }
.hero p { margin: 6px 0 0 0; opacity: 0.85; }
.grid {
display: grid;
grid-template-columns: repeat(4, minmax(220px, 1fr));
gap: 14px;
padding: 8px 2px 2px 2px;
}
@media (max-width: 1100px) { .grid { grid-template-columns: repeat(3, minmax(220px, 1fr)); } }
@media (max-width: 850px) { .grid { grid-template-columns: repeat(2, minmax(220px, 1fr)); } }
@media (max-width: 520px) { .grid { grid-template-columns: 1fr; } }
.card {
border: 1px solid rgba(255,255,255,0.10);
border-radius: var(--radius);
overflow: hidden;
background: rgba(255,255,255,0.04);
box-shadow: 0 8px 30px rgba(0,0,0,0.18);
display: flex;
flex-direction: column;
min-height: 360px;
}
.cover { width: 100%; height: 220px; overflow: hidden; background: rgba(0,0,0,0.10); }
.cover img { width: 100%; height: 100%; object-fit: cover; display:block; }
.info { padding: 12px 12px 14px 12px; display:flex; flex-direction: column; gap: 6px; }
.title { font-weight: 700; font-size: 14.5px; line-height: 1.2; }
.authors { opacity: 0.85; font-size: 12.5px; }
.meta { display:flex; align-items:center; justify-content: space-between; gap: 8px; margin-top: 2px;}
.badge {
font-size: 11px;
padding: 4px 8px;
border-radius: 999px;
background: rgba(255,255,255,0.10);
border: 1px solid rgba(255,255,255,0.12);
}
.score { font-size: 11px; opacity: 0.85; }
.desc { font-size: 12px; opacity: 0.85; line-height: 1.35; margin-top: 2px; }
.chips { display:flex; flex-wrap: wrap; gap: 6px; margin-top: 6px;}
.chip {
font-size: 10.5px;
padding: 3px 8px;
border-radius: 999px;
background: rgba(255,255,255,0.07);
border: 1px solid rgba(255,255,255,0.10);
}
.empty {
padding: 14px;
border-radius: var(--radius);
border: 1px dashed rgba(255,255,255,0.18);
opacity: 0.9;
}
"""
# Category dropdown choices: "All" first, then every distinct non-null
# category from the dataset in sorted order.
categories = ["All", *sorted(books["simple_categories"].dropna().unique().tolist())]

# Tone dropdown choices.
tones = [
    "All",
    "Happy",
    "Surprising",
    "Angry",
    "Suspenseful",
    "Sad",
]

# Preset searches as (query, category, tone, number of results).
EXAMPLES = [
    (
        "A cozy small-town mystery with a charming detective and light humor",
        "All",
        "All",
        16,
    ),
    (
        "A deeply emotional story about healing after loss and finding hope",
        "All",
        "Sad",
        16,
    ),
    (
        "A fast-paced sci-fi adventure with space travel and big surprises",
        "All",
        "Surprising",
        16,
    ),
    (
        "A smart non-fiction book that explains psychology and human behavior",
        "All",
        "All",
        16,
    ),
]
def _example_filler(example):
    """Return a zero-argument callback that yields *example* unchanged.

    Each loop iteration gets its own closure, which avoids the late-binding
    problem of a lambda that reads the loop variable.
    """
    def fill():
        return example

    return fill


# Page layout: hero header, then a row with the query box and example buttons
# on the left and the filters on the right, then the results area.
with gr.Blocks() as demo:
    with gr.Column(elem_classes=["wrap"]):
        gr.HTML("""
<div class="hero">
  <h1>📚 Semantic Book Recommender</h1>
  <p>Describe a book vibe you want. Get semantic matches + optional category filter + emotion-based ranking.</p>
</div>
""")
        with gr.Row():
            with gr.Column(scale=2):
                query = gr.Textbox(
                    label="What kind of book are you looking for?",
                    placeholder="e.g., A suspenseful mystery with a clever twist and strong characters",
                    lines=2,
                )
                with gr.Row():
                    ex1 = gr.Button("✨ Cozy mystery")
                    ex2 = gr.Button("💔 Emotional healing")
                    ex3 = gr.Button("🚀 Sci-fi adventure")
                    ex4 = gr.Button("🧠 Smart non-fiction")
            with gr.Column(scale=1):
                gr.Markdown("### Filters")
                category = gr.Dropdown(choices=categories, value="All", label="Category")
                tone = gr.Dropdown(choices=tones, value="All", label="Emotional tone")
                top_k = gr.Slider(4, 24, value=16, step=4, label="Number of results")
                btn = gr.Button("🔎 Find recommendations", variant="primary")
        gr.Markdown("### Results")
        results = gr.HTML("<div class='empty'>Search results will appear here.</div>")

    # Normal search
    btn.click(run_search, inputs=[query, category, tone, top_k], outputs=results)

    # Example buttons: fill query, category, tone and result count from the
    # matching EXAMPLES entry in a single step, then run the search.
    for button, example in zip((ex1, ex2, ex3, ex4), EXAMPLES):
        button.click(
            _example_filler(example),
            inputs=None,
            outputs=[query, category, tone, top_k],
        ).then(run_search, inputs=[query, category, tone, top_k], outputs=results)
# Start the web server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch(
        server_name="0.0.0.0",
        server_port=7860,
        theme=gr.themes.Glass(),
        css=CSS,
        ssr_mode=False,
    )