rakeshjv2000 commited on
Commit
516e2ce
·
verified ·
1 Parent(s): c3027ad

Upload 6 files

Browse files
Files changed (7) hide show
  1. .gitattributes +1 -0
  2. app.py +181 -0
  3. books.index +3 -0
  4. books_with_emotions.csv +0 -0
  5. cover-not-found.jpg +0 -0
  6. id_map.npy +3 -0
  7. requirements.txt +6 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ books.index filter=lfs diff=lfs merge=lfs -text
app.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import numpy as np
import pandas as pd
import faiss
import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Pull variables from a local .env into the environment; a no-op on HF Spaces,
# where Secrets are already injected as environment variables.
load_dotenv()

# -----------------------------
# CONFIG
# -----------------------------
BOOKS_CSV = "books_with_emotions.csv"
FAISS_INDEX_PATH = "books.index"
ID_MAP_PATH = "id_map.npy"  # isbn13 list aligned with FAISS vectors

HF_TOKEN = os.getenv("HF_TOKEN")
HF_EMBEDDING_MODEL = os.getenv("HF_EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")

if not HF_TOKEN:
    # Works locally if you set env var / .env, and on Spaces if set as Secret.
    raise RuntimeError("HF_TOKEN missing. Set in .env (local) or HF Spaces Secrets.")

client = InferenceClient(provider="hf-inference", api_key=HF_TOKEN)

# -----------------------------
# LOAD DATA
# -----------------------------
books = pd.read_csv(BOOKS_CSV)
# isbn13 is the join key against id_map; force string dtype so merges match.
books["isbn13"] = books["isbn13"].astype(str)

# Keep your thumbnail behavior exactly
# NOTE: in pandas, NaN + "&fife=w800" stays NaN, so the np.where below still
# catches rows with a missing thumbnail and substitutes the placeholder image.
books["large_thumbnail"] = books["thumbnail"] + "&fife=w800"
books["large_thumbnail"] = np.where(
    books["large_thumbnail"].isna(),
    "cover-not-found.jpg",
    books["large_thumbnail"],
)

# Load FAISS + id_map (must match index order)
index = faiss.read_index(FAISS_INDEX_PATH)
id_map = np.load(ID_MAP_PATH, allow_pickle=True).astype(str)
44
+
45
+ # -----------------------------
46
+ # EMBEDDING: HF InferenceClient
47
+ # -----------------------------
48
def hf_embed_query(text: str, retry=3, sleep_s=2.0) -> np.ndarray:
    """Embed a query string via the HF Inference API, with retries.

    Parameters
    ----------
    text : query text to embed.
    retry : number of attempts before giving up.
    sleep_s : base back-off in seconds; attempt i waits sleep_s * (i + 1).

    Returns
    -------
    np.ndarray of shape (1, dim), float32, L2-normalized so that inner
    product against an IndexFlatIP behaves as cosine similarity.

    Raises
    ------
    RuntimeError if all attempts fail (chained to the last API error).
    """
    import time  # stdlib; local import keeps the module import section untouched

    last_err = None
    for attempt in range(retry):
        try:
            out = client.feature_extraction(text, model=HF_EMBEDDING_MODEL)
            arr = np.array(out, dtype=np.float32)

            # Normalize the API's possible output shapes to one vector:
            if arr.ndim == 2:
                # token-level output (tokens, dim) -> mean pool
                v = arr.mean(axis=0)
            elif arr.ndim == 1:
                # already a sentence-level embedding
                v = arr
            else:
                # batched/extra-dim output: flatten to (n, dim), then mean pool
                v = arr.reshape(-1, arr.shape[-1]).mean(axis=0)

            v = v.reshape(1, -1).astype(np.float32)
            faiss.normalize_L2(v)  # in-place; required for cosine via IP index
            return v
        except Exception as e:
            last_err = e
            # Only back off if another attempt remains — the original slept
            # even after the final failure, delaying the raise for no reason.
            if attempt + 1 < retry:
                time.sleep(sleep_s * (attempt + 1))

    raise RuntimeError(f"HF query embedding failed after retries: {last_err}") from last_err
75
+
76
+ # -----------------------------
77
+ # RETRIEVAL + FILTERING (same logic)
78
+ # -----------------------------
79
def retrieve_semantic_recommendations(
    query: str,
    category: str = None,
    tone: str = None,
    initial_top_k: int = 50,
    final_top_k: int = 16,
) -> pd.DataFrame:
    """Vector-search books for `query`, then filter by category and sort by tone.

    Parameters
    ----------
    query : free-text book description from the user.
    category : value of `simple_categories` to keep; None or "All" disables it.
    tone : one of Happy/Surprising/Angry/Suspenseful/Sad; None or "All" keeps
        the semantic ranking.
    initial_top_k : candidates fetched from FAISS before filtering.
    final_top_k : rows returned after filtering.

    Returns
    -------
    pd.DataFrame of up to `final_top_k` book rows, in ranked order.
    """
    # 1) Vector search
    qv = hf_embed_query(query)
    scores, idx = index.search(qv, initial_top_k)

    # 2) Map FAISS positions -> isbn13.
    # FAISS pads idx with -1 when fewer than initial_top_k neighbors exist;
    # id_map[-1] would silently alias the LAST book, so drop those slots.
    positions = [int(i) for i in idx[0] if i >= 0]
    retrieved_isbns = [str(x) for x in id_map[positions]]

    # 3) Preserve retrieval order using rank column
    rank_df = pd.DataFrame({"isbn13": retrieved_isbns, "rank": range(len(retrieved_isbns))})

    book_recs = (
        books.merge(rank_df, on="isbn13", how="inner")
        .sort_values("rank")
        .head(initial_top_k)
        .copy()
    )

    # 4) Category filter ("All" / None / "" means no filtering)
    if category and category != "All":
        book_recs = book_recs[book_recs["simple_categories"] == category].head(final_top_k)
    else:
        book_recs = book_recs.head(final_top_k)

    # 5) Tone sorting: each tone maps to an emotion-score column; any other
    # value (including "All") leaves the semantic ranking untouched.
    # Plain sort_values (no inplace) avoids SettingWithCopy pitfalls on the
    # filtered frame.
    tone_column = {
        "Happy": "joy",
        "Surprising": "surprise",
        "Angry": "anger",
        "Suspenseful": "fear",
        "Sad": "sadness",
    }.get(tone)
    if tone_column:
        book_recs = book_recs.sort_values(by=tone_column, ascending=False)

    return book_recs
125
+
126
+ # -----------------------------
127
+ # OUTPUT FORMAT (same as yours)
128
+ # -----------------------------
129
def recommend_books(query: str, category: str, tone: str):
    """Build (image, caption) tuples for the gr.Gallery output.

    Parameters
    ----------
    query / category / tone : forwarded to retrieve_semantic_recommendations.

    Returns
    -------
    list[tuple[str, str]] of (thumbnail URL or placeholder path, caption).
    """
    recommendations = retrieve_semantic_recommendations(query, category, tone)
    results = []

    for _, row in recommendations.iterrows():
        # Missing descriptions arrive as NaN; str(NaN) == "nan" would leak the
        # literal word "nan" into the caption, so blank them out first.
        raw_description = row.get("description", "")
        description = "" if pd.isna(raw_description) else str(raw_description)
        words = description.split()
        truncated_description = " ".join(words[:30]) + "..." if words else ""

        # Same NaN guard for authors; the CSV separates co-authors with ";".
        raw_authors = row.get("authors", "")
        authors_raw = "" if pd.isna(raw_authors) else str(raw_authors)
        authors_split = [a.strip() for a in authors_raw.split(";") if a.strip()]

        # Render the author list in natural English ("A and B", "A, B, and C").
        if len(authors_split) == 2:
            authors_str = f"{authors_split[0]} and {authors_split[1]}"
        elif len(authors_split) > 2:
            authors_str = f"{', '.join(authors_split[:-1])}, and {authors_split[-1]}"
        else:
            authors_str = authors_raw

        caption = f"{row.get('title','')} by {authors_str}: {truncated_description}"
        results.append((row["large_thumbnail"], caption))

    return results
152
+
153
+ # -----------------------------
154
+ # UI (unchanged)
155
+ # -----------------------------
156
# -----------------------------
# UI (unchanged)
# -----------------------------
# Dropdown choices; "All" disables the corresponding filter downstream.
categories = ["All"] + sorted(books["simple_categories"].dropna().unique())
tones = ["All", "Happy", "Surprising", "Angry", "Suspenseful", "Sad"]

with gr.Blocks(theme=gr.themes.Glass()) as dashboard:
    gr.Markdown("# Semantic book recommender")

    # Input row: free-text query plus the two filters and the submit button.
    with gr.Row():
        query_box = gr.Textbox(
            label="Please enter a description of a book:",
            placeholder="e.g., A story about forgiveness"
        )
        category_select = gr.Dropdown(choices=categories, label="Select a category:", value="All")
        tone_select = gr.Dropdown(choices=tones, label="Select an emotional tone:", value="All")
        search_button = gr.Button("Find recommendations")

    # Results gallery fed by recommend_books' (image, caption) tuples.
    gr.Markdown("## Recommendations")
    results_gallery = gr.Gallery(label="Recommended books", columns=8, rows=2)

    search_button.click(
        fn=recommend_books,
        inputs=[query_box, category_select, tone_select],
        outputs=results_gallery
    )

if __name__ == "__main__":
    # 0.0.0.0:7860 is the standard bind for HF Spaces / containers.
    dashboard.launch(server_name="0.0.0.0", server_port=7860)
books.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fabd79cca3f7ae45ca2df4e649d107658bc91b0196b4a07753c8bc759a0c4c36
3
+ size 7982637
books_with_emotions.csv ADDED
The diff for this file is too large to render. See raw diff
 
cover-not-found.jpg ADDED
id_map.npy ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:465089598bbd2651e8fd947f5738740fa2188e508319a12b487887a14b173986
3
+ size 83449
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ gradio
2
+ pandas
3
+ numpy
4
+ python-dotenv
5
+ faiss-cpu
6
+ huggingface_hub