# app_gradio.py
"""Gradio front end for BookRec.

Looks up the book a user liked, asks the vector store for the most similar
catalog entries, and renders them as markdown summaries plus a cover gallery.
"""
import os
from pathlib import Path
from typing import Any, Dict

import gradio as gr

from data_sources import google_books, openlibrary_enrich, normalize
from index_store_gradio import VectorStore
from utils import jaccard

# ----- locate catalog -----
BASE_DIR = Path(__file__).parent
CATALOG = BASE_DIR / "data" / "catalog.jsonl"
if not CATALOG.exists():
    raise RuntimeError("Missing data/catalog.jsonl. Commit it to the repo or mount it.")

# ----- load vector store once at startup -----
store = VectorStore(str(CATALOG))

# Maximum number of description characters shown per recommendation.
MAX_DESC_CHARS = 600


def combo_tags(b: Dict[str, Any]) -> str:
    """Return the book's categories and subjects joined with "; ".

    Missing, ``None`` or blank fields are skipped, so the result is an empty
    string when the book carries neither field.
    """
    cats = (b.get("categories", "") or "").strip()
    subs = (b.get("subjects", "") or "").strip()
    return "; ".join(t for t in (cats, subs) if t)


def lookup_query_book(q: str) -> Dict[str, Any]:
    """Resolve the free-text query *q* to a normalized book record.

    Takes the top ``google_books`` hit and passes it to ``normalize`` together
    with the ``openlibrary_enrich`` result for that hit's title.

    Raises:
        gr.Error: if the search returns no results.
    """
    found = google_books(q, max_results=1)
    if not found:
        # No Open Library fallback search is attempted yet; report the miss
        # to the user instead.
        raise gr.Error("Book not found. Try a different title or add author.")
    top = found[0]
    return normalize(top, openlibrary_enrich(top["title"]))


def _result_markdown(rank: int, b: Dict[str, Any]) -> str:
    """Format one recommendation (title, tags, summary, scores) as markdown.

    *b* must already contain the numeric ``similarity`` and ``tag_overlap``
    keys.
    """
    chips = combo_tags(b)
    desc = (b.get("description") or "").strip()
    if len(desc) > MAX_DESC_CHARS:
        desc = desc[:MAX_DESC_CHARS] + "…"
    # `or` rather than a .get() default so an explicit None does not render
    # as the literal text "None".
    heading = b.get("title") or "(untitled)"
    authors = b.get("authors") or ""
    return (
        f"**{rank}. {heading}** — {authors}\n\n"
        + (f"*{chips}*\n\n" if chips else "")
        + (f"{desc}\n\n" if desc else "_No description available._\n\n")
        + f"Similarity: **{b['similarity']:.2f}** · Tag overlap: **{b['tag_overlap']:.2f}**\n\n---\n"
    )


def recommend(title: str, k: int = 8):
    """Find up to *k* catalog books similar to *title*.

    Args:
        title: Free-text title typed by the user.
        k: Requested number of recommendations; capped at the catalog size.

    Returns:
        A 4-tuple ``(query_title_md, query_auth_md, md_text, cards)`` where the
        first three items are markdown strings and ``cards`` is a list of
        ``(cover_url, caption)`` pairs for the gallery.

    Raises:
        gr.Error: if *title* is blank or the book cannot be found.
    """
    title = (title or "").strip()
    if not title:
        raise gr.Error("Type a book title.")

    qb = lookup_query_book(title)

    # The value comes from a slider and may arrive as a float; the store is
    # asked for a whole number of neighbours.
    k = int(k)
    pairs = store.similar(qb, k=min(k, len(store.books)))

    query_tags = combo_tags(qb)  # identical for every result, compute once
    md_lines = []
    cards = []  # list of (image_url, caption) for the gallery
    for rank, (idx, sim) in enumerate(pairs, start=1):
        b = dict(store.books[idx])  # copy so the stored record is not mutated
        b["similarity"] = float(sim)
        b["tag_overlap"] = jaccard(query_tags, combo_tags(b))
        md_lines.append(_result_markdown(rank, b))

        # The gallery cannot render an entry without an image, so books with
        # no cover URL appear only in the markdown list.
        cover = b.get("cover_url")
        if cover:
            cards.append((cover, f"{rank}. {b.get('title') or ''}"))

    query_title_md = f"### Because you liked: **{qb.get('title','(unknown)')}**"
    query_auth_md = f"*{qb.get('authors','')}*"
    md_text = "\n".join(md_lines)
    return query_title_md, query_auth_md, md_text, cards


STICKY_CSS = """
.sticky {
  position: sticky;
  top: 0;
  z-index: 100;
  background: white;
  padding: 8px 0 12px 0;
  border-bottom: 1px solid #eee;
}
"""

with gr.Blocks(title="BookRec (Gradio)", css=STICKY_CSS) as demo:
    # Sticky header + inputs
    with gr.Column(elem_classes=["sticky"]):
        gr.Markdown("# BookRec — because you loved…")
        with gr.Row():
            title_in = gr.Textbox(label="I liked…", placeholder="The Night Circus", scale=4)
            k_in = gr.Slider(3, 15, value=8, step=1, label="How many?", scale=1)
        with gr.Row():
            btn = gr.Button("Find similar", variant="primary", scale=1)
            clear_btn = gr.Button("Clear", scale=1)

    # Results area
    query_title = gr.Markdown()
    query_auth = gr.Markdown()
    results_md = gr.Markdown()  # full synopses
    gallery = gr.Gallery(label="Covers", columns=2, height=600, preview=True)

    # Wire actions
    btn.click(
        fn=recommend,
        inputs=[title_in, k_in],
        outputs=[query_title, query_auth, results_md, gallery],
    )
    # Clear resets the result panes only; the input controls keep their values.
    clear_btn.click(
        fn=lambda: ("", "", "", []),
        inputs=None,
        outputs=[query_title, query_auth, results_md, gallery],
    )

if __name__ == "__main__":
    demo.launch()