davanstrien HF Staff commited on
Commit
17a4402
·
verified ·
1 Parent(s): 588930d

Upload folder using huggingface_hub

Browse files
.dockerignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ __pycache__
2
+ *.pyc
3
+ .ruff_cache
4
+ .git
5
+ *.png
6
+ *.cast
.ruff_cache/.gitignore ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ # Automatically created by ruff.
2
+ *
.ruff_cache/0.14.14/6999155627130175505 ADDED
Binary file (95 Bytes). View file
 
.ruff_cache/CACHEDIR.TAG ADDED
@@ -0,0 +1 @@
 
 
1
+ Signature: 8a477f597d28d172789f06886806bc55
Dockerfile ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ # Install uv from official image
4
+ COPY --from=ghcr.io/astral-sh/uv:0.9.30 /uv /bin/uv
5
+
6
+ ENV HF_HUB_ENABLE_HF_TRANSFER=1
7
+
8
+ # Install dependencies — CPU-only torch via extra index
9
+ COPY requirements.in /tmp/requirements.in
10
+ RUN --mount=type=cache,target=/root/.cache/uv \
11
+ uv pip install --system \
12
+ --extra-index-url https://download.pytorch.org/whl/cpu \
13
+ -r /tmp/requirements.in
14
+
15
+ # Create non-root user (HF Spaces runs as user ID 1000)
16
+ RUN useradd -m -u 1000 user
17
+ USER user
18
+
19
+ ENV HOME=/home/user \
20
+ PATH=/home/user/.local/bin:$PATH
21
+
22
+ WORKDIR $HOME/app
23
+
24
+ # Copy application
25
+ COPY --chown=user . .
26
+
27
+ # HF Spaces expects port 7860
28
+ EXPOSE 7860
29
+
30
+ CMD ["python", "app.py", "--from-hub", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,30 @@
1
  ---
2
- title: Bpl Card Catalog
3
- emoji: 😻
4
  colorFrom: gray
5
- colorTo: gray
6
  sdk: docker
7
  pinned: false
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: BPL Card Catalog Search
3
+ emoji: 🗃️
4
  colorFrom: gray
5
+ colorTo: yellow
6
  sdk: docker
7
  pinned: false
8
+ license: mit
9
  ---
10
 
11
+ # BPL Card Catalog Search
12
+
13
+ Search and browse ~453,000 digitized catalog cards from the Boston Public Library's Rare Books & Manuscripts Department.
14
+
15
+ Uses AI-powered OCR (small vision-language models) to make handwritten and typewritten catalog cards searchable for the first time.
16
+
17
+ ## Features
18
+
19
+ - **Semantic search** — find cards by meaning, not just keywords
20
+ - **Keyword search** — full-text search across OCR transcriptions
21
+ - **Compare OCR** — see old Tesseract vs new VLM OCR results side by side
22
+ - **Browse by drawer** — navigate the physical organization of the catalog
23
+ - **Image lightbox** — click any card image to view full-size
24
+
25
+ ## Stack
26
+
27
+ - FastAPI + HTMX + Jinja2
28
+ - LanceDB (vector + full-text search)
29
+ - sentence-transformers (BAAI/bge-base-en-v1.5)
30
+ - Dataset: Lance format on HF Hub
app.py ADDED
@@ -0,0 +1,496 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.11"
3
+ # dependencies = [
4
+ # "lancedb>=0.17",
5
+ # "fastapi>=0.115",
6
+ # "uvicorn[standard]>=0.32",
7
+ # "jinja2>=3.1",
8
+ # "sentence-transformers",
9
+ # "pillow",
10
+ # "huggingface-hub",
11
+ # ]
12
+ # ///
13
+ """
14
+ BPL Card Catalog — OCR Search Comparison
15
+
16
+ FastAPI + HTMX app comparing old (Tesseract) and new (GLM-OCR) search
17
+ results side by side. Clean, Tufte-inspired design.
18
+
19
+ Usage (local dev):
20
+ uv run app.py --db-path ../bpl-lance-db
21
+
22
+ Usage (HF Spaces / Hub dataset):
23
+ uv run app.py --from-hub davanstrien/bpl-card-catalog-lance
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import argparse
29
+ import io
30
+ import os
31
+ import random
32
+ from pathlib import Path
33
+
34
+ import lancedb
35
+ import uvicorn
36
+ from fastapi import FastAPI, Request
37
+ from fastapi.responses import HTMLResponse, StreamingResponse
38
+ from fastapi.staticfiles import StaticFiles
39
+ from fastapi.templating import Jinja2Templates
40
+ from huggingface_hub import snapshot_download
41
+ from PIL import Image
42
+ from sentence_transformers import SentenceTransformer
43
+
44
+ DEFAULT_HUB_REPO = "davanstrien/bpl-card-catalog-lance"
45
+ DB_PATH = "../bpl-lance-db"
46
+ TABLE_NAME = "cards"
47
+ EMBEDDING_MODEL = "BAAI/bge-base-en-v1.5"
48
+ SPOTLIGHT_COUNT = 3
49
+
50
+ OLD_OCR_LABEL = "Tesseract"
51
+ NEW_OCR_LABEL = "VLM OCR"
52
+
53
+ APP_DIR = Path(__file__).parent
54
+ TEMPLATES_DIR = APP_DIR / "templates"
55
+ STATIC_DIR = APP_DIR / "static"
56
+
57
+ EXAMPLE_QUERIES = [
58
+ "abolitionism",
59
+ "Civil War letters",
60
+ "Shakespeare plays",
61
+ "Boston history",
62
+ "illuminated manuscripts",
63
+ "African American history",
64
+ "French literature",
65
+ "music composition",
66
+ "botanical illustrations",
67
+ "theater history",
68
+ ]
69
+
70
+ SELECT_COLS = [
71
+ "drawer_id",
72
+ "card_number",
73
+ "text",
74
+ "markdown",
75
+ "source_url",
76
+ "image",
77
+ ]
78
+
79
+
80
def truncate(text: str | None, n: int = 800) -> str:
    """Shorten *text* to at most *n* characters for display.

    Args:
        text: The string to shorten. ``None`` or an empty string is
            rendered as the placeholder ``"(empty)"``; callers pass the
            result of ``dict.get`` here, which may be ``None``.
        n: Maximum number of characters to keep. Negative values are
            treated as ``0``.

    Returns:
        ``"(empty)"`` for falsy input; otherwise the first *n* characters
        of *text*, followed by a horizontal ellipsis (U+2026) when
        anything was cut off.
    """
    if not text:
        return "(empty)"
    # Clamp so a negative n cannot turn the slice into "drop the last
    # |n| characters" while still appending an ellipsis.
    n = max(n, 0)
    if len(text) <= n:
        return text
    return text[:n] + "\u2026"
84
+
85
+
86
def parse_drawer_id(drawer_id: str) -> tuple[str, str]:
    """Split a drawer slug into its number and a human-readable label.

    Example: ``'145-great-britain-acts' -> ('145', 'Great Britain Acts')``.

    Args:
        drawer_id: Slug of the form ``<number>-<words-separated-by-hyphens>``.

    Returns:
        A ``(number, label)`` tuple. ``number`` is everything before the
        first hyphen, unchanged. ``label`` is the remainder with hyphens
        and dots turned into spaces and title-cased; it is ``""`` when
        the slug has no hyphen.
    """
    num, _, slug = drawer_id.partition("-")
    # split()/join collapses the doubled spaces that adjacent separators
    # such as ".-" would otherwise leave in the label.
    words = slug.replace("-", " ").replace(".", " ").split()
    return num, " ".join(words).title()
96
+
97
+
98
def create_app(db_path: str = DB_PATH) -> FastAPI:
    """Build the FastAPI application backed by the LanceDB table at *db_path*.

    At construction time this opens the ``TABLE_NAME`` table, loads the
    ``EMBEDDING_MODEL`` sentence-transformer, counts the rows, and builds
    an in-memory index of drawers (id, parsed number/label, card count).

    Routes registered:
        ``/``               landing page
        ``/search``         old-vs-new OCR comparison results
        ``/search-single``  results from the new OCR only
        ``/random-cards``   ``SPOTLIGHT_COUNT`` randomly chosen cards
        ``/drawers``        drawer index
        ``/drawer/{id}``    all cards in one drawer (capped at 2000)
        ``/image/{id}``     JPEG bytes for an image id handed out by a
                            previously rendered page

    Args:
        db_path: Directory of the LanceDB database to open.

    Returns:
        The configured ``FastAPI`` instance.
    """
    # Function-scope imports so this block does not depend on edits to
    # the module's import section.
    from collections import Counter, OrderedDict
    from itertools import count

    app = FastAPI(title="BPL Card Catalog — OCR Search Comparison")
    app.mount("/static", StaticFiles(directory=str(STATIC_DIR)), name="static")
    templates = Jinja2Templates(directory=str(TEMPLATES_DIR))

    db = lancedb.connect(db_path)
    table = db.open_table(TABLE_NAME)
    model = SentenceTransformer(EMBEDDING_MODEL)
    total_cards = table.count_rows()
    total_cards_display = f"{total_cards:,}"

    # -- Build drawer index at startup --
    # Skip the scan for an empty table rather than issuing a limit(0) query.
    drawer_rows = (
        table.search().select(["drawer_id"]).limit(total_cards).to_list()
        if total_cards
        else []
    )
    drawer_counts = Counter(row["drawer_id"] for row in drawer_rows)

    drawer_list: list[dict] = []
    for did, card_count in drawer_counts.items():
        num, label = parse_drawer_id(did)
        drawer_list.append(
            {
                "drawer_id": did,
                "drawer_num": num,
                "drawer_label": label,
                "count": card_count,
            }
        )
    # Numeric drawers first in numeric order; non-numeric prefixes last.
    drawer_list.sort(
        key=lambda d: (
            int(d["drawer_num"]) if d["drawer_num"].isdigit() else 9999,
            d["drawer_id"],
        )
    )
    known_drawer_ids = set(drawer_counts)
    # Position lookup for prev/next navigation without a linear scan.
    drawer_position = {d["drawer_id"]: i for i, d in enumerate(drawer_list)}

    # -- Image cache (image id -> raw image bytes) --
    # Ids come from a monotonically increasing counter, so two rows can
    # never share an id (the previous ``len(image_cache)`` scheme reused
    # an id whenever a row had no image bytes). The cache is bounded and
    # evicts least-recently-used entries so memory does not grow with
    # every request; an evicted id answers 404 on /image/{id}.
    image_cache_max = 4096
    image_cache: OrderedDict[int, bytes] = OrderedDict()
    image_ids = count()

    def _remember_image(row: dict) -> int:
        """Give *row* a fresh image id and cache its bytes when present."""
        image_id = next(image_ids)
        data = row.get("image")
        if isinstance(data, bytes):
            image_cache[image_id] = data
            while len(image_cache) > image_cache_max:
                image_cache.popitem(last=False)
        return image_id

    # -- Search helpers --

    def _embed(query: str) -> list[float]:
        """Encode *query* into a normalized embedding vector."""
        return model.encode(query, normalize_embeddings=True).tolist()

    def _vector_search(q_vec: list[float], column: str, limit: int) -> list[dict]:
        """Nearest-neighbour search of *q_vec* against embedding *column*."""
        return (
            table.search(q_vec, vector_column_name=column)
            .select(SELECT_COLS)
            .limit(limit)
            .to_list()
        )

    def _fts_search(query: str, column: str, limit: int) -> list[dict]:
        """Full-text search of *query* against text *column*."""
        return (
            table.search(query, query_type="fts", fts_columns=column)
            .select(SELECT_COLS)
            .limit(limit)
            .to_list()
        )

    def _first_score(row: dict):
        """Return the first score-like field that is present in *row*.

        Tests against ``None`` rather than truthiness so that a distance
        of exactly ``0.0`` is still reported.
        """
        for key in ("_distance", "_score", "_relevance_score"):
            value = row.get(key)
            if value is not None:
                return value
        return None

    def format_results(
        results: list[dict],
        ocr_field: str,
        other_field: str,
        ocr_label: str,
        compare_label: str,
    ) -> list[dict]:
        """Convert raw search rows into template-ready dictionaries.

        ``ocr_field`` is the column shown as the primary transcription,
        ``other_field`` the one shown for comparison; both are truncated
        to 800 characters. Each row's image is registered in the image
        cache and referenced by ``row_idx``.
        """
        formatted = []
        for rank, row in enumerate(results, start=1):
            score = _first_score(row)
            drawer_id = row.get("drawer_id", "?")
            drawer_num, drawer_label = parse_drawer_id(drawer_id)
            formatted.append(
                {
                    "rank": rank,
                    "row_idx": _remember_image(row),
                    "drawer_id": drawer_id,
                    "drawer_num": drawer_num,
                    "drawer_label": drawer_label,
                    "card_number": row.get("card_number", "?"),
                    "ocr_text": truncate(row.get(ocr_field, ""), 800),
                    "other_ocr": truncate(row.get(other_field, ""), 800),
                    "ocr_label": ocr_label,
                    "compare_label": compare_label,
                    "score": f"{score:.4f}" if score is not None else "",
                    "source_url": row.get("source_url", ""),
                }
            )
        return formatted

    # -- Routes --

    @app.get("/", response_class=HTMLResponse)
    async def index(request: Request):
        return templates.TemplateResponse(
            request,
            "index.html",
            {
                "total_cards": total_cards_display,
                "examples": EXAMPLE_QUERIES,
                "query": "",
                "mode": "vector",
                "limit": 10,
                "old_ocr_label": OLD_OCR_LABEL,
                "new_ocr_label": NEW_OCR_LABEL,
            },
        )

    @app.get("/search", response_class=HTMLResponse)
    async def search(
        request: Request,
        query: str = "",
        mode: str = "vector",
        limit: int = 5,
    ):
        limit = max(1, min(20, limit))

        if not query.strip():
            return templates.TemplateResponse(
                request,
                "results.html",
                {"query": "", "total_cards": total_cards_display},
            )

        if mode == "fts":
            old_raw = _fts_search(query, "text", limit)
            new_raw = _fts_search(query, "markdown", limit)
        else:
            # Embed once; both columns are searched with the same vector.
            q_vec = _embed(query)
            old_raw = _vector_search(q_vec, "old_ocr_embedding", limit)
            new_raw = _vector_search(q_vec, "new_ocr_embedding", limit)

        old_label = f"Old OCR ({OLD_OCR_LABEL})"
        new_label = f"New OCR ({NEW_OCR_LABEL})"

        old_results = format_results(
            old_raw, "text", "markdown", old_label, new_label
        )
        new_results = format_results(
            new_raw, "markdown", "text", new_label, old_label
        )

        return templates.TemplateResponse(
            request,
            "results.html",
            {
                "query": query,
                "mode": mode,
                "old_results": old_results,
                "new_results": new_results,
                "total_cards": total_cards_display,
                "old_ocr_label": OLD_OCR_LABEL,
                "new_ocr_label": NEW_OCR_LABEL,
            },
        )

    @app.get("/search-single", response_class=HTMLResponse)
    async def search_single(
        request: Request,
        query: str = "",
        mode: str = "vector",
        limit: int = 10,
    ):
        limit = max(1, min(20, limit))

        if not query.strip():
            return templates.TemplateResponse(
                request,
                "results-search.html",
                {"query": "", "total_cards": total_cards_display},
            )

        if mode == "fts":
            raw = _fts_search(query, "markdown", limit)
        else:
            raw = _vector_search(_embed(query), "new_ocr_embedding", limit)

        new_label = f"New OCR ({NEW_OCR_LABEL})"
        old_label = f"Old OCR ({OLD_OCR_LABEL})"
        results = format_results(raw, "markdown", "text", new_label, old_label)

        return templates.TemplateResponse(
            request,
            "results-search.html",
            {
                "query": query,
                "mode": mode,
                "results": results,
                "total_cards": total_cards_display,
            },
        )

    @app.get("/random-cards", response_class=HTMLResponse)
    async def random_cards(request: Request):
        offsets = random.sample(
            range(total_cards), min(SPOTLIGHT_COUNT, total_cards)
        )
        cards = []
        for offset in offsets:
            rows = (
                table.search().select(SELECT_COLS).limit(1).offset(offset).to_list()
            )
            if not rows:
                continue
            row = rows[0]
            drawer_id = row.get("drawer_id", "?")
            drawer_num, drawer_label = parse_drawer_id(drawer_id)
            cards.append(
                {
                    "row_idx": _remember_image(row),
                    "drawer_id": drawer_id,
                    "drawer_num": drawer_num,
                    "drawer_label": drawer_label,
                    "card_number": row.get("card_number", "?"),
                    "ocr_text": truncate(row.get("markdown", ""), 200),
                    "source_url": row.get("source_url", ""),
                }
            )
        return templates.TemplateResponse(
            request, "spotlight.html", {"cards": cards}
        )

    @app.get("/drawers", response_class=HTMLResponse)
    async def drawers_index(request: Request):
        return templates.TemplateResponse(
            request,
            "drawers.html",
            {
                "drawers": drawer_list,
                "total_drawers": len(drawer_list),
                "total_cards": total_cards_display,
            },
        )

    @app.get("/drawer/{drawer_id}", response_class=HTMLResponse)
    async def drawer_detail(request: Request, drawer_id: str):
        # Validate against known drawer IDs to prevent injection
        if drawer_id not in known_drawer_ids:
            return HTMLResponse("Drawer not found", status_code=404)

        # A known id may still legitimately contain a quote; double it so
        # the filter string stays well-formed.
        quoted_id = drawer_id.replace("'", "''")
        rows = (
            table.search()
            .where(f"drawer_id = '{quoted_id}'", prefilter=True)
            .select(SELECT_COLS)
            .limit(2000)
            .to_list()
        )

        def _card_order(row: dict):
            # Rows without a card number sort last instead of raising a
            # TypeError when None is compared with a number.
            number = row.get("card_number")
            return (number is None, 0 if number is None else number)

        rows.sort(key=_card_order)

        cards = [
            {
                "card_number": row.get("card_number", position),
                "row_idx": _remember_image(row),
                "ocr_text": truncate(row.get("markdown", ""), 800),
                "source_url": row.get("source_url", ""),
            }
            for position, row in enumerate(rows)
        ]

        # Prev/next drawer navigation
        idx = drawer_position[drawer_id]
        prev_drawer = drawer_list[idx - 1] if idx > 0 else None
        next_drawer = drawer_list[idx + 1] if idx < len(drawer_list) - 1 else None
        drawer_num, drawer_label = parse_drawer_id(drawer_id)

        return templates.TemplateResponse(
            request,
            "drawer.html",
            {
                "drawer_id": drawer_id,
                "drawer_num": drawer_num,
                "drawer_label": drawer_label,
                "cards": cards,
                "card_count": len(cards),
                "prev_drawer": prev_drawer,
                "next_drawer": next_drawer,
            },
        )

    @app.get("/image/{row_idx}")
    async def image(row_idx: int):
        img_bytes = image_cache.get(row_idx)
        if img_bytes is None:
            return HTMLResponse("Image not found", status_code=404)
        # Mark as recently used so images on pages still being viewed
        # are the last to be evicted.
        image_cache.move_to_end(row_idx)
        buf = io.BytesIO()
        with Image.open(io.BytesIO(img_bytes)) as src:
            # The JPEG encoder rejects modes such as RGBA, LA and P;
            # normalise anything that is not plain RGB or greyscale.
            frame = src if src.mode in ("RGB", "L") else src.convert("RGB")
            frame.save(buf, format="JPEG", quality=85)
        buf.seek(0)
        return StreamingResponse(buf, media_type="image/jpeg")

    return app
438
+
439
+
440
def resolve_db_path(args) -> str:
    """Resolve the database directory from CLI args or a Hub download.

    Args:
        args: Parsed CLI namespace. Reads ``args.db_path`` and
            ``args.from_hub``.

    Returns:
        ``args.db_path`` (as a string) when it is set and exists.
        Otherwise the local directory the dataset was downloaded into:
        ``<HF_HOME or /tmp/hf_cache>/bpl-lance``. The repository is
        ``args.from_hub``, falling back to the ``BPL_HUB_REPO``
        environment variable and then ``DEFAULT_HUB_REPO``.

    Raises:
        SystemExit: With status 1 when ``args.db_path`` is set but does
            not exist.
    """
    import sys

    # Explicit local path takes priority
    if args.db_path:
        db_path = Path(args.db_path)
        if not db_path.exists():
            # Diagnostics belong on stderr, not mixed into stdout.
            print(f"Database not found at {db_path}", file=sys.stderr)
            print("Run 'uv run bpl-lance-poc.py build' first.", file=sys.stderr)
            raise SystemExit(1)
        return str(db_path)

    # Download from HF Hub
    repo_id = args.from_hub or os.environ.get("BPL_HUB_REPO", DEFAULT_HUB_REPO)
    cache_base = os.environ.get("HF_HOME", "/tmp/hf_cache")
    local_dir = str(Path(cache_base) / "bpl-lance")
    # flush=True so the message reaches container logs before the
    # (potentially long) download starts, even with buffered stdout.
    print(f"Downloading dataset from {repo_id} to {local_dir}...", flush=True)
    snapshot_download(repo_id, repo_type="dataset", local_dir=local_dir)
    print("Download complete.", flush=True)
    return local_dir
459
+
460
+
461
def main():
    """Parse CLI options, locate the database, and serve the app.

    ``--db-path`` and ``--from-hub`` are mutually exclusive. When neither
    is given, the local ``DB_PATH`` is used if it exists; otherwise the
    dataset is downloaded from the Hub. The Hub repository defaults to
    the ``BPL_HUB_REPO`` environment variable, then ``DEFAULT_HUB_REPO``.
    """
    # Resolve the environment override here: this function always fills
    # in args.from_hub before resolve_db_path runs, so a BPL_HUB_REPO
    # fallback applied later would never be reached in normal CLI use.
    hub_default = os.environ.get("BPL_HUB_REPO", DEFAULT_HUB_REPO)

    parser = argparse.ArgumentParser(
        description="BPL OCR search comparison (FastAPI + HTMX)"
    )
    source = parser.add_mutually_exclusive_group()
    source.add_argument(
        "--db-path",
        default=None,
        help="Path to local LanceDB directory (for local dev)",
    )
    source.add_argument(
        "--from-hub",
        nargs="?",
        const=hub_default,  # value used for a bare ``--from-hub``
        default=None,
        help=f"Download Lance DB from HF Hub (default: {hub_default})",
    )
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument("--port", type=int, default=8000)
    args = parser.parse_args()

    # If neither --db-path nor --from-hub, try default local path
    if args.db_path is None and args.from_hub is None:
        if Path(DB_PATH).exists():
            args.db_path = DB_PATH
        else:
            args.from_hub = hub_default

    db_path = resolve_db_path(args)
    app = create_app(db_path)
    uvicorn.run(app, host=args.host, port=args.port)
493
+
494
+
495
+ if __name__ == "__main__":
496
+ main()
requirements.in ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ lancedb>=0.17
2
+ fastapi>=0.115
3
+ uvicorn[standard]>=0.32
4
+ jinja2>=3.1
5
+ sentence-transformers
6
+ pillow
7
+ huggingface-hub
static/style.css ADDED
@@ -0,0 +1,679 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /* BPL Card Catalog OCR Comparison — Tufte-inspired minimal styles */
2
+
3
+ *,
4
+ *::before,
5
+ *::after {
6
+ box-sizing: border-box;
7
+ }
8
+
9
+ body {
10
+ font-family: system-ui, -apple-system, sans-serif;
11
+ color: #333;
12
+ background: #fff;
13
+ margin: 0;
14
+ padding: 0;
15
+ line-height: 1.5;
16
+ }
17
+
18
+ .container {
19
+ max-width: 1200px;
20
+ margin: 0 auto;
21
+ padding: 0 1.5rem 3rem;
22
+ }
23
+
24
+ /* Header */
25
+ header {
26
+ border-bottom: 1px solid #ddd;
27
+ padding: 0.75rem 0;
28
+ margin-bottom: 0.5rem;
29
+ display: flex;
30
+ align-items: baseline;
31
+ gap: 2rem;
32
+ }
33
+
34
+ header .brand {
35
+ font-weight: 600;
36
+ color: #333;
37
+ font-size: 0.9rem;
38
+ letter-spacing: 0.02em;
39
+ }
40
+
41
+ header .brand {
42
+ text-decoration: none;
43
+ color: inherit;
44
+ }
45
+
46
+ header .subtitle {
47
+ font-size: 0.8rem;
48
+ color: #999;
49
+ }
50
+
51
+ header .header-link {
52
+ font-size: 0.8rem;
53
+ color: #999;
54
+ text-decoration: none;
55
+ margin-left: auto;
56
+ }
57
+
58
+ header .header-link:hover {
59
+ color: #333;
60
+ }
61
+
62
+ /* Tab bar */
63
+ .tab-bar {
64
+ display: flex;
65
+ gap: 0;
66
+ margin-bottom: 0.75rem;
67
+ border-bottom: 1px solid #ddd;
68
+ }
69
+
70
+ .tab {
71
+ font-size: 0.85rem;
72
+ font-weight: 500;
73
+ color: #999;
74
+ background: none;
75
+ border: none;
76
+ border-bottom: 2px solid transparent;
77
+ padding: 0.6rem 1.2rem;
78
+ cursor: pointer;
79
+ transition: color 0.15s, border-color 0.15s;
80
+ }
81
+
82
+ .tab:hover {
83
+ color: #666;
84
+ }
85
+
86
+ .tab.active {
87
+ color: #333;
88
+ border-bottom-color: #333;
89
+ }
90
+
91
+ /* Page heading */
92
+ .page-intro {
93
+ margin-bottom: 0.75rem;
94
+ }
95
+
96
+ .page-intro h1 {
97
+ font-size: 1.2rem;
98
+ font-weight: 600;
99
+ margin: 0 0 0.25rem;
100
+ color: #333;
101
+ }
102
+
103
+ .page-intro p {
104
+ font-size: 0.85rem;
105
+ color: #666;
106
+ margin: 0;
107
+ max-width: 720px;
108
+ line-height: 1.5;
109
+ }
110
+
111
+ .page-intro .card-count {
112
+ font-variant-numeric: tabular-nums;
113
+ font-weight: 500;
114
+ color: #333;
115
+ }
116
+
117
+ /* Search form */
118
+ .search-form {
119
+ display: flex;
120
+ gap: 0.75rem;
121
+ align-items: flex-end;
122
+ margin-bottom: 1rem;
123
+ flex-wrap: wrap;
124
+ }
125
+
126
+ .search-form .field {
127
+ display: flex;
128
+ flex-direction: column;
129
+ gap: 0.25rem;
130
+ }
131
+
132
+ .search-form .field.query-field {
133
+ flex: 1;
134
+ min-width: 200px;
135
+ }
136
+
137
+ .search-form label {
138
+ font-size: 0.75rem;
139
+ color: #999;
140
+ letter-spacing: 0.02em;
141
+ text-transform: uppercase;
142
+ }
143
+
144
+ .search-form input[type="text"] {
145
+ font-size: 0.9rem;
146
+ padding: 0.45rem 0.75rem;
147
+ border: 1px solid #ddd;
148
+ border-radius: 4px;
149
+ background: #fff;
150
+ color: #333;
151
+ outline: none;
152
+ transition: border-color 0.15s;
153
+ }
154
+
155
+ .search-form input[type="text"]:focus {
156
+ border-color: #999;
157
+ }
158
+
159
+ .search-form select {
160
+ font-size: 0.85rem;
161
+ padding: 0.45rem 0.5rem;
162
+ border: 1px solid #ddd;
163
+ border-radius: 4px;
164
+ background: #fff;
165
+ color: #333;
166
+ }
167
+
168
+ .search-form input[type="number"] {
169
+ font-size: 0.85rem;
170
+ padding: 0.45rem 0.5rem;
171
+ border: 1px solid #ddd;
172
+ border-radius: 4px;
173
+ width: 4rem;
174
+ text-align: center;
175
+ }
176
+
177
+ .search-form button {
178
+ font-size: 0.85rem;
179
+ padding: 0.45rem 1.2rem;
180
+ border: 1px solid #333;
181
+ border-radius: 4px;
182
+ background: #333;
183
+ color: #fff;
184
+ cursor: pointer;
185
+ transition: background 0.15s;
186
+ white-space: nowrap;
187
+ }
188
+
189
+ .search-form button:hover {
190
+ background: #555;
191
+ }
192
+
193
+ /* Example queries */
194
+ .examples {
195
+ margin-bottom: 1rem;
196
+ display: flex;
197
+ flex-wrap: wrap;
198
+ gap: 0.4rem;
199
+ align-items: center;
200
+ }
201
+
202
+ .examples .examples-label {
203
+ font-size: 0.75rem;
204
+ color: #999;
205
+ margin-right: 0.25rem;
206
+ }
207
+
208
+ .examples a {
209
+ font-size: 0.8rem;
210
+ color: #666;
211
+ text-decoration: none;
212
+ padding: 0.2rem 0.6rem;
213
+ border: 1px solid #e0e0e0;
214
+ border-radius: 3px;
215
+ transition: border-color 0.15s, color 0.15s;
216
+ }
217
+
218
+ .examples a:hover {
219
+ color: #333;
220
+ border-color: #999;
221
+ }
222
+
223
+ /* Results header */
224
+ .results-header {
225
+ padding: 0.5rem 0;
226
+ border-bottom: 2px solid #333;
227
+ margin-bottom: 1rem;
228
+ display: flex;
229
+ justify-content: space-between;
230
+ align-items: baseline;
231
+ }
232
+
233
+ .results-header .query-display {
234
+ font-size: 0.95rem;
235
+ color: #666;
236
+ }
237
+
238
+ .results-header .query-text {
239
+ color: #333;
240
+ font-weight: 600;
241
+ }
242
+
243
+ .results-header .results-meta {
244
+ font-size: 0.8rem;
245
+ color: #999;
246
+ }
247
+
248
+ /* Two-column grid (Compare tab) */
249
+ .results-grid {
250
+ display: grid;
251
+ grid-template-columns: 1fr 1fr;
252
+ gap: 2.5rem;
253
+ }
254
+
255
+ /* Single-column list (Search tab) */
256
+ .results-list {
257
+ }
258
+
259
+ /* Column headers */
260
+ .column-header {
261
+ font-size: 0.85rem;
262
+ font-weight: 600;
263
+ color: #666;
264
+ margin: 0 0 1rem;
265
+ padding-bottom: 0.5rem;
266
+ border-bottom: 1px solid #ddd;
267
+ letter-spacing: 0.02em;
268
+ }
269
+
270
+ .column-header .model-name {
271
+ font-weight: 400;
272
+ color: #999;
273
+ }
274
+
275
+ .old-header { color: #8b6914; border-bottom-color: #d4b86a; }
276
+ .new-header { color: #2d6a4f; border-bottom-color: #74c69d; }
277
+
278
+ /* Result cards */
279
+ .result-card {
280
+ margin-bottom: 1rem;
281
+ border: 1px solid #e8e8e8;
282
+ border-radius: 4px;
283
+ overflow: hidden;
284
+ transition: box-shadow 0.15s ease;
285
+ }
286
+
287
+ .result-card:hover {
288
+ box-shadow: 0 2px 12px rgba(0, 0, 0, 0.06);
289
+ }
290
+
291
+ /* Search card: side-by-side image + text */
292
+ .search-card-layout {
293
+ display: grid;
294
+ grid-template-columns: 320px 1fr;
295
+ }
296
+
297
+ .search-card .card-image {
298
+ max-height: 300px;
299
+ }
300
+
301
+ .card-rank-bar {
302
+ display: flex;
303
+ justify-content: space-between;
304
+ align-items: center;
305
+ padding: 0.35rem 0.75rem;
306
+ background: #f8f8f8;
307
+ border-bottom: 1px solid #eee;
308
+ font-size: 0.75rem;
309
+ color: #bbb;
310
+ }
311
+
312
+ .card-rank {
313
+ font-weight: 600;
314
+ color: #999;
315
+ }
316
+
317
+ .card-score {
318
+ font-variant-numeric: tabular-nums;
319
+ }
320
+
321
+ .card-image-wrap {
322
+ position: relative;
323
+ background: #f5f5f5;
324
+ overflow: hidden;
325
+ }
326
+
327
+ .card-image {
328
+ width: 100%;
329
+ display: block;
330
+ object-fit: contain;
331
+ background: #f5f5f5;
332
+ max-height: 360px;
333
+ transition: transform 0.3s ease;
334
+ }
335
+
336
+ .rotate-btn {
337
+ position: absolute;
338
+ top: 0.4rem;
339
+ right: 0.4rem;
340
+ width: 1.6rem;
341
+ height: 1.6rem;
342
+ border: none;
343
+ border-radius: 3px;
344
+ background: rgba(0, 0, 0, 0.45);
345
+ color: #fff;
346
+ font-size: 0.9rem;
347
+ line-height: 1;
348
+ cursor: pointer;
349
+ opacity: 0;
350
+ transition: opacity 0.15s;
351
+ display: flex;
352
+ align-items: center;
353
+ justify-content: center;
354
+ }
355
+
356
+ .card-image-wrap:hover .rotate-btn {
357
+ opacity: 1;
358
+ }
359
+
360
+ .card-body {
361
+ padding: 0.75rem 1rem;
362
+ }
363
+
364
+ .card-meta {
365
+ font-size: 0.75rem;
366
+ color: #bbb;
367
+ margin-bottom: 0.5rem;
368
+ }
369
+
370
+ .card-meta a {
371
+ color: #999;
372
+ text-decoration: none;
373
+ }
374
+
375
+ .card-meta a:hover {
376
+ color: #333;
377
+ text-decoration: underline;
378
+ }
379
+
380
+ .drawer-label {
381
+ font-weight: 500;
382
+ color: #999;
383
+ text-decoration: none;
384
+ }
385
+
386
+ a.drawer-label:hover {
387
+ color: #333;
388
+ text-decoration: underline;
389
+ }
390
+
391
+ .drawer-subject {
392
+ color: #bbb;
393
+ }
394
+
395
+ /* OCR text blocks */
396
+ .ocr-label {
397
+ font-size: 0.65rem;
398
+ text-transform: uppercase;
399
+ letter-spacing: 0.04em;
400
+ color: #bbb;
401
+ margin-bottom: 0.2rem;
402
+ }
403
+
404
+ .ocr-text {
405
+ font-family: "SF Mono", Menlo, Consolas, monospace;
406
+ font-size: 0.8rem;
407
+ line-height: 1.6;
408
+ white-space: pre-wrap;
409
+ word-break: break-word;
410
+ max-height: 12rem;
411
+ overflow-y: auto;
412
+ padding: 0.25rem 0;
413
+ color: #444;
414
+ }
415
+
416
+ /* Collapsible comparison */
417
+ details {
418
+ margin-top: 0.5rem;
419
+ }
420
+
421
+ details summary {
422
+ font-size: 0.75rem;
423
+ color: #bbb;
424
+ cursor: pointer;
425
+ user-select: none;
426
+ }
427
+
428
+ details summary:hover {
429
+ color: #666;
430
+ }
431
+
432
+ details .ocr-text {
433
+ margin-top: 0.35rem;
434
+ padding: 0.5rem;
435
+ background: #f8faf8;
436
+ border-radius: 3px;
437
+ }
438
+
439
+ /* No results */
440
+ .no-results {
441
+ text-align: center;
442
+ color: #999;
443
+ padding: 3rem 0;
444
+ font-size: 0.9rem;
445
+ }
446
+
447
+ /* Empty state */
448
+ .empty-state {
449
+ text-align: center;
450
+ padding: 4rem 2rem;
451
+ }
452
+
453
+ .empty-state p {
454
+ color: #999;
455
+ margin: 0.25rem 0;
456
+ }
457
+
458
+ .empty-state .prompt {
459
+ font-size: 1.1rem;
460
+ color: #666;
461
+ }
462
+
463
+ /* HTMX loading indicator */
464
+ .htmx-indicator {
465
+ opacity: 0;
466
+ transition: opacity 200ms ease-in;
467
+ }
468
+
469
+ .htmx-request .htmx-indicator,
470
+ .htmx-request.htmx-indicator {
471
+ opacity: 1;
472
+ }
473
+
474
+ .loading-indicator {
475
+ color: #999;
476
+ font-size: 0.8rem;
477
+ margin-left: 0.5rem;
478
+ }
479
+
480
+ /* Footer */
481
+ .site-footer {
482
+ border-top: 1px solid #ddd;
483
+ margin-top: 3rem;
484
+ padding: 1.5rem 0;
485
+ background: #fafafa;
486
+ }
487
+
488
+ .footer-content {
489
+ max-width: 1200px;
490
+ margin: 0 auto;
491
+ padding: 0 1.5rem;
492
+ display: flex;
493
+ gap: 2rem;
494
+ flex-wrap: wrap;
495
+ }
496
+
497
+ .footer-about,
498
+ .footer-note {
499
+ flex: 1;
500
+ min-width: 280px;
501
+ }
502
+
503
+ .footer-content p {
504
+ font-size: 0.78rem;
505
+ color: #999;
506
+ line-height: 1.6;
507
+ margin: 0 0 0.5rem;
508
+ }
509
+
510
+ .footer-content a {
511
+ color: #888;
512
+ text-decoration: none;
513
+ }
514
+
515
+ .footer-content a:hover {
516
+ color: #333;
517
+ text-decoration: underline;
518
+ }
519
+
520
+ .footer-note p {
521
+ font-style: italic;
522
+ color: #aaa;
523
+ }
524
+
525
+ /* Spotlight (random cards on landing) */
526
+ .spotlight { padding: 2rem 0; }
527
+ .spotlight-header {
528
+ font-size: 0.8rem; color: #999; text-transform: uppercase;
529
+ letter-spacing: 0.04em; margin-bottom: 1rem;
530
+ }
531
+ .spotlight-grid {
532
+ display: grid; grid-template-columns: repeat(3, 1fr); gap: 1.5rem;
533
+ }
534
+ .spotlight-card {
535
+ border: 1px solid #e8e8e8; border-radius: 4px; overflow: hidden;
536
+ }
537
+ .spotlight-image { width: 100%; display: block; max-height: 240px; object-fit: contain; background: #f5f5f5; }
538
+ .spotlight-meta { font-size: 0.72rem; color: #bbb; padding: 0.5rem 0.75rem 0.25rem; }
539
+ .spotlight-text {
540
+ font-family: "SF Mono", Menlo, Consolas, monospace;
541
+ font-size: 0.75rem; color: #666; padding: 0.25rem 0.75rem 0.75rem;
542
+ line-height: 1.5; max-height: 4.5rem; overflow: hidden;
543
+ }
544
+
545
+ /* Drawer table */
546
+ .drawer-table {
547
+ width: 100%;
548
+ border-collapse: collapse;
549
+ font-size: 0.85rem;
550
+ }
551
+
552
+ .drawer-table th {
553
+ text-align: left;
554
+ font-size: 0.75rem;
555
+ color: #999;
556
+ text-transform: uppercase;
557
+ letter-spacing: 0.03em;
558
+ padding: 0.5rem 0.75rem;
559
+ border-bottom: 2px solid #ddd;
560
+ }
561
+
562
+ .drawer-table td {
563
+ padding: 0.45rem 0.75rem;
564
+ border-bottom: 1px solid #f0f0f0;
565
+ }
566
+
567
+ .drawer-table tbody tr:hover {
568
+ background: #f8f8f8;
569
+ }
570
+
571
+ .drawer-table a {
572
+ color: #333;
573
+ text-decoration: none;
574
+ }
575
+
576
+ .drawer-table a:hover {
577
+ text-decoration: underline;
578
+ }
579
+
580
+ .dt-num { width: 5rem; font-variant-numeric: tabular-nums; }
581
+ .dt-count { width: 5rem; text-align: right; font-variant-numeric: tabular-nums; color: #999; }
582
+ .dt-count:first-child { text-align: right; }
583
+
584
+ /* Drawer navigation */
585
+ .drawer-nav {
586
+ display: flex;
587
+ justify-content: space-between;
588
+ align-items: center;
589
+ padding: 0.5rem 0;
590
+ margin-bottom: 0.5rem;
591
+ font-size: 0.85rem;
592
+ }
593
+
594
+ .drawer-nav a {
595
+ color: #666;
596
+ text-decoration: none;
597
+ }
598
+
599
+ .drawer-nav a:hover {
600
+ color: #333;
601
+ }
602
+
603
+ .drawer-nav-center {
604
+ display: flex;
605
+ gap: 1rem;
606
+ align-items: center;
607
+ }
608
+
609
+ .drawer-nav .drawer-nav-index {
610
+ font-weight: 500;
611
+ }
612
+
613
+ .drawer-nav .drawer-nav-back {
614
+ font-size: 0.8rem;
615
+ }
616
+
617
+ .drawer-nav .disabled {
618
+ color: #ddd;
619
+ }
620
+
621
+ /* Lightbox */
622
+ .lightbox {
623
+ display: none;
624
+ position: fixed;
625
+ inset: 0;
626
+ z-index: 1000;
627
+ background: rgba(0, 0, 0, 0.85);
628
+ cursor: pointer;
629
+ align-items: center;
630
+ justify-content: center;
631
+ }
632
+
633
+ .lightbox.active {
634
+ display: flex;
635
+ }
636
+
637
+ .lightbox img {
638
+ max-width: 90vw;
639
+ max-height: 90vh;
640
+ object-fit: contain;
641
+ border-radius: 4px;
642
+ box-shadow: 0 4px 40px rgba(0, 0, 0, 0.5);
643
+ }
644
+
645
+ .card-image, .spotlight-image {
646
+ cursor: zoom-in;
647
+ }
648
+
649
+ /* Responsive */
650
+ @media (max-width: 768px) {
651
+ .results-grid {
652
+ grid-template-columns: 1fr;
653
+ gap: 2rem;
654
+ }
655
+
656
+ .search-card-layout {
657
+ grid-template-columns: 1fr;
658
+ }
659
+
660
+ .container {
661
+ padding: 0 1rem 2rem;
662
+ }
663
+
664
+ .search-form {
665
+ flex-direction: column;
666
+ align-items: stretch;
667
+ }
668
+
669
+ .search-form .field.query-field {
670
+ min-width: auto;
671
+ }
672
+
673
+ .spotlight-grid { grid-template-columns: 1fr; }
674
+
675
+ .footer-content {
676
+ flex-direction: column;
677
+ gap: 1rem;
678
+ }
679
+ }
templates/base.html ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="utf-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1">
6
+ <title>{% block title %}BPL Card Catalog Search{% endblock %}</title>
7
+ <link rel="stylesheet" href="/static/style.css">
8
+ <script src="https://unpkg.com/htmx.org@2.0.4"></script>
9
+ <script>
10
/**
 * Rotate the image that sits immediately before a rotate button by a
 * further 90 degrees clockwise.
 *
 * The running angle is kept in the image's `data-rotation` attribute so the
 * lightbox click handler can read it back and show the enlarged image at the
 * same orientation.
 *
 * @param {Element} btn - The clicked rotate button; its previous element
 *     sibling is the image to rotate.
 */
function rotateImage(btn) {
  var img = btn.previousElementSibling;
  // Nothing to rotate if the button was rendered without an image before it.
  if (!img) return;
  var cur = parseInt(img.dataset.rotation || "0", 10);
  // A malformed data-rotation would otherwise yield "rotate(NaNdeg)".
  if (isNaN(cur)) cur = 0;
  cur = (cur + 90) % 360;
  img.dataset.rotation = cur;
  img.style.transform = "rotate(" + cur + "deg)";
}
17
+ </script>
18
+ </head>
19
+ <body>
20
+ <div class="lightbox" id="lightbox" onclick="closeLightbox()">
21
+ <img id="lightbox-img" src="" alt="">
22
+ </div>
23
+ <script>
24
/**
 * Show the full-screen lightbox with the given image.
 *
 * @param {string} src - URL assigned to the lightbox image.
 * @param {(number|string)} [rotation] - Rotation in degrees; a falsy value
 *     clears any transform left over from a previous image.
 */
function openLightbox(src, rotation) {
  var overlay = document.getElementById('lightbox');
  var picture = document.getElementById('lightbox-img');
  var transform = '';
  if (rotation) {
    transform = 'rotate(' + rotation + 'deg)';
  }
  picture.src = src;
  picture.style.transform = transform;
  // The stylesheet only displays the overlay while it has the "active" class.
  overlay.classList.add('active');
}
31
/** Hide the lightbox overlay by dropping its "active" class. */
function closeLightbox() {
  var overlay = document.getElementById('lightbox');
  overlay.classList.remove('active');
}
34
// Escape dismisses the lightbox from anywhere on the page.
document.addEventListener('keydown', function(evt) {
  if (evt.key !== 'Escape') return;
  closeLightbox();
});
// One delegated click handler on the document makes every card image
// clickable, including images that HTMX swaps in after page load.
document.addEventListener('click', function(evt) {
  var clicked = evt.target.closest('.card-image, .spotlight-image');
  if (!clicked) return;
  evt.preventDefault();
  // Carry over any rotation applied with the rotate button.
  openLightbox(clicked.src, clicked.dataset.rotation || 0);
});
45
+ </script>
46
+ <div class="container">
47
+ <header>
48
+ <a href="/" class="brand">BPL Card Catalog</a>
49
+ <span class="subtitle">Rare Books &amp; Manuscripts</span>
50
+ <a href="/drawers" class="header-link">Browse</a>
51
+ </header>
52
+ {% block content %}{% endblock %}
53
+ </div>
54
+ <footer class="site-footer">
55
+ <div class="footer-content">
56
+ <div class="footer-about">
57
+ <p>Cards from the <a href="https://guides.bpl.org/rarebooks">Rare Books &amp; Manuscripts Department</a>,
58
+ Boston Public Library, 700 Boylston St, Boston.</p>
59
+ <p>Reading Room: Wed&ndash;Fri 9:30&ndash;4:30 · <a href="mailto:specialcollections@bpl.org">specialcollections@bpl.org</a></p>
60
+ </div>
61
+ <div class="footer-note">
62
+ <p>These catalog records describe historical materials and may contain
63
+ outdated or harmful language reflecting the time periods in which they were created.
64
+ <a href="https://guides.bpl.org/rarebooks/book-search">Read more</a></p>
65
+ </div>
66
+ </div>
67
+ </footer>
68
+ </body>
69
+ </html>
templates/card.html ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="result-card">
2
+ <div class="card-rank-bar">
3
+ <span class="card-rank">#{{ r.rank }}</span>
4
+ {% if r.score %}<span class="card-score">{{ r.score }}</span>{% endif %}
5
+ </div>
6
+ <div class="card-image-wrap">
7
+ <img class="card-image" src="/image/{{ r.row_idx }}" alt="Catalog card #{{ r.rank }}" loading="lazy">
8
+ <button class="rotate-btn" onclick="rotateImage(this)" title="Rotate image 90°">&#x21bb;</button>
9
+ </div>
10
+ <div class="card-body">
11
+ <div class="card-meta">
12
+ <a href="/drawer/{{ r.drawer_id }}" class="drawer-label">Drawer {{ r.drawer_num }}</a>{% if r.drawer_label %} <span class="drawer-subject">· {{ r.drawer_label }}</span>{% endif %}
13
+ · card {{ r.card_number }}
14
+ {% if r.source_url %}· <a href="{{ r.source_url }}" target="_blank" rel="noopener">View on Internet Archive</a>{% endif %}
15
+ </div>
16
+ <div class="ocr-label">{{ r.ocr_label }}</div>
17
+ <div class="ocr-text">{{ r.ocr_text }}</div>
18
+ <details>
19
+ <summary>Compare with {{ r.compare_label }}</summary>
20
+ <div class="ocr-text">{{ r.other_ocr }}</div>
21
+ </details>
22
+ </div>
23
+ </div>
templates/drawer.html ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
{# Label is optional: guard it the same way the <h1> below does so an unlabelled drawer does not get a dangling separator. #}
{% block title %}Drawer {{ drawer_num }}{% if drawer_label %} · {{ drawer_label }}{% endif %} — BPL Card Catalog{% endblock %}
3
+ {% block content %}
4
+ <div class="drawer-nav">
5
+ {% if prev_drawer %}
6
+ <a href="/drawer/{{ prev_drawer.drawer_id }}">&larr; Drawer {{ prev_drawer.drawer_num }}</a>
7
+ {% else %}
8
+ <span class="disabled">&larr;</span>
9
+ {% endif %}
10
+ <span class="drawer-nav-center">
11
+ <a href="/drawers" class="drawer-nav-index">All Drawers</a>
12
+ <a href="/" id="back-to-search" class="drawer-nav-back" style="display:none">&larr; Back to search</a>
13
+ </span>
14
+ {% if next_drawer %}
15
+ <a href="/drawer/{{ next_drawer.drawer_id }}">Drawer {{ next_drawer.drawer_num }} &rarr;</a>
16
+ {% else %}
17
+ <span class="disabled">&rarr;</span>
18
+ {% endif %}
19
+ </div>
20
+ <script>
21
(function() {
  // Reveal the "Back to search" link only when the visitor arrived from a
  // search on this site, i.e. the referrer is this origin's "/" page with a
  // query string. The link then points back at that exact search URL.
  var ref = document.referrer;
  if (!ref) return;

  var refUrl;
  try {
    refUrl = new URL(ref);
  } catch (err) {
    return; // unparseable referrer: leave the link hidden
  }

  // Without the origin check any external page whose URL happens to be
  // "/?..." would be offered as "Back to search".
  if (refUrl.origin !== window.location.origin) return;
  if (refUrl.pathname !== '/' || !refUrl.search) return;

  var link = document.getElementById('back-to-search');
  if (!link) return;
  link.href = ref;
  link.style.display = '';
})();
29
+ </script>
30
+
31
+ <div class="page-intro">
32
+ <h1>Drawer {{ drawer_num }}{% if drawer_label %} · {{ drawer_label }}{% endif %}</h1>
33
+ <p>{{ card_count }} card{{ 's' if card_count != 1 else '' }} in this drawer.</p>
34
+ </div>
35
+
36
+ <div class="results-list">
37
+ {% for card in cards %}
38
+ <div class="result-card search-card">
39
+ <div class="card-rank-bar">
40
+ <span class="card-rank">Card {{ card.card_number }}</span>
41
+ </div>
42
+ <div class="search-card-layout">
43
+ <div class="card-image-wrap">
44
+ <img class="card-image" src="/image/{{ card.row_idx }}" alt="Card {{ card.card_number }}" loading="lazy">
45
+ <button class="rotate-btn" onclick="rotateImage(this)" title="Rotate image 90°">&#x21bb;</button>
46
+ </div>
47
+ <div class="card-body">
48
+ <div class="card-meta">
49
+ <span class="drawer-label">Drawer {{ drawer_num }}</span>{% if drawer_label %} <span class="drawer-subject">· {{ drawer_label }}</span>{% endif %}
50
+ · card {{ card.card_number }}
51
+ {% if card.source_url %}· <a href="{{ card.source_url }}" target="_blank" rel="noopener">View on Internet Archive</a>{% endif %}
52
+ </div>
53
+ <div class="ocr-text">{{ card.ocr_text }}</div>
54
+ </div>
55
+ </div>
56
+ </div>
57
+ {% endfor %}
58
+ {% if not cards %}
59
+ <div class="no-results">No cards found in this drawer.</div>
60
+ {% endif %}
61
+ </div>
62
+ {% endblock %}
templates/drawers.html ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+ {% block title %}Browse Drawers — BPL Card Catalog{% endblock %}
3
+ {% block content %}
4
+ <div class="page-intro">
5
+ <h1>Browse by Drawer</h1>
6
+ <p>
7
+ {{ total_drawers }} drawers containing <span class="card-count">{{ total_cards }}</span> catalog cards.
8
+ Each drawer groups cards by subject, author, or title range.
9
+ </p>
10
+ </div>
11
+
12
+ <table class="drawer-table">
13
+ <thead>
14
+ <tr>
15
+ <th class="dt-num">Drawer</th>
16
+ <th class="dt-label">Label</th>
17
+ <th class="dt-count">Cards</th>
18
+ </tr>
19
+ </thead>
20
+ <tbody>
21
+ {% for d in drawers %}
22
+ <tr onclick="location.href='/drawer/{{ d.drawer_id }}'" style="cursor:pointer">
23
+ <td class="dt-num">{{ d.drawer_num }}</td>
24
+ <td class="dt-label"><a href="/drawer/{{ d.drawer_id }}">{{ d.drawer_label or '—' }}</a></td>
25
+ <td class="dt-count">{{ d.count }}</td>
26
+ </tr>
27
+ {% endfor %}
28
+ </tbody>
29
+ </table>
30
+ {% endblock %}
templates/index.html ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% extends "base.html" %}
2
+ {% block content %}
3
+ <div class="tab-bar">
4
+ <button class="tab active" data-tab="search" onclick="switchTab('search')">Search</button>
5
+ <button class="tab" data-tab="compare" onclick="switchTab('compare')">Compare OCR</button>
6
+ </div>
7
+
8
+ <div class="page-intro" id="intro-search">
9
+ <h1>Search the BPL Card Catalog</h1>
10
+ <p>
11
+ Search <span class="card-count">{{ total_cards }}</span> digitized catalog cards
12
+ from the Boston Public Library's Rare Books &amp; Manuscripts Department.
13
+ The BPL's card catalogs &mdash; over 410,000 cards across 385 drawers &mdash;
14
+ remain the most complete index of the department's holdings.
15
+ This tool uses AI-powered OCR to make them searchable.
16
+ </p>
17
+ </div>
18
+
19
+ <div class="page-intro" id="intro-compare" style="display:none">
20
+ <h1>Compare OCR Search Quality</h1>
21
+ <p>
22
+ Same query runs against old {{ old_ocr_label }} OCR and new {{ new_ocr_label }}
23
+ transcriptions. See how better OCR improves search results side by side.
24
+ </p>
25
+ </div>
26
+
27
+ <form class="search-form"
28
+ id="search-form"
29
+ hx-get="/search-single"
30
+ hx-target="#results"
31
+ hx-indicator=".loading-indicator">
32
+ <input type="hidden" id="tab-field" name="tab" value="search">
33
+ <div class="field query-field">
34
+ <label for="query">Query</label>
35
+ <input type="text" id="query" name="query" value="{{ query or '' }}"
36
+ placeholder="abolitionism, Civil War letters, Shakespeare…"
37
+ autocomplete="off">
38
+ </div>
39
+ <div class="field">
40
+ <label for="mode">Mode</label>
41
+ <select id="mode" name="mode">
42
+ <option value="vector" {% if mode != "fts" %}selected{% endif %}>Semantic</option>
43
+ <option value="fts" {% if mode == "fts" %}selected{% endif %}>Keyword</option>
44
+ </select>
45
+ </div>
46
+ <div class="field">
47
+ <label for="limit">Results</label>
48
+ <input type="number" id="limit" name="limit" value="{{ limit or 10 }}" min="1" max="20">
49
+ </div>
50
+ <button type="submit" id="submit-btn">Search</button>
51
+ <span class="loading-indicator htmx-indicator">searching…</span>
52
+ </form>
53
+
54
+ <div class="examples" id="examples">
55
+ <span class="examples-label">Try:</span>
56
+ {% for q in examples %}
57
{# The click handler copies the link's own text into the query box instead of
   embedding the query in a quoted JS string: HTML escaping is undone before
   the handler runs, so an apostrophe in an example would break the script. #}
<a href="#" class="example-link"
   hx-get="/search-single?query={{ q | urlencode }}&mode={{ mode or 'vector' }}&limit={{ limit or 10 }}"
   hx-target="#results"
   hx-indicator=".loading-indicator"
   onclick="document.getElementById('query').value=this.textContent">{{ q }}</a>
62
+ {% endfor %}
63
+ </div>
64
+
65
+ <div id="results">
66
+ <div id="spotlight" hx-get="/random-cards" hx-trigger="load" hx-swap="innerHTML"></div>
67
+ </div>
68
+
69
+ <script>
70
/**
 * Switch the page between the "search" and "compare" tabs.
 *
 * Updates the tab buttons, intro text, hidden `tab` form field, the form's
 * HTMX endpoint, the submit button label, the default result limit and the
 * example links, then re-runs the current query if the query box is not
 * empty.
 *
 * @param {string} tab - "search" or "compare". Any other value is treated
 *     as "search" so an unexpected value cannot hide both intro panels.
 */
function switchTab(tab) {
  if (tab !== 'compare') tab = 'search';
  var isSearch = tab === 'search';
  var endpoint = isSearch ? '/search-single' : '/search';

  // Update tab buttons
  document.querySelectorAll('.tab').forEach(function(tabBtn) {
    tabBtn.classList.toggle('active', tabBtn.dataset.tab === tab);
  });

  // Swap intro text
  document.getElementById('intro-search').style.display = isSearch ? '' : 'none';
  document.getElementById('intro-compare').style.display = isSearch ? 'none' : '';

  // Update hidden field
  document.getElementById('tab-field').value = tab;

  // Update form endpoint + button text
  var form = document.getElementById('search-form');
  var btn = document.getElementById('submit-btn');
  var limitInput = document.getElementById('limit');
  form.setAttribute('hx-get', endpoint);
  btn.textContent = isSearch ? 'Search' : 'Compare';

  // Move the limit to the new tab's default (10 for search, 5 for compare)
  // only when it still holds the other tab's default, so a value the user
  // typed is left alone.
  if (isSearch) {
    if (limitInput.value === '5') limitInput.value = '10';
  } else if (limitInput.value === '10') {
    limitInput.value = '5';
  }

  // Update example links
  var mode = document.getElementById('mode').value;
  var suffix = '&mode=' + encodeURIComponent(mode) +
               '&limit=' + encodeURIComponent(limitInput.value);
  document.querySelectorAll('.example-link').forEach(function(link) {
    var query = link.textContent;
    link.setAttribute('hx-get', endpoint + '?query=' + encodeURIComponent(query) + suffix);
  });

  // Re-process HTMX attributes after dynamic changes
  htmx.process(form);
  htmx.process(document.getElementById('examples'));

  // Re-run the current query against the new endpoint when there is one
  var queryInput = document.getElementById('query');
  if (queryInput.value.trim()) {
    htmx.trigger(form, 'submit');
  }
}
116
+
117
// Keep the address bar in sync with the search form: whenever HTMX swaps new
// content into #results, rewrite the URL (without adding a history entry) so
// it reflects the current query, mode, tab and limit. Values equal to their
// defaults are left out of the URL.
document.addEventListener('htmx:afterSwap', function(evt) {
  if (evt.detail.target.id !== 'results') return;

  var valueOf = function(id) {
    return document.getElementById(id).value;
  };
  var q = valueOf('query');
  var mode = valueOf('mode');
  var limit = valueOf('limit');
  var tab = valueOf('tab-field');

  // Default limit per tab; a tab not listed here never gets a limit param.
  var defaultLimits = { search: '10', compare: '5' };
  var tabIsKnown = Object.prototype.hasOwnProperty.call(defaultLimits, tab);

  var params = new URLSearchParams();
  if (q) {
    params.set('q', q);
  }
  if (mode !== 'vector') {
    params.set('mode', mode);
  }
  if (tab !== 'search') {
    params.set('tab', tab);
  }
  if (tabIsKnown && limit !== defaultLimits[tab]) {
    params.set('limit', limit);
  }

  var serialized = params.toString();
  var url = '/';
  if (serialized) {
    url = '/?' + serialized;
  }
  history.replaceState(null, '', url);
});
134
+
135
// On page load, restore the search state from the URL
// (?q=...&mode=...&tab=...&limit=...) and run that search automatically.
(function() {
  var params = new URLSearchParams(window.location.search);
  var q = params.get('q');
  if (!q) return;

  // Only known values are accepted; anything else falls back to the default.
  var mode = params.get('mode') === 'fts' ? 'fts' : 'vector';
  var tab = params.get('tab') === 'compare' ? 'compare' : 'search';
  var limit = params.get('limit') || (tab === 'search' ? '10' : '5');

  var queryInput = document.getElementById('query');
  document.getElementById('mode').value = mode;

  // switchTab() fires its own search when the query box is non-empty and
  // swaps a limit of "10" for "5" on the compare tab. Call it while the query
  // box is empty and before applying the URL's limit, so exactly one search
  // runs and an explicit limit from the URL is kept. The example links keep
  // the tab's default limit.
  queryInput.value = '';
  if (tab !== 'search') switchTab(tab);

  queryInput.value = q;
  document.getElementById('limit').value = limit;

  function runSearch() {
    htmx.trigger(document.getElementById('search-form'), 'submit');
  }

  // HTMX wires up the form once the document has been parsed, so wait for
  // that rather than a fixed delay that could fire too early; the zero-delay
  // timer lets other DOMContentLoaded handlers finish first.
  if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', function() {
      setTimeout(runSearch, 0);
    });
  } else {
    setTimeout(runSearch, 0);
  }
})();
156
+ </script>
157
+ {% endblock %}
templates/results-search.html ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% if query %}
2
+ <div class="results-header">
3
+ <div class="query-display">
4
+ Results for <span class="query-text">"{{ query }}"</span>
5
+ </div>
6
{# Search mode and hit count; the noun is pluralised so a single hit reads "1 result". #}
<div class="results-meta">
  {{ "Semantic" if mode != "fts" else "Keyword" }}
  · {{ results | length }} result{{ 's' if (results | length) != 1 else '' }}
</div>
10
+ </div>
11
+
12
+ <div class="results-list">
13
+ {% for r in results %}
14
+ <div class="result-card search-card">
15
+ <div class="card-rank-bar">
16
+ <span class="card-rank">#{{ r.rank }}</span>
17
+ {% if r.score %}<span class="card-score">{{ r.score }}</span>{% endif %}
18
+ </div>
19
+ <div class="search-card-layout">
20
+ <div class="card-image-wrap">
21
+ <img class="card-image" src="/image/{{ r.row_idx }}" alt="Catalog card #{{ r.rank }}" loading="lazy">
22
+ <button class="rotate-btn" onclick="rotateImage(this)" title="Rotate image 90°">&#x21bb;</button>
23
+ </div>
24
+ <div class="card-body">
25
+ <div class="card-meta">
26
+ <a href="/drawer/{{ r.drawer_id }}" class="drawer-label">Drawer {{ r.drawer_num }}</a>{% if r.drawer_label %} <span class="drawer-subject">· {{ r.drawer_label }}</span>{% endif %}
27
+ · card {{ r.card_number }}
28
+ {% if r.source_url %}· <a href="{{ r.source_url }}" target="_blank" rel="noopener">View on Internet Archive</a>{% endif %}
29
+ </div>
30
+ <div class="ocr-text">{{ r.ocr_text }}</div>
31
+ </div>
32
+ </div>
33
+ </div>
34
+ {% endfor %}
35
+ {% if not results %}
36
+ <div class="no-results">No results found</div>
37
+ {% endif %}
38
+ </div>
39
+
40
+ {% else %}
41
+ <div class="empty-state">
42
+ <p class="prompt">Search {{ total_cards }} digitized catalog cards</p>
43
+ <p>Enter a query to search the card catalog.</p>
44
+ </div>
45
+ {% endif %}
templates/results.html ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% if query %}
2
+ <div class="results-header">
3
+ <div class="query-display">
4
+ Results for <span class="query-text">"{{ query }}"</span>
5
+ </div>
6
+ <div class="results-meta">
7
+ {{ "Semantic" if mode != "fts" else "Keyword" }}
8
+ · {{ old_ocr_label or "Tesseract" }}: {{ old_results | length }}
9
+ · {{ new_ocr_label or "VLM OCR" }}: {{ new_results | length }}
10
+ </div>
11
+ </div>
12
+
13
+ <div class="results-grid">
14
+ <div class="results-column">
15
+ <h3 class="column-header old-header">Old OCR <span class="model-name">({{ old_ocr_label or "Tesseract" }})</span></h3>
16
+ {% for r in old_results %}
17
+ {% include "card.html" %}
18
+ {% endfor %}
19
+ {% if not old_results %}
20
+ <div class="no-results">No results found</div>
21
+ {% endif %}
22
+ </div>
23
+
24
+ <div class="results-column">
25
+ <h3 class="column-header new-header">New OCR <span class="model-name">({{ new_ocr_label or "VLM OCR" }})</span></h3>
26
+ {% for r in new_results %}
27
+ {% include "card.html" %}
28
+ {% endfor %}
29
+ {% if not new_results %}
30
+ <div class="no-results">No results found</div>
31
+ {% endif %}
32
+ </div>
33
+ </div>
34
+
35
+ {% else %}
36
+ <div class="empty-state">
37
+ <p class="prompt">Search {{ total_cards }} digitized catalog cards</p>
38
+ <p>Enter a query above to compare old and new OCR search results side by side.</p>
39
+ </div>
40
+ {% endif %}
templates/spotlight.html ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div class="spotlight">
2
+ <div class="spotlight-header">From the collection</div>
3
+ <div class="spotlight-grid">
4
+ {% for card in cards %}
5
+ <div class="spotlight-card">
6
+ <div class="spotlight-image-wrap">
7
+ <img class="spotlight-image" src="/image/{{ card.row_idx }}" alt="Catalog card" loading="lazy">
8
+ </div>
9
+ <div class="spotlight-meta">
10
+ <a href="/drawer/{{ card.drawer_id }}" class="drawer-label">Drawer {{ card.drawer_num }}</a>{% if card.drawer_label %} · {{ card.drawer_label }}{% endif %}
11
+ · card {{ card.card_number }}
12
+ </div>
13
+ <div class="spotlight-text">{{ card.ocr_text }}</div>
14
+ </div>
15
+ {% endfor %}
16
+ </div>
17
+ </div>