Spaces:

rairo
/

dev-stroke

Sleeping

App Files Files Community

rairo committed on Sep 16, 2025

Commit

3bafcbc

verified ·

1 Parent(s): e96bd6d

Update main.py

Browse files

Files changed (1) hide show

main.py +274 -67

main.py CHANGED Viewed

@@ -1,8 +1,8 @@
 # app.py — Hidden Stroke (AI Noir Investigation) with Internet Archive ingestion
 # Flask + Firebase Realtime DB + Firebase Storage + Gemini (exact model names)
-# Runs on Hugging Face like your reference app (same envs & init flow)
-import os, io, uuid, json, time, hmac, hashlib, random, traceback, requests
 from datetime import datetime, timedelta, timezone
 from typing import Dict, Any, Tuple, List, Optional
@@ -19,7 +19,7 @@ from google import genai
 from google.genai import types
 # -----------------------------------------------------------------------------
-# 1) CONFIG & INIT  (env names EXACTLY as your reference code)
 # -----------------------------------------------------------------------------
 app = Flask(__name__)
 CORS(app)
@@ -71,8 +71,14 @@ LEADERBOARD_TOP_N = 50
 # --- Misc config ---
 GAME_SALT = os.environ.get("GAME_SALT", "dev-salt")  # for deterministic seeds / HMAC
-ADMIN_KEY = os.environ.get("ADMIN_KEY")  # optional for admin endpoints
-IA_USER_AGENT = os.environ.get("IA_USER_AGENT", "HiddenStrokeBot/1.0 (+https://reddit.com)")  # polite UA
 # -----------------------------------------------------------------------------
 # 2) UTILS
@@ -142,7 +148,6 @@ def http_get_bytes(url: str) -> bytes:
 def ia_advanced_search(query: str, rows: int, page: int) -> List[dict]:
     # Internet Archive Advanced Search (no key required)
-    # docs: /advancedsearch.php?q=...&rows=...&page=...&output=json
     url = "https://archive.org/advancedsearch.php"
     params = {"q": query, "rows": rows, "page": page, "output": "json"}
     data = http_get_json(url, params=params)
@@ -153,30 +158,23 @@ def ia_metadata(identifier: str) -> dict:
     return http_get_json(url)
 def ia_best_image_from_metadata(meta: dict) -> Optional[dict]:
-    # Pick the largest suitable image file from /metadata result
     files = meta.get("files", []) or []
-    best = None
-    best_pixels = -1
     for f in files:
-        name = f.get("name", "")
         fmt = (f.get("format") or "").lower()
         if any(x in fmt for x in ["jpeg", "jpg", "png", "tiff", "image"]):
-            # width/height sometimes present
             w = int(f.get("width") or 0)
             h = int(f.get("height") or 0)
             if w and h:
                 px = w * h
             else:
-                px = int(f.get("size") or 0)  # fallback by bytes
             if px > best_pixels:
-                best_pixels = px
-                best = f
     return best
 def ingest_ia_doc(doc: dict) -> Optional[dict]:
-    """
-    Given a doc from advancedsearch, fetch /metadata and store best image into ia_pool.
-    """
     identifier = doc.get("identifier")
     if not identifier:
         return None
@@ -185,11 +183,12 @@ def ingest_ia_doc(doc: dict) -> Optional[dict]:
     if not best:
         return None
-    title = (meta.get("metadata", {}) or {}).get("title", "") or doc.get("title", "")
-    date = (meta.get("metadata", {}) or {}).get("date", "") or doc.get("date", "")
-    creator = (meta.get("metadata", {}) or {}).get("creator", "") or doc.get("creator", "")
-    rights = (meta.get("metadata", {}) or {}).get("rights", "") or doc.get("rights", "")
-    licenseurl = (meta.get("metadata", {}) or {}).get("licenseurl", "") or doc.get("licenseurl", "")
     download_url = f"https://archive.org/download/{identifier}/{best['name']}"
     record = {
@@ -216,17 +215,15 @@ def choose_ia_item_for_case(case_id: str) -> Optional[dict]:
         return None
     identifiers = sorted(pool.keys())
     case_seed = seed_for_date(case_id)
-    idx = case_seed % len(identifiers)
-    ident = identifiers[idx]
     return pool[ident]
 def download_image_to_pil(url: str) -> Image.Image:
     data = http_get_bytes(url)
-    img = Image.open(io.BytesIO(data)).convert("RGB")
-    return img
 def crop_signature_macro(img: Image.Image, size: int = 512) -> Image.Image:
-    # simple lower-right macro crop (if smaller, clamp)
     w, h = img.size
     cw = min(size, w)
     ch = min(size, h)
@@ -235,27 +232,209 @@ def crop_signature_macro(img: Image.Image, size: int = 512) -> Image.Image:
     return img.crop((left, top, left + cw, top + ch))
 # -----------------------------------------------------------------------------
-# 3) CASE GENERATION (now uses IA for the authentic image)
 # -----------------------------------------------------------------------------
 def ensure_case_generated(case_id: str) -> Dict[str, Any]:
     existing_public = case_ref(case_id).child("public").get()
     if existing_public:
         return existing_public
-    # Make sure we have a minimum pool of cached images ready
     try:
-        ensure_minimum_ia_pool()
     except Exception as e:
         print("Bootstrap warning:", e)
-    # Ensure we have at least some IA records; if not, auto-ingest a default set (one page)
     pool = ia_pool_ref().get() or {}
     if not pool:
         try:
-            # Default query targets well-known museum collections with images
-            default_query = '(collection:(metropolitanmuseum OR smithsonian OR getty OR artic) AND mediatype:image)'
-            docs = ia_advanced_search(default_query, rows=100, page=1)
             for d in docs:
                 try:
                     ingest_ia_doc(d)
@@ -267,22 +446,18 @@ def ensure_case_generated(case_id: str) -> Dict[str, Any]:
     # Pick authentic from ia_pool deterministically
     ia_item = choose_ia_item_for_case(case_id)
     if not ia_item:
-        # absolute fallback (rare)
         raise RuntimeError("No IA items available. Ingest needed.")
     # Deterministic mode
     case_seed = seed_for_date(case_id)
     rng = random.Random(case_seed)
     mode = "knowledge" if (case_seed % 2 == 0) else "observation"
-    # Style label (flavor text only)
     style_period = "sourced from Internet Archive; museum catalog reproduction"
-    # Download authentic image
     source_url = ia_item.get("storage_url") or ia_item["download_url"]
     auth_img = download_image_to_pil(source_url)
     images_urls: List[str] = []
     signature_crops: List[str] = []
@@ -299,10 +474,10 @@ def ensure_case_generated(case_id: str) -> Dict[str, Any]:
     if mode == "knowledge":
         # Use the same authentic visual for all three; differences come from metadata only
         for idx in [2, 3]:
-            images_urls.append(images_urls[0])  # same URL OK (client treats as separate cards)
             signature_crops.append(signature_crops[0])
     else:
-        # observation: generate 2 subtle variants (near-identical; tweak signature micro-geometry)
         for i in range(2):
             forg_prompt = """
 Create a near-identical variant of the provided painting.
@@ -330,7 +505,6 @@ No annotations. Differences must be visible only at macro zoom.
             signature_crops.append(c_url)
     # === Gemini: Case brief + 3 metadata bundles + ledger + solution ===
-    # Feed IA title/creator/year so the authentic bundle aligns with reality.
     title = ia_item.get("title") or "Untitled"
     creator = ia_item.get("creator") or ""
     date = ia_item.get("date") or ""
@@ -353,7 +527,7 @@ TASK:
 1) Create a short, punchy "case_brief" (2–4 sentences) explaining why the artifact matters and why fraud is suspected — NO SPOILERS.
 2) Prepare THREE metadata bundles for images A,B,C with NEARLY IDENTICAL fields.
    Ensure exactly ONE bundle is AUTHENTIC and that it corresponds to the above authentic context.
-   The other two are FORGERIES that are almost correct but contain subtle, reality-checkable anomalies.
 3) Provide a concise "ledger_summary" describing a believable ownership/payment trail.
 4) Provide the solution with: "answer_index" (0 for A, 1 for B, 2 for C) and detailed flags for signature/metadata/financial, plus an "explanation".
@@ -392,9 +566,11 @@ CONSTRAINTS:
     except Exception:
         cleaned = raw_text
         if "```" in raw_text:
-            cleaned = raw_text.split("```")[1]
-            if cleaned.lower().startswith("json"):
-                cleaned = cleaned.split("\n", 1)[1]
         meta_json = json.loads(cleaned)
     case_brief = meta_json.get("case_brief", "A resurfaced portrait raises questions—its paper trail glitters a little too perfectly.")
@@ -417,7 +593,7 @@ CONSTRAINTS:
         "style_period": style_period,
         "images": images_urls,
         "signature_crops": signature_crops,
-        "metadata": metadata,          # sanitized (no answer)
         "ledger_summary": ledger_summary,
         "timer_seconds": TIMER_SECONDS,
         "initial_ip": INITIAL_IP,
@@ -445,7 +621,7 @@ CONSTRAINTS:
     return public
 # -----------------------------------------------------------------------------
-# 4) SESSIONS, TOOLS, GUESS, LEADERBOARD (same behavior as before)
 # -----------------------------------------------------------------------------
 def create_session(user_id: str, username: str, case_id: str) -> Dict[str, Any]:
     session_id = str(uuid.uuid4())
@@ -515,21 +691,20 @@ def upsert_leaderboard(case_id: str, user_id: str, username: str, score: int):
     leaderboard_ref(case_id).set(top)
 # -----------------------------------------------------------------------------
-# 5) ROUTES
 # -----------------------------------------------------------------------------
 @app.route("/health", methods=["GET"])
 def health():
     return jsonify({"ok": True, "time": datetime.now(timezone.utc).isoformat()})
-# --- Admin: Internet Archive ingestion ---
 @app.route("/admin/ingest-ia", methods=["POST"])
 def admin_ingest_ia():
     if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
         return jsonify({"error": "Forbidden"}), 403
     body = request.get_json() or {}
-    # Example default: a few reputable museum collections with images
-    query = body.get("query") or '(collection:(metropolitanmuseum OR smithsonian OR getty OR artic) AND mediatype:image)'
     pages = int(body.get("pages") or 2)
     rows = int(body.get("rows") or 100)
     ingested = 0
@@ -538,14 +713,13 @@ def admin_ingest_ia():
     for page in range(1, pages + 1):
         try:
             docs = ia_advanced_search(query, rows=rows, page=page)
-        except Exception as e:
             errors += 1
             continue
         for d in docs:
             ident = d.get("identifier")
             if not ident:
                 continue
-            # skip if already ingested
             if ia_pool_ref().child(ident).get():
                 continue
             try:
@@ -559,6 +733,26 @@ def admin_ingest_ia():
     pool_size = len(ia_pool_ref().get() or {})
     return jsonify({"ok": True, "ingested": ingested, "errors": errors, "pool_size": pool_size})
 @app.route("/admin/ia-pool/stats", methods=["GET"])
 def ia_pool_stats():
     if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
@@ -566,7 +760,7 @@ def ia_pool_stats():
     pool = ia_pool_ref().get() or {}
     return jsonify({"pool_size": len(pool)})
-# --- Admin: pre-generate today's case (optional) ---
 @app.route("/admin/generate-today", methods=["POST"])
 def admin_generate_today():
     if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
@@ -575,6 +769,21 @@ def admin_generate_today():
     public = ensure_case_generated(case_id)
     return jsonify({"generated": True, "case_id": case_id, "mode": public.get("mode")})
 # --- Player flow ---
 @app.route("/cases/today/start", methods=["POST"])
 def start_case():
@@ -586,18 +795,14 @@ def start_case():
     existing = sessions_ref().order_by_child("user_id").equal_to(user_id).get()
     sess = None
     if existing:
-        for sid, sdoc in existing.items():
             if sdoc.get("case_id") == case_id and sdoc.get("status") == "active":
                 sess = sdoc
                 break
     if not sess:
         sess = create_session(user_id, username, case_id)
-    payload = {
-        "session_id": sess["session_id"],
-        "case": public
-    }
-    return jsonify(payload)
 @app.route("/cases/<case_id>/tool/signature", methods=["POST"])
 def tool_signature(case_id):
@@ -707,7 +912,7 @@ def submit_guess(case_id):
 def leaderboard_daily():
     case_id = utc_today_str()
     top = leaderboard_ref(case_id).get() or []
-    user_id, username = extract_user_from_headers(request)
     me = plays_ref(case_id).child(user_id).get() or {}
     rank = None
     if top:
@@ -718,14 +923,16 @@ def leaderboard_daily():
     return jsonify({"case_id": case_id, "top": top, "me": {"score": me.get("score"), "rank": rank}})
 # -----------------------------------------------------------------------------
-# 6) MAIN
 # -----------------------------------------------------------------------------
 if __name__ == "__main__":
     # Optional: pre-warm pool on boot so you’re ready before first request
     if os.environ.get("BOOTSTRAP_IA", "1") == "1":
         print("Bootstrapping Internet Archive pool...")
-        stats = ensure_minimum_ia_pool()
-        print("Bootstrap complete:", stats)
     app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")), debug=True)

 # app.py — Hidden Stroke (AI Noir Investigation) with Internet Archive ingestion
 # Flask + Firebase Realtime DB + Firebase Storage + Gemini (exact model names)
+# Runs on Hugging Face (envs: FIREBASE, Firebase_DB, Firebase_Storage, Gemini, optional GAME_SALT/ADMIN_KEY)
+import os, io, uuid, json, hmac, hashlib, random, traceback, requests
 from datetime import datetime, timedelta, timezone
 from typing import Dict, Any, Tuple, List, Optional
 from google.genai import types
 # -----------------------------------------------------------------------------
+# 1) CONFIG & INIT
 # -----------------------------------------------------------------------------
 app = Flask(__name__)
 CORS(app)
 # --- Misc config ---
 GAME_SALT = os.environ.get("GAME_SALT", "dev-salt")  # for deterministic seeds / HMAC
+ADMIN_KEY = os.environ.get("ADMIN_KEY")              # optional for admin endpoints
+IA_USER_AGENT = os.environ.get("IA_USER_AGENT", "HiddenStrokeBot/1.0 (+https://reddit.com)")
+MIN_IA_POOL = int(os.environ.get("MIN_IA_POOL", "60"))
+DEFAULT_IA_QUERY = os.environ.get(
+    "IA_QUERY",
+    '(collection:(metropolitanmuseum OR smithsonian OR getty OR artic) AND mediatype:image)'
+)
+ALLOW_DEV_BOOTSTRAP = os.environ.get("ALLOW_DEV_BOOTSTRAP", "0") == "1"
 # -----------------------------------------------------------------------------
 # 2) UTILS
 def ia_advanced_search(query: str, rows: int, page: int) -> List[dict]:
     # Internet Archive Advanced Search (no key required)
     url = "https://archive.org/advancedsearch.php"
     params = {"q": query, "rows": rows, "page": page, "output": "json"}
     data = http_get_json(url, params=params)
     return http_get_json(url)
 def ia_best_image_from_metadata(meta: dict) -> Optional[dict]:
     files = meta.get("files", []) or []
+    best, best_pixels = None, -1
     for f in files:
         fmt = (f.get("format") or "").lower()
         if any(x in fmt for x in ["jpeg", "jpg", "png", "tiff", "image"]):
             w = int(f.get("width") or 0)
             h = int(f.get("height") or 0)
             if w and h:
                 px = w * h
             else:
+                px = int(f.get("size") or 0)
             if px > best_pixels:
+                best_pixels, best = px, f
     return best
 def ingest_ia_doc(doc: dict) -> Optional[dict]:
+    """Fetch /metadata and store best image entry into ia_pool."""
     identifier = doc.get("identifier")
     if not identifier:
         return None
     if not best:
         return None
+    md = meta.get("metadata", {}) or {}
+    title = md.get("title", "") or doc.get("title", "")
+    date = md.get("date", "") or doc.get("date", "")
+    creator = md.get("creator", "") or doc.get("creator", "")
+    rights = md.get("rights", "") or doc.get("rights", "")
+    licenseurl = md.get("licenseurl", "") or doc.get("licenseurl", "")
     download_url = f"https://archive.org/download/{identifier}/{best['name']}"
     record = {
         return None
     identifiers = sorted(pool.keys())
     case_seed = seed_for_date(case_id)
+    ident = identifiers[case_seed % len(identifiers)]
     return pool[ident]
 def download_image_to_pil(url: str) -> Image.Image:
     data = http_get_bytes(url)
+    return Image.open(io.BytesIO(data)).convert("RGB")
 def crop_signature_macro(img: Image.Image, size: int = 512) -> Image.Image:
+    """Lower-right macro crop."""
     w, h = img.size
     cw = min(size, w)
     ch = min(size, h)
     return img.crop((left, top, left + cw, top + ch))
 # -----------------------------------------------------------------------------
+# 3) IA -> Firebase Storage caching + Zero-admin bootstrap
+# -----------------------------------------------------------------------------
def _resize_if_needed(img: Image.Image, max_dim: int = 4096) -> Image.Image:
    """Downscale `img` so its longer side is at most `max_dim`, keeping aspect ratio.

    Args:
        img: Image to check; only ``img.size`` and ``img.resize`` are used.
        max_dim: Upper bound, in pixels, for the longer side.

    Returns:
        `img` itself when it already fits, otherwise a LANCZOS-resampled copy
        whose longer side equals `max_dim`.
    """
    w, h = img.size
    longest = max(w, h)
    if longest <= max_dim:
        return img
    scale = max_dim / longest
    # The shorter side is truncated toward zero; clamp it to 1 px so a very
    # elongated image never yields a zero-sized target.
    if w >= h:
        new_size = (max_dim, max(1, int(h * scale)))
    else:
        new_size = (max(1, int(w * scale)), max_dim)
    return img.resize(new_size, Image.LANCZOS)
def _jpeg_bytes(img, quality: int) -> bytes:
    """Encode `img` as an optimized JPEG and return the encoded bytes."""
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=quality, optimize=True)
    return buf.getvalue()


def cache_single_ia_identifier(
    identifier: str,
    overwrite: bool = False,
    max_dim: int = 4096,
    jpeg_quality: int = 90,
    skip_if_restricted: bool = True,
) -> dict:
    """Cache one ia_pool item (image + signature macro crop) in Firebase Storage.

    Downloads the item's image, shrinks it to at most `max_dim` on its longer
    side, uploads it and a 512px signature crop as JPEGs under
    ``ia_cache/<identifier>/``, then updates the ia_pool record with the
    storage URLs, paths, dimensions and a UTC ``cached_at`` timestamp.

    Args:
        identifier: Key of the record inside ia_pool.
        overwrite: Re-cache even when the record already has a ``storage_url``.
        max_dim: Longest allowed side of the stored image, in pixels.
        jpeg_quality: JPEG quality used for both uploads.
        skip_if_restricted: Skip records whose ``rights`` text contains
            "in copyright" or "all rights reserved".

    Returns:
        A dict with ``identifier`` and ``stored``. On success it also carries
        the URLs and dimensions; otherwise a ``reason`` string (not_in_pool,
        restricted_rights, already_cached, missing_source_url,
        download_failed: ...).
    """
    rec_ref = ia_pool_ref().child(identifier)
    rec = rec_ref.get() or {}
    if not rec:
        return {"identifier": identifier, "stored": False, "reason": "not_in_pool"}

    # str() keeps a non-string rights value (e.g. a list) from crashing .lower().
    rights = str(rec.get("rights") or "").lower()
    if skip_if_restricted and ("in copyright" in rights or "all rights reserved" in rights):
        return {"identifier": identifier, "stored": False, "reason": "restricted_rights"}

    if rec.get("storage_url") and not overwrite:
        return {"identifier": identifier, "stored": False, "reason": "already_cached", "storage_url": rec["storage_url"]}

    # Prefer existing cached URL as source; fall back to IA.
    # NOTE(review): with overwrite=True this re-encodes the previously cached
    # JPEG rather than the IA original -- confirm that is intended.
    download_url = rec.get("download_url")
    source_url = rec.get("storage_url") or download_url
    if not source_url:
        return {"identifier": identifier, "stored": False, "reason": "missing_source_url"}

    try:
        img = download_image_to_pil(source_url)
    except Exception as e:
        # Only retry when the IA URL is a different source; re-requesting the
        # URL that just failed would merely repeat the failure.
        if not download_url or download_url == source_url:
            return {"identifier": identifier, "stored": False, "reason": f"download_failed: {e}"}
        try:
            img = download_image_to_pil(download_url)
        except Exception as e2:
            return {"identifier": identifier, "stored": False, "reason": f"download_failed: {e2}"}

    img = _resize_if_needed(img, max_dim=max_dim)
    w, h = img.size

    # Upload the (possibly downscaled) full image.
    img_path = f"ia_cache/{identifier}/original.jpg"
    storage_url = upload_bytes_to_storage(_jpeg_bytes(img, jpeg_quality), img_path, "image/jpeg")

    # Upload the signature macro crop.
    crop = crop_signature_macro(img, 512)
    crop_path = f"ia_cache/{identifier}/signature_crop.jpg"
    signature_crop_url = upload_bytes_to_storage(_jpeg_bytes(crop, jpeg_quality), crop_path, "image/jpeg")

    rec_ref.update({
        "storage_url": storage_url,
        "signature_crop_url": signature_crop_url,
        "image_path": img_path,
        "crop_path": crop_path,
        "width": w,
        "height": h,
        "cached_at": datetime.now(timezone.utc).isoformat(),
    })
    return {
        "identifier": identifier,
        "stored": True,
        "storage_url": storage_url,
        "signature_crop_url": signature_crop_url,
        "width": w,
        "height": h,
    }
def batch_cache_ia_pool(
    limit: int = 100,
    overwrite: bool = False,
    randomize: bool = True,
    min_width: int = 800,
    min_height: int = 800,
    max_dim: int = 4096,
    jpeg_quality: int = 90,
    skip_if_restricted: bool = True,
) -> dict:
    """Run cache_single_ia_identifier over up to `limit` eligible ia_pool records.

    A record is eligible when it has no ``storage_url`` (or `overwrite` is
    set) and, if both ``width`` and ``height`` are recorded, neither falls
    below `min_width` / `min_height`. Records with unknown dimensions pass.

    Returns:
        A summary dict with ``ok``, ``processed``, ``stored``, ``skipped`` and
        the per-item ``results`` list.
    """
    pool = ia_pool_ref().get() or {}
    if not pool:
        return {"ok": True, "processed": 0, "stored": 0, "skipped": 0, "results": []}

    def _eligible(rec) -> bool:
        if rec.get("storage_url") and not overwrite:
            return False
        rec_w = int(rec.get("width") or 0)
        rec_h = int(rec.get("height") or 0)
        known_dims = bool(rec_w and rec_h)
        return not (known_dims and (rec_w < min_width or rec_h < min_height))

    candidates = [ident for ident, rec in pool.items() if _eligible(rec)]
    if randomize:
        random.shuffle(candidates)
    candidates = candidates[:max(0, limit)]

    results = [
        cache_single_ia_identifier(
            ident,
            overwrite=overwrite,
            max_dim=max_dim,
            jpeg_quality=jpeg_quality,
            skip_if_restricted=skip_if_restricted,
        )
        for ident in candidates
    ]
    stored = sum(1 for outcome in results if outcome.get("stored"))
    return {
        "ok": True,
        "processed": len(candidates),
        "stored": stored,
        "skipped": len(results) - stored,
        "results": results,
    }
def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pages: int = 5) -> dict:
    """Zero-admin bootstrap of the Internet Archive pool.

    1. While ia_pool holds fewer than `min_items` records, page through IA
       Advanced Search (DEFAULT_IA_QUERY) and ingest documents not yet in
       the pool, for at most `max_pages` pages.
    2. Cache images until `min_items` records carry a ``storage_url``.

    Args:
        min_items: Target number of pool records, and of cached records.
        rows: Search results requested per page.
        max_pages: Upper bound on search pages fetched in one call.

    Returns:
        ``{"ok": True, "had", "added", "cached", "final_size"}`` where `had`
        is the pool size on entry, `added` the newly ingested records,
        `cached` the images stored by this call and `final_size` the pool
        size on exit.
    """
    pool = ia_pool_ref().get() or {}
    have = len(pool)
    added = 0
    cached = 0

    page = 1
    while have + added < min_items and page <= max_pages:
        try:
            docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=rows, page=page)
        except Exception as e:
            print("IA search failed on page", page, e)
            break
        if not docs:
            break
        for d in docs:
            ident = d.get("identifier")
            if not ident:
                continue
            if ia_pool_ref().child(ident).get():
                continue
            try:
                if ingest_ia_doc(d):
                    added += 1
            except Exception as e:
                # Best effort: one bad document must not stop the bootstrap,
                # but leave a trace instead of swallowing the error.
                print("IA ingest failed for", ident, e)
        page += 1

    # The caching shortfall is measured against records that already have a
    # storage_url. Measuring it against pool size would skip caching
    # entirely once the pool holds `min_items` records.
    pool = ia_pool_ref().get() or {}
    already_cached = sum(1 for rec in pool.values() if rec.get("storage_url"))
    need_cache = max(0, min_items - already_cached)
    if need_cache:
        res = batch_cache_ia_pool(limit=need_cache, randomize=True)
        cached = res.get("stored", 0)

    final_size = len(ia_pool_ref().get() or {})
    return {"ok": True, "had": have, "added": added, "cached": cached, "final_size": final_size}
+# -----------------------------------------------------------------------------
+# 4) CASE GENERATION (uses IA for authentic image, Gemini for forgeries/meta)
 # -----------------------------------------------------------------------------
 def ensure_case_generated(case_id: str) -> Dict[str, Any]:
     existing_public = case_ref(case_id).child("public").get()
     if existing_public:
         return existing_public
+    # Ensure we have a cached pool ready
     try:
+        stats = ensure_minimum_ia_pool()
+        if stats.get("added") or stats.get("cached"):
+            print("Bootstrap:", stats)
     except Exception as e:
         print("Bootstrap warning:", e)
+    # Fallback ingest if pool is empty
     pool = ia_pool_ref().get() or {}
     if not pool:
         try:
+            docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=100, page=1)
             for d in docs:
                 try:
                     ingest_ia_doc(d)
     # Pick authentic from ia_pool deterministically
     ia_item = choose_ia_item_for_case(case_id)
     if not ia_item:
         raise RuntimeError("No IA items available. Ingest needed.")
     # Deterministic mode
     case_seed = seed_for_date(case_id)
     rng = random.Random(case_seed)
     mode = "knowledge" if (case_seed % 2 == 0) else "observation"
     style_period = "sourced from Internet Archive; museum catalog reproduction"
+    # Load authentic image (prefer cached)
     source_url = ia_item.get("storage_url") or ia_item["download_url"]
     auth_img = download_image_to_pil(source_url)
     images_urls: List[str] = []
     signature_crops: List[str] = []
     if mode == "knowledge":
         # Use the same authentic visual for all three; differences come from metadata only
         for idx in [2, 3]:
+            images_urls.append(images_urls[0])  # same URL is ok
             signature_crops.append(signature_crops[0])
     else:
+        # observation: two subtle variants (signature micro-geometry)
         for i in range(2):
             forg_prompt = """
 Create a near-identical variant of the provided painting.
             signature_crops.append(c_url)
     # === Gemini: Case brief + 3 metadata bundles + ledger + solution ===
     title = ia_item.get("title") or "Untitled"
     creator = ia_item.get("creator") or ""
     date = ia_item.get("date") or ""
 1) Create a short, punchy "case_brief" (2–4 sentences) explaining why the artifact matters and why fraud is suspected — NO SPOILERS.
 2) Prepare THREE metadata bundles for images A,B,C with NEARLY IDENTICAL fields.
    Ensure exactly ONE bundle is AUTHENTIC and that it corresponds to the above authentic context.
+   The other two are FORGERIES with subtle, reality-checkable anomalies.
 3) Provide a concise "ledger_summary" describing a believable ownership/payment trail.
 4) Provide the solution with: "answer_index" (0 for A, 1 for B, 2 for C) and detailed flags for signature/metadata/financial, plus an "explanation".
     except Exception:
         cleaned = raw_text
         if "```" in raw_text:
+            parts = raw_text.split("```")
+            if len(parts) >= 2:
+                cleaned = parts[1]
+                if cleaned.lower().startswith("json"):
+                    cleaned = cleaned.split("\n", 1)[1]
         meta_json = json.loads(cleaned)
     case_brief = meta_json.get("case_brief", "A resurfaced portrait raises questions—its paper trail glitters a little too perfectly.")
         "style_period": style_period,
         "images": images_urls,
         "signature_crops": signature_crops,
+        "metadata": metadata,
         "ledger_summary": ledger_summary,
         "timer_seconds": TIMER_SECONDS,
         "initial_ip": INITIAL_IP,
     return public
 # -----------------------------------------------------------------------------
+# 5) SESSIONS, TOOLS, GUESS, LEADERBOARD
 # -----------------------------------------------------------------------------
 def create_session(user_id: str, username: str, case_id: str) -> Dict[str, Any]:
     session_id = str(uuid.uuid4())
     leaderboard_ref(case_id).set(top)
 # -----------------------------------------------------------------------------
+# 6) ROUTES
 # -----------------------------------------------------------------------------
 @app.route("/health", methods=["GET"])
 def health():
     return jsonify({"ok": True, "time": datetime.now(timezone.utc).isoformat()})
+# --- Admin: Internet Archive ingestion (manual) ---
 @app.route("/admin/ingest-ia", methods=["POST"])
 def admin_ingest_ia():
     if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
         return jsonify({"error": "Forbidden"}), 403
     body = request.get_json() or {}
+    query = body.get("query") or DEFAULT_IA_QUERY
     pages = int(body.get("pages") or 2)
     rows = int(body.get("rows") or 100)
     ingested = 0
     for page in range(1, pages + 1):
         try:
             docs = ia_advanced_search(query, rows=rows, page=page)
+        except Exception:
             errors += 1
             continue
         for d in docs:
             ident = d.get("identifier")
             if not ident:
                 continue
             if ia_pool_ref().child(ident).get():
                 continue
             try:
     pool_size = len(ia_pool_ref().get() or {})
     return jsonify({"ok": True, "ingested": ingested, "errors": errors, "pool_size": pool_size})
+# --- Admin: Cache IA images to Firebase Storage (manual) ---
@app.route("/admin/cache-ia", methods=["POST"])
def admin_cache_ia():
    """Admin endpoint: cache uncached ia_pool images into Firebase Storage.

    Requires the ``X-Admin-Key`` header to match ADMIN_KEY. The optional JSON
    body may override any batch_cache_ia_pool option (limit, overwrite,
    randomize, min_width, min_height, max_dim, jpeg_quality,
    skip_if_restricted); a missing or non-JSON body means "use the defaults".

    Returns:
        The batch_cache_ia_pool summary as JSON, 403 when the key is missing
        or wrong, or 400 when a numeric option is not an integer.
    """
    supplied_key = request.headers.get("X-Admin-Key") or ""
    # Constant-time comparison so the key cannot be probed via response timing;
    # bytes are compared because compare_digest rejects non-ASCII str.
    if not ADMIN_KEY or not hmac.compare_digest(
        supplied_key.encode("utf-8"), ADMIN_KEY.encode("utf-8")
    ):
        return jsonify({"error": "Forbidden"}), 403

    # silent=True lets an empty or non-JSON body fall back to the defaults
    # instead of failing before the `or {}`-style default can apply.
    cfg = request.get_json(silent=True)
    if not isinstance(cfg, dict):
        cfg = {}

    try:
        options = {
            "limit": int(cfg.get("limit", 100)),
            "overwrite": bool(cfg.get("overwrite", False)),
            "randomize": bool(cfg.get("randomize", True)),
            "min_width": int(cfg.get("min_width", 800)),
            "min_height": int(cfg.get("min_height", 800)),
            "max_dim": int(cfg.get("max_dim", 4096)),
            "jpeg_quality": int(cfg.get("jpeg_quality", 90)),
            "skip_if_restricted": bool(cfg.get("skip_if_restricted", True)),
        }
    except (TypeError, ValueError):
        return jsonify({"error": "Numeric options must be integers."}), 400

    return jsonify(batch_cache_ia_pool(**options))
+# --- Admin: pool stats ---
 @app.route("/admin/ia-pool/stats", methods=["GET"])
 def ia_pool_stats():
     if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
     pool = ia_pool_ref().get() or {}
     return jsonify({"pool_size": len(pool)})
+# --- Admin: pre-generate today's case (manual) ---
 @app.route("/admin/generate-today", methods=["POST"])
 def admin_generate_today():
     if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
     public = ensure_case_generated(case_id)
     return jsonify({"generated": True, "case_id": case_id, "mode": public.get("mode")})
+# --- DEV-ONLY: panic button bootstrap (no auth; gated by env) ---
@app.route("/admin/bootstrap-now", methods=["POST"])
def admin_bootstrap_now():
    """Dev-only trigger for ensure_minimum_ia_pool; no key check, gated by ALLOW_DEV_BOOTSTRAP.

    The optional JSON body may set ``min_items``, ``rows`` and ``max_pages``.
    Responds 403 while the gate is off, 500 with the error text when the
    bootstrap raises, otherwise the bootstrap stats.
    """
    if not ALLOW_DEV_BOOTSTRAP:
        return jsonify({"error": "Disabled. Set ALLOW_DEV_BOOTSTRAP=1 to enable."}), 403

    cfg = request.get_json() or {}
    wanted = {
        "min_items": int(cfg.get("min_items", MIN_IA_POOL)),
        "rows": int(cfg.get("rows", 100)),
        "max_pages": int(cfg.get("max_pages", 5)),
    }
    try:
        stats = ensure_minimum_ia_pool(**wanted)
        response = jsonify({"ok": True, "stats": stats})
    except Exception as e:
        response = (jsonify({"ok": False, "error": str(e)}), 500)
    return response
 # --- Player flow ---
 @app.route("/cases/today/start", methods=["POST"])
 def start_case():
     existing = sessions_ref().order_by_child("user_id").equal_to(user_id).get()
     sess = None
     if existing:
+        for _, sdoc in existing.items():
             if sdoc.get("case_id") == case_id and sdoc.get("status") == "active":
                 sess = sdoc
                 break
     if not sess:
         sess = create_session(user_id, username, case_id)
+    return jsonify({"session_id": sess["session_id"], "case": public})
 @app.route("/cases/<case_id>/tool/signature", methods=["POST"])
 def tool_signature(case_id):
 def leaderboard_daily():
     case_id = utc_today_str()
     top = leaderboard_ref(case_id).get() or []
+    user_id, _ = extract_user_from_headers(request)
     me = plays_ref(case_id).child(user_id).get() or {}
     rank = None
     if top:
     return jsonify({"case_id": case_id, "top": top, "me": {"score": me.get("score"), "rank": rank}})
 # -----------------------------------------------------------------------------
+# 7) MAIN
 # -----------------------------------------------------------------------------
 if __name__ == "__main__":
     # Optional: pre-warm pool on boot so you’re ready before first request
     if os.environ.get("BOOTSTRAP_IA", "1") == "1":
         print("Bootstrapping Internet Archive pool...")
+        try:
+            stats = ensure_minimum_ia_pool()
+            print("Bootstrap complete:", stats)
+        except Exception as e:
+            print("Bootstrap failed:", e)
     app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")), debug=True)