rairo committed on
Commit
6ebaf2b
·
verified ·
1 Parent(s): 3bafcbc

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +151 -76
main.py CHANGED
@@ -1,6 +1,8 @@
1
- # app.py — Hidden Stroke (AI Noir Investigation) with Internet Archive ingestion
2
- # Flask + Firebase Realtime DB + Firebase Storage + Gemini (exact model names)
3
- # Runs on Hugging Face (envs: FIREBASE, Firebase_DB, Firebase_Storage, Gemini, optional GAME_SALT/ADMIN_KEY)
 
 
4
 
5
  import os, io, uuid, json, hmac, hashlib, random, traceback, requests
6
  from datetime import datetime, timedelta, timezone
@@ -10,6 +12,15 @@ from flask import Flask, request, jsonify
10
  from flask_cors import CORS
11
  from PIL import Image
12
 
 
 
 
 
 
 
 
 
 
13
  # ---------------- Firebase Admin (Realtime DB + Storage) ----------------
14
  import firebase_admin
15
  from firebase_admin import credentials, db, storage
@@ -43,9 +54,9 @@ try:
43
  })
44
  bucket = storage.bucket()
45
  db_root = db.reference("/")
46
- print("Firebase Realtime DB + Storage initialized.")
47
  except Exception as e:
48
- print(f"FATAL: Firebase init failed: {e}")
49
  raise
50
 
51
  # --- Gemini ---
@@ -54,9 +65,9 @@ try:
54
  if not GEMINI_API_KEY:
55
  raise ValueError("The 'Gemini' environment variable is not set.")
56
  client = genai.Client(api_key=GEMINI_API_KEY)
57
- print("Gemini client initialized.")
58
  except Exception as e:
59
- print(f"FATAL: Gemini init failed: {e}")
60
  raise
61
 
62
  # --- Models (exact names) ---
@@ -70,8 +81,8 @@ TOOL_COSTS = {"signature": 1, "metadata": 1, "financial": 2}
70
  LEADERBOARD_TOP_N = 50
71
 
72
  # --- Misc config ---
73
- GAME_SALT = os.environ.get("GAME_SALT", "dev-salt") # for deterministic seeds / HMAC
74
- ADMIN_KEY = os.environ.get("ADMIN_KEY") # optional for admin endpoints
75
  IA_USER_AGENT = os.environ.get("IA_USER_AGENT", "HiddenStrokeBot/1.0 (+https://reddit.com)")
76
  MIN_IA_POOL = int(os.environ.get("MIN_IA_POOL", "60"))
77
  DEFAULT_IA_QUERY = os.environ.get(
@@ -79,6 +90,7 @@ DEFAULT_IA_QUERY = os.environ.get(
79
  '(collection:(metropolitanmuseum OR smithsonian OR getty OR artic) AND mediatype:image)'
80
  )
81
  ALLOW_DEV_BOOTSTRAP = os.environ.get("ALLOW_DEV_BOOTSTRAP", "0") == "1"
 
82
 
83
  # -----------------------------------------------------------------------------
84
  # 2) UTILS
@@ -105,10 +117,13 @@ def hmac_hex(s: str) -> str:
105
  return hmac.new(GAME_SALT.encode(), s.encode(), hashlib.sha256).hexdigest()
106
 
107
def upload_bytes_to_storage(data: bytes, path: str, content_type: str) -> str:
    """Write raw bytes to Firebase Storage at *path* and return a public URL."""
    target = bucket.blob(path)
    target.upload_from_string(data, content_type=content_type)
    # Expose the object publicly so game clients can fetch it by URL directly.
    target.make_public()
    return target.public_url
 
 
112
 
113
  def pil_from_inline_image_part(part) -> Image.Image:
114
  image_bytes = part.inline_data.data
@@ -135,27 +150,42 @@ def fifty_fifty_mode(case_seed: int) -> str:
135
  return "knowledge" if (case_seed % 2 == 0) else "observation"
136
 
137
def http_get_json(url: str, params: Optional[dict] = None) -> dict:
    """GET *url* with the project User-Agent and return the parsed JSON body.

    Args:
        url: Fully-qualified URL to request.
        params: Optional query parameters forwarded to ``requests.get``.
            (Annotation fixed: the default is None, so the type is Optional.)

    Returns:
        The decoded JSON response as a dict.

    Raises:
        requests.HTTPError: If the response status is not 2xx.
    """
    headers = {"User-Agent": IA_USER_AGENT}
    r = requests.get(url, params=params, headers=headers, timeout=30)
    r.raise_for_status()
    return r.json()
142
 
143
def http_get_bytes(url: str) -> bytes:
    """Fetch *url* and return the raw response body, raising on HTTP errors."""
    resp = requests.get(
        url,
        headers={"User-Agent": IA_USER_AGENT},
        timeout=60,
    )
    resp.raise_for_status()
    return resp.content
148
 
149
def ia_advanced_search(query: str, rows: int, page: int) -> List[dict]:
    """Query the Internet Archive Advanced Search API (no API key required)."""
    payload = http_get_json(
        "https://archive.org/advancedsearch.php",
        params={"q": query, "rows": rows, "page": page, "output": "json"},
    )
    response = payload.get("response", {})
    return response.get("docs", [])
 
 
 
 
 
 
155
 
156
def ia_metadata(identifier: str) -> dict:
    """Fetch the Internet Archive metadata document for *identifier*."""
    return http_get_json(f"https://archive.org/metadata/{identifier}")
 
 
 
 
 
 
159
 
160
  def ia_best_image_from_metadata(meta: dict) -> Optional[dict]:
161
  files = meta.get("files", []) or []
@@ -165,12 +195,13 @@ def ia_best_image_from_metadata(meta: dict) -> Optional[dict]:
165
  if any(x in fmt for x in ["jpeg", "jpg", "png", "tiff", "image"]):
166
  w = int(f.get("width") or 0)
167
  h = int(f.get("height") or 0)
168
- if w and h:
169
- px = w * h
170
- else:
171
- px = int(f.get("size") or 0)
172
  if px > best_pixels:
173
  best_pixels, best = px, f
 
 
 
 
174
  return best
175
 
176
  def ingest_ia_doc(doc: dict) -> Optional[dict]:
@@ -178,9 +209,11 @@ def ingest_ia_doc(doc: dict) -> Optional[dict]:
178
  identifier = doc.get("identifier")
179
  if not identifier:
180
  return None
 
181
  meta = ia_metadata(identifier)
182
  best = ia_best_image_from_metadata(meta)
183
  if not best:
 
184
  return None
185
 
186
  md = meta.get("metadata", {}) or {}
@@ -207,28 +240,33 @@ def ingest_ia_doc(doc: dict) -> Optional[dict]:
207
  "source": "internet_archive"
208
  }
209
  ia_pool_ref().child(identifier).set(record)
 
210
  return record
211
 
212
def choose_ia_item_for_case(case_id: str) -> Optional[dict]:
    """Deterministically pick one pooled IA record for the given case id.

    The same case_id always maps to the same item (seeded by date), so every
    player sees the same daily artwork. Returns None when the pool is empty.
    """
    pool = ia_pool_ref().get() or {}
    if not pool:
        return None
    # Sorting makes the modular index stable regardless of dict ordering.
    ordered = sorted(pool)
    chosen = ordered[seed_for_date(case_id) % len(ordered)]
    return pool[chosen]
220
 
221
def download_image_to_pil(url: str) -> Image.Image:
    """Download *url* and decode it into an RGB PIL image."""
    raw = http_get_bytes(url)
    decoded = Image.open(io.BytesIO(raw))
    # Force RGB so downstream JPEG saves and Gemini calls get a uniform mode.
    return decoded.convert("RGB")
 
 
224
 
225
def crop_signature_macro(img: Image.Image, size: int = 512) -> Image.Image:
    """Crop a macro view of the lower-right corner, where signatures sit.

    The window is at most ``size`` x ``size`` and is clamped to the image
    bounds, so images smaller than ``size`` are returned whole.
    """
    width, height = img.size
    crop_w, crop_h = min(size, width), min(size, height)
    # Anchor the window to the bottom-right corner of the image.
    x0 = max(0, width - crop_w)
    y0 = max(0, height - crop_h)
    return img.crop((x0, y0, x0 + crop_w, y0 + crop_h))
233
 
234
  # -----------------------------------------------------------------------------
@@ -244,6 +282,7 @@ def _resize_if_needed(img: Image.Image, max_dim: int = 4096) -> Image.Image:
244
  else:
245
  new_h = max_dim
246
  new_w = int(w * (max_dim / h))
 
247
  return img.resize((new_w, new_h), Image.LANCZOS)
248
 
249
  def cache_single_ia_identifier(
@@ -253,10 +292,6 @@ def cache_single_ia_identifier(
253
  jpeg_quality: int = 90,
254
  skip_if_restricted: bool = True,
255
  ) -> dict:
256
- """
257
- Download one IA item from ia_pool, upload image + signature macro crop to Firebase Storage,
258
- and update the ia_pool record with storage URLs & dimensions.
259
- """
260
  rec_ref = ia_pool_ref().child(identifier)
261
  rec = rec_ref.get() or {}
262
  if not rec:
@@ -264,25 +299,31 @@ def cache_single_ia_identifier(
264
 
265
  rights = (rec.get("rights") or "").lower()
266
  if skip_if_restricted and ("in copyright" in rights or "all rights reserved" in rights):
 
267
  return {"identifier": identifier, "stored": False, "reason": "restricted_rights"}
268
 
269
  if rec.get("storage_url") and not overwrite:
 
270
  return {"identifier": identifier, "stored": False, "reason": "already_cached", "storage_url": rec["storage_url"]}
271
 
272
- # Prefer existing cached URL as source; fall back to IA
273
  source_url = rec.get("storage_url") or rec.get("download_url")
274
  if not source_url:
 
275
  return {"identifier": identifier, "stored": False, "reason": "missing_source_url"}
276
 
277
  try:
 
278
  img = download_image_to_pil(source_url)
279
  except Exception as e:
280
- if rec.get("download_url"):
281
  try:
 
282
  img = download_image_to_pil(rec["download_url"])
283
  except Exception as e2:
 
284
  return {"identifier": identifier, "stored": False, "reason": f"download_failed: {e2}"}
285
  else:
 
286
  return {"identifier": identifier, "stored": False, "reason": f"download_failed: {e}"}
287
 
288
  img = _resize_if_needed(img, max_dim=max_dim)
@@ -313,6 +354,7 @@ def cache_single_ia_identifier(
313
  "cached_at": datetime.now(timezone.utc).isoformat()
314
  }
315
  rec_ref.update(rec_update)
 
316
 
317
  return {
318
  "identifier": identifier,
@@ -333,8 +375,8 @@ def batch_cache_ia_pool(
333
  jpeg_quality: int = 90,
334
  skip_if_restricted: bool = True,
335
  ) -> dict:
336
- """Cache up to `limit` uncached IA items into Firebase Storage."""
337
  pool = ia_pool_ref().get() or {}
 
338
  if not pool:
339
  return {"ok": True, "processed": 0, "stored": 0, "skipped": 0, "results": []}
340
 
@@ -344,12 +386,14 @@ def batch_cache_ia_pool(
344
  w = int(rec.get("width") or 0)
345
  h = int(rec.get("height") or 0)
346
  if (w and h) and (w < min_width or h < min_height):
 
347
  continue
348
  candidates.append(ident)
349
 
350
  if randomize:
351
  random.shuffle(candidates)
352
  candidates = candidates[:max(0, limit)]
 
353
 
354
  results, stored, skipped = [], 0, 0
355
  for ident in candidates:
@@ -366,28 +410,26 @@ def batch_cache_ia_pool(
366
  else:
367
  skipped += 1
368
 
 
369
  return {"ok": True, "processed": len(candidates), "stored": stored, "skipped": skipped, "results": results}
370
 
371
  def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pages: int = 5) -> dict:
372
- """
373
- Zero-admin bootstrap:
374
- - If ia_pool has fewer than `min_items`, pull from IA Advanced Search and ingest.
375
- - Then cache enough images to reach `min_items`.
376
- """
377
  pool = ia_pool_ref().get() or {}
378
  have = len(pool)
379
  added = 0
380
  cached = 0
 
381
 
382
  if have < min_items:
383
  page = 1
384
  while have + added < min_items and page <= max_pages:
385
  try:
386
  docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=rows, page=page)
387
- except Exception as e:
388
- print("IA search failed on page", page, e)
389
  break
390
  if not docs:
 
391
  break
392
  for d in docs:
393
  ident = d.get("identifier")
@@ -400,19 +442,25 @@ def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pa
400
  if rec:
401
  added += 1
402
  except Exception:
 
403
  continue
 
 
404
  page += 1
405
 
406
  # Cache up to min_items
407
  pool = ia_pool_ref().get() or {}
408
  have_now = len(pool)
409
  need_cache = max(0, min_items - have_now)
 
410
  if need_cache:
411
  res = batch_cache_ia_pool(limit=need_cache, randomize=True)
412
  cached = res.get("stored", 0)
413
 
414
  final_size = len(ia_pool_ref().get() or {})
415
- return {"ok": True, "had": have, "added": added, "cached": cached, "final_size": final_size}
 
 
416
 
417
  # -----------------------------------------------------------------------------
418
  # 4) CASE GENERATION (uses IA for authentic image, Gemini for forgeries/meta)
@@ -420,28 +468,15 @@ def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pa
420
  def ensure_case_generated(case_id: str) -> Dict[str, Any]:
421
  existing_public = case_ref(case_id).child("public").get()
422
  if existing_public:
 
423
  return existing_public
424
 
425
  # Ensure we have a cached pool ready
426
  try:
427
  stats = ensure_minimum_ia_pool()
428
- if stats.get("added") or stats.get("cached"):
429
- print("Bootstrap:", stats)
430
- except Exception as e:
431
- print("Bootstrap warning:", e)
432
-
433
- # Fallback ingest if pool is empty
434
- pool = ia_pool_ref().get() or {}
435
- if not pool:
436
- try:
437
- docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=100, page=1)
438
- for d in docs:
439
- try:
440
- ingest_ia_doc(d)
441
- except Exception:
442
- continue
443
- except Exception as e:
444
- print("WARNING: IA default ingest failed:", e)
445
 
446
  # Pick authentic from ia_pool deterministically
447
  ia_item = choose_ia_item_for_case(case_id)
@@ -450,34 +485,35 @@ def ensure_case_generated(case_id: str) -> Dict[str, Any]:
450
 
451
  # Deterministic mode
452
  case_seed = seed_for_date(case_id)
453
- rng = random.Random(case_seed)
454
  mode = "knowledge" if (case_seed % 2 == 0) else "observation"
 
 
455
  style_period = "sourced from Internet Archive; museum catalog reproduction"
456
 
457
  # Load authentic image (prefer cached)
458
  source_url = ia_item.get("storage_url") or ia_item["download_url"]
 
459
  auth_img = download_image_to_pil(source_url)
460
 
461
  images_urls: List[str] = []
462
  signature_crops: List[str] = []
463
 
464
  # Save authentic as image #1
465
- images_urls.append(
466
- save_image_return_url(auth_img, f"hidden_stroke/{case_id}/images/img_1.jpg")
467
- )
 
468
  # Macro crop for signature area
469
  crop1 = crop_signature_macro(auth_img, 512)
470
- signature_crops.append(
471
- save_image_return_url(crop1, f"hidden_stroke/{case_id}/signature_crops/crop_1.jpg", quality=88)
472
- )
473
 
474
  if mode == "knowledge":
475
- # Use the same authentic visual for all three; differences come from metadata only
476
  for idx in [2, 3]:
477
- images_urls.append(images_urls[0]) # same URL is ok
478
  signature_crops.append(signature_crops[0])
479
  else:
480
- # observation: two subtle variants (signature micro-geometry)
481
  for i in range(2):
482
  forg_prompt = """
483
  Create a near-identical variant of the provided painting.
@@ -485,6 +521,7 @@ Keep composition, palette, and lighting the same.
485
  Only introduce a subtle change in signature micro-geometry (baseline alignment, stroke overlap order, or curve spacing).
486
  No annotations. Differences must be visible only at macro zoom.
487
  """
 
488
  resp = client.models.generate_content(
489
  model=GENERATION_MODEL,
490
  contents=[forg_prompt, auth_img],
@@ -496,6 +533,7 @@ No annotations. Differences must be visible only at macro zoom.
496
  f_img = pil_from_inline_image_part(p)
497
  break
498
  if f_img is None:
 
499
  f_img = auth_img.copy()
500
 
501
  url = save_image_return_url(f_img, f"hidden_stroke/{case_id}/images/img_{i+2}.jpg")
@@ -503,13 +541,15 @@ No annotations. Differences must be visible only at macro zoom.
503
  crop = crop_signature_macro(f_img, 512)
504
  c_url = save_image_return_url(crop, f"hidden_stroke/{case_id}/signature_crops/crop_{i+2}.jpg", quality=88)
505
  signature_crops.append(c_url)
 
506
 
507
- # === Gemini: Case brief + 3 metadata bundles + ledger + solution ===
508
  title = ia_item.get("title") or "Untitled"
509
  creator = ia_item.get("creator") or ""
510
  date = ia_item.get("date") or ""
511
  rights = ia_item.get("rights") or ""
512
  licenseurl = ia_item.get("licenseurl") or ""
 
513
 
514
  meta_prompt = f"""
515
  You are generating a daily case for a noir art investigation game.
@@ -548,19 +588,13 @@ OUTPUT STRICT JSON with this schema:
548
  "explanation": "A few sentences that justify the authentic pick without listing spoilers."
549
  }}
550
  }}
551
-
552
- CONSTRAINTS:
553
- - Keep all three bundles plausible and near-identical at a glance.
554
- - Anomalies must be subtle and testable (chemistry/ink era, currency introductions, institution timelines, accession formats, etc.).
555
- - If MODE=KNOWLEDGE, the tells should be discoverable via metadata/ledger alone.
556
- - If MODE=OBSERVATION, include at least one signature micro-geometry flag in "flags_signature".
557
- - The authentic bundle should be consistent with the AUTHENTIC CONTEXT.
558
  """
559
  meta_resp = client.models.generate_content(
560
  model=CATEGORY_MODEL,
561
  contents=[meta_prompt]
562
  )
563
  raw_text = meta_resp.text.strip()
 
564
  try:
565
  meta_json = json.loads(raw_text)
566
  except Exception:
@@ -582,8 +616,10 @@ CONSTRAINTS:
582
  flags_metadata = solution.get("flags_metadata", [])
583
  flags_financial = solution.get("flags_financial", [])
584
  explanation = solution.get("explanation", "The authentic work aligns with period-accurate details; the others contain subtle contradictions.")
 
585
 
586
  if len(metadata) != 3:
 
587
  raise RuntimeError("Expected exactly 3 metadata bundles.")
588
 
589
  public = {
@@ -618,6 +654,7 @@ CONSTRAINTS:
618
  cref = case_ref(case_id)
619
  cref.child("public").set(public)
620
  cref.child("solution").set(solution_doc)
 
621
  return public
622
 
623
  # -----------------------------------------------------------------------------
@@ -638,6 +675,7 @@ def create_session(user_id: str, username: str, case_id: str) -> Dict[str, Any]:
638
  "status": "active"
639
  }
640
  sessions_ref().child(session_id).set(session_doc)
 
641
  return session_doc
642
 
643
  def get_session(session_id: str) -> Dict[str, Any]:
@@ -666,6 +704,7 @@ def spend_ip(session: Dict[str, Any], cost: int, action: Dict[str, Any]) -> Tupl
666
  action["ts"] = datetime.now(timezone.utc).isoformat()
667
  sessions_ref().child(session["session_id"]).child("ip_remaining").set(new_ip)
668
  sessions_ref().child(session["session_id"]).child("actions").push(action)
 
669
  return session, {}
670
 
671
  def score_result(correct: bool, session: Dict[str, Any]) -> Dict[str, Any]:
@@ -709,6 +748,7 @@ def admin_ingest_ia():
709
  rows = int(body.get("rows") or 100)
710
  ingested = 0
711
  errors = 0
 
712
 
713
  for page in range(1, pages + 1):
714
  try:
@@ -728,6 +768,7 @@ def admin_ingest_ia():
728
  ingested += 1
729
  except Exception:
730
  errors += 1
 
731
  continue
732
 
733
  pool_size = len(ia_pool_ref().get() or {})
@@ -758,7 +799,8 @@ def ia_pool_stats():
758
  if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
759
  return jsonify({"error": "Forbidden"}), 403
760
  pool = ia_pool_ref().get() or {}
761
- return jsonify({"pool_size": len(pool)})
 
762
 
763
  # --- Admin: pre-generate today's case (manual) ---
764
  @app.route("/admin/generate-today", methods=["POST"])
@@ -782,8 +824,42 @@ def admin_bootstrap_now():
782
  stats = ensure_minimum_ia_pool(min_items=min_items, rows=rows, max_pages=max_pages)
783
  return jsonify({"ok": True, "stats": stats})
784
  except Exception as e:
 
785
  return jsonify({"ok": False, "error": str(e)}), 500
786
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
787
  # --- Player flow ---
788
  @app.route("/cases/today/start", methods=["POST"])
789
  def start_case():
@@ -926,13 +1002,12 @@ def leaderboard_daily():
926
  # 7) MAIN
927
  # -----------------------------------------------------------------------------
928
  if __name__ == "__main__":
929
- # Optional: pre-warm pool on boot so you’re ready before first request
930
  if os.environ.get("BOOTSTRAP_IA", "1") == "1":
931
- print("Bootstrapping Internet Archive pool...")
932
  try:
933
  stats = ensure_minimum_ia_pool()
934
- print("Bootstrap complete:", stats)
935
- except Exception as e:
936
- print("Bootstrap failed:", e)
937
 
938
  app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")), debug=True)
 
1
+ # app.py — Hidden Stroke (AI Noir Investigation) with verbose logging
2
+ # Flask + Firebase Realtime DB + Firebase Storage + Gemini
3
+ # Envs required: FIREBASE, Firebase_DB, Firebase_Storage, Gemini
4
+ # Optional envs: GAME_SALT, ADMIN_KEY, IA_USER_AGENT, MIN_IA_POOL, IA_QUERY,
5
+ # BOOTSTRAP_IA, LOG_LEVEL, ALLOW_DEV_BOOTSTRAP, ALLOW_DEV_DIAGNOSTICS
6
 
7
  import os, io, uuid, json, hmac, hashlib, random, traceback, requests
8
  from datetime import datetime, timedelta, timezone
 
12
  from flask_cors import CORS
13
  from PIL import Image
14
 
15
+ # ----- Logging ---------------------------------------------------------------
16
+ import logging
17
+ LOG_LEVEL = os.environ.get("LOG_LEVEL", "DEBUG").upper()
18
+ logging.basicConfig(
19
+ level=getattr(logging, LOG_LEVEL, logging.DEBUG),
20
+ format="%(asctime)s | %(levelname)s | %(name)s | %(message)s"
21
+ )
22
+ log = logging.getLogger("hidden_stroke")
23
+
24
  # ---------------- Firebase Admin (Realtime DB + Storage) ----------------
25
  import firebase_admin
26
  from firebase_admin import credentials, db, storage
 
54
  })
55
  bucket = storage.bucket()
56
  db_root = db.reference("/")
57
+ log.info("Firebase Realtime DB + Storage initialized.")
58
  except Exception as e:
59
+ log.exception("FATAL: Firebase init failed")
60
  raise
61
 
62
  # --- Gemini ---
 
65
  if not GEMINI_API_KEY:
66
  raise ValueError("The 'Gemini' environment variable is not set.")
67
  client = genai.Client(api_key=GEMINI_API_KEY)
68
+ log.info("Gemini client initialized.")
69
  except Exception as e:
70
+ log.exception("FATAL: Gemini init failed")
71
  raise
72
 
73
  # --- Models (exact names) ---
 
81
  LEADERBOARD_TOP_N = 50
82
 
83
  # --- Misc config ---
84
+ GAME_SALT = os.environ.get("GAME_SALT", "dev-salt")
85
+ ADMIN_KEY = os.environ.get("ADMIN_KEY")
86
  IA_USER_AGENT = os.environ.get("IA_USER_AGENT", "HiddenStrokeBot/1.0 (+https://reddit.com)")
87
  MIN_IA_POOL = int(os.environ.get("MIN_IA_POOL", "60"))
88
  DEFAULT_IA_QUERY = os.environ.get(
 
90
  '(collection:(metropolitanmuseum OR smithsonian OR getty OR artic) AND mediatype:image)'
91
  )
92
  ALLOW_DEV_BOOTSTRAP = os.environ.get("ALLOW_DEV_BOOTSTRAP", "0") == "1"
93
+ ALLOW_DEV_DIAGNOSTICS = os.environ.get("ALLOW_DEV_DIAGNOSTICS", "0") == "1"
94
 
95
  # -----------------------------------------------------------------------------
96
  # 2) UTILS
 
117
  return hmac.new(GAME_SALT.encode(), s.encode(), hashlib.sha256).hexdigest()
118
 
119
def upload_bytes_to_storage(data: bytes, path: str, content_type: str) -> str:
    """Upload raw bytes to Firebase Storage and return the public URL.

    Args:
        data: Encoded file contents to store.
        path: Destination object path inside the bucket.
        content_type: MIME type recorded on the blob.

    Returns:
        The publicly readable URL of the uploaded object.
    """
    # Lazy %-args: the message is only formatted when DEBUG logging is on.
    log.debug("Uploading to Storage: path=%s, content_type=%s, bytes=%d",
              path, content_type, len(data))
    blob = bucket.blob(path)
    blob.upload_from_string(data, content_type=content_type)
    # Game clients fetch these URLs directly, so the object must be public.
    blob.make_public()
    url = blob.public_url
    log.debug("Uploaded: %s", url)
    return url
127
 
128
  def pil_from_inline_image_part(part) -> Image.Image:
129
  image_bytes = part.inline_data.data
 
150
  return "knowledge" if (case_seed % 2 == 0) else "observation"
151
 
152
def http_get_json(url: str, params: Optional[dict] = None) -> dict:
    """GET *url* with the project User-Agent and return the parsed JSON body.

    Args:
        url: Fully-qualified URL to request.
        params: Optional query parameters forwarded to ``requests.get``.
            (Annotation fixed: the default is None, so the type is Optional.)

    Raises:
        requests.HTTPError: If the response status is not 2xx.
    """
    # Lazy %-args avoid eager f-string formatting on every request.
    log.debug("HTTP GET JSON: %s params=%s", url, params)
    headers = {"User-Agent": IA_USER_AGENT}
    r = requests.get(url, params=params, headers=headers, timeout=30)
    log.debug("HTTP %s for %s", r.status_code, r.url)
    r.raise_for_status()
    return r.json()
159
 
160
  def http_get_bytes(url: str) -> bytes:
161
+ log.debug(f"HTTP GET BYTES: {url}")
162
  headers = {"User-Agent": IA_USER_AGENT}
163
  r = requests.get(url, headers=headers, timeout=60)
164
+ log.debug(f"HTTP {r.status_code} for {r.url} bytes={len(r.content)}")
165
  r.raise_for_status()
166
  return r.content
167
 
168
def ia_advanced_search(query: str, rows: int, page: int) -> List[dict]:
    """Query the Internet Archive Advanced Search API (no API key required).

    Args:
        query: Lucene-style IA search query string.
        rows: Number of documents per page.
        page: 1-based page number.

    Returns:
        The list of result docs (possibly empty).

    Raises:
        Exception: Re-raised after logging if the HTTP call or JSON parse fails.
    """
    url = "https://archive.org/advancedsearch.php"
    params = {"q": query, "rows": rows, "page": page, "output": "json"}
    # Keep the try body minimal: only the network call can raise here.
    try:
        data = http_get_json(url, params=params)
    except Exception:
        log.exception("IA advanced search failed")
        raise
    docs = data.get("response", {}).get("docs", [])
    # Lazy %-args instead of an eagerly-formatted f-string.
    log.info("IA search page=%s rows=%s -> %d docs", page, rows, len(docs))
    return docs
179
 
180
  def ia_metadata(identifier: str) -> dict:
181
  url = f"https://archive.org/metadata/{identifier}"
182
+ try:
183
+ meta = http_get_json(url)
184
+ log.debug(f"Fetched metadata for {identifier}, files={len(meta.get('files', []) or [])}")
185
+ return meta
186
+ except Exception:
187
+ log.exception(f"IA metadata fetch failed for {identifier}")
188
+ raise
189
 
190
  def ia_best_image_from_metadata(meta: dict) -> Optional[dict]:
191
  files = meta.get("files", []) or []
 
195
  if any(x in fmt for x in ["jpeg", "jpg", "png", "tiff", "image"]):
196
  w = int(f.get("width") or 0)
197
  h = int(f.get("height") or 0)
198
+ px = w * h if (w and h) else int(f.get("size") or 0)
 
 
 
199
  if px > best_pixels:
200
  best_pixels, best = px, f
201
+ if best:
202
+ log.debug(f"Best image: name={best.get('name')} fmt={best.get('format')} dims={best.get('width')}x{best.get('height')} size={best.get('size')}")
203
+ else:
204
+ log.warning("No suitable image file found in metadata")
205
  return best
206
 
207
  def ingest_ia_doc(doc: dict) -> Optional[dict]:
 
209
  identifier = doc.get("identifier")
210
  if not identifier:
211
  return None
212
+ log.info(f"Ingesting IA identifier={identifier}")
213
  meta = ia_metadata(identifier)
214
  best = ia_best_image_from_metadata(meta)
215
  if not best:
216
+ log.warning(f"Skipping {identifier}: no image file")
217
  return None
218
 
219
  md = meta.get("metadata", {}) or {}
 
240
  "source": "internet_archive"
241
  }
242
  ia_pool_ref().child(identifier).set(record)
243
+ log.info(f"Ingested {identifier} -> ia_pool (title='{title}')")
244
  return record
245
 
246
def choose_ia_item_for_case(case_id: str) -> Optional[dict]:
    """Pick the day's authentic artwork from ia_pool, deterministically.

    Seeded by the case date so every player gets the same item; returns
    None (with a warning) when the pool has nothing to offer.
    """
    records = ia_pool_ref().get() or {}
    if not records:
        log.warning("choose_ia_item_for_case: pool is empty")
        return None
    # Stable sort order + date-seeded modulus => one fixed pick per day.
    ordered = sorted(records)
    ident = ordered[seed_for_date(case_id) % len(ordered)]
    log.info(f"Chosen IA item for case {case_id}: {ident}")
    return records[ident]
256
 
257
  def download_image_to_pil(url: str) -> Image.Image:
258
  data = http_get_bytes(url)
259
+ img = Image.open(io.BytesIO(data)).convert("RGB")
260
+ log.debug(f"Opened image from {url} size={img.size}")
261
+ return img
262
 
263
  def crop_signature_macro(img: Image.Image, size: int = 512) -> Image.Image:
 
264
  w, h = img.size
265
  cw = min(size, w)
266
  ch = min(size, h)
267
  left = max(0, w - cw)
268
  top = max(0, h - ch)
269
+ log.debug(f"Signature crop from ({left},{top}) to ({left+cw},{top+ch})")
270
  return img.crop((left, top, left + cw, top + ch))
271
 
272
  # -----------------------------------------------------------------------------
 
282
  else:
283
  new_h = max_dim
284
  new_w = int(w * (max_dim / h))
285
+ log.debug(f"Resizing image from {w}x{h} to {new_w}x{new_h}")
286
  return img.resize((new_w, new_h), Image.LANCZOS)
287
 
288
  def cache_single_ia_identifier(
 
292
  jpeg_quality: int = 90,
293
  skip_if_restricted: bool = True,
294
  ) -> dict:
 
 
 
 
295
  rec_ref = ia_pool_ref().child(identifier)
296
  rec = rec_ref.get() or {}
297
  if not rec:
 
299
 
300
  rights = (rec.get("rights") or "").lower()
301
  if skip_if_restricted and ("in copyright" in rights or "all rights reserved" in rights):
302
+ log.info(f"Skipping {identifier}: restricted rights")
303
  return {"identifier": identifier, "stored": False, "reason": "restricted_rights"}
304
 
305
  if rec.get("storage_url") and not overwrite:
306
+ log.info(f"Skipping {identifier}: already cached")
307
  return {"identifier": identifier, "stored": False, "reason": "already_cached", "storage_url": rec["storage_url"]}
308
 
 
309
  source_url = rec.get("storage_url") or rec.get("download_url")
310
  if not source_url:
311
+ log.warning(f"{identifier}: missing source_url")
312
  return {"identifier": identifier, "stored": False, "reason": "missing_source_url"}
313
 
314
  try:
315
+ log.info(f"Caching {identifier} from {source_url}")
316
  img = download_image_to_pil(source_url)
317
  except Exception as e:
318
+ if rec.get("download_url") and source_url != rec.get("download_url"):
319
  try:
320
+ log.warning(f"Retrying {identifier} from IA download_url")
321
  img = download_image_to_pil(rec["download_url"])
322
  except Exception as e2:
323
+ log.exception(f"{identifier}: download failed")
324
  return {"identifier": identifier, "stored": False, "reason": f"download_failed: {e2}"}
325
  else:
326
+ log.exception(f"{identifier}: download failed")
327
  return {"identifier": identifier, "stored": False, "reason": f"download_failed: {e}"}
328
 
329
  img = _resize_if_needed(img, max_dim=max_dim)
 
354
  "cached_at": datetime.now(timezone.utc).isoformat()
355
  }
356
  rec_ref.update(rec_update)
357
+ log.info(f"Cached {identifier} -> {storage_url}")
358
 
359
  return {
360
  "identifier": identifier,
 
375
  jpeg_quality: int = 90,
376
  skip_if_restricted: bool = True,
377
  ) -> dict:
 
378
  pool = ia_pool_ref().get() or {}
379
+ log.info(f"batch_cache_ia_pool: pool_size={len(pool)}")
380
  if not pool:
381
  return {"ok": True, "processed": 0, "stored": 0, "skipped": 0, "results": []}
382
 
 
386
  w = int(rec.get("width") or 0)
387
  h = int(rec.get("height") or 0)
388
  if (w and h) and (w < min_width or h < min_height):
389
+ log.debug(f"Skip {ident}: too small {w}x{h}")
390
  continue
391
  candidates.append(ident)
392
 
393
  if randomize:
394
  random.shuffle(candidates)
395
  candidates = candidates[:max(0, limit)]
396
+ log.info(f"Caching candidates: {len(candidates)} (limit={limit})")
397
 
398
  results, stored, skipped = [], 0, 0
399
  for ident in candidates:
 
410
  else:
411
  skipped += 1
412
 
413
+ log.info(f"batch_cache_ia_pool done: processed={len(candidates)} stored={stored} skipped={skipped}")
414
  return {"ok": True, "processed": len(candidates), "stored": stored, "skipped": skipped, "results": results}
415
 
416
  def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pages: int = 5) -> dict:
 
 
 
 
 
417
  pool = ia_pool_ref().get() or {}
418
  have = len(pool)
419
  added = 0
420
  cached = 0
421
+ log.info(f"ensure_minimum_ia_pool: have={have}, target={min_items}")
422
 
423
  if have < min_items:
424
  page = 1
425
  while have + added < min_items and page <= max_pages:
426
  try:
427
  docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=rows, page=page)
428
+ except Exception:
429
+ log.warning(f"IA search failed on page {page}, stopping ingest loop")
430
  break
431
  if not docs:
432
+ log.warning("IA search returned 0 docs; stopping")
433
  break
434
  for d in docs:
435
  ident = d.get("identifier")
 
442
  if rec:
443
  added += 1
444
  except Exception:
445
+ log.exception(f"Failed to ingest {ident}")
446
  continue
447
+ if have + added >= min_items:
448
+ break
449
  page += 1
450
 
451
  # Cache up to min_items
452
  pool = ia_pool_ref().get() or {}
453
  have_now = len(pool)
454
  need_cache = max(0, min_items - have_now)
455
+ log.info(f"ensure_minimum_ia_pool: post-ingest have={have_now}, need_cache={need_cache}")
456
  if need_cache:
457
  res = batch_cache_ia_pool(limit=need_cache, randomize=True)
458
  cached = res.get("stored", 0)
459
 
460
  final_size = len(ia_pool_ref().get() or {})
461
+ stats = {"ok": True, "had": have, "added": added, "cached": cached, "final_size": final_size}
462
+ log.info(f"ensure_minimum_ia_pool: stats={stats}")
463
+ return stats
464
 
465
  # -----------------------------------------------------------------------------
466
  # 4) CASE GENERATION (uses IA for authentic image, Gemini for forgeries/meta)
 
468
  def ensure_case_generated(case_id: str) -> Dict[str, Any]:
469
  existing_public = case_ref(case_id).child("public").get()
470
  if existing_public:
471
+ log.info(f"Case {case_id} already exists")
472
  return existing_public
473
 
474
  # Ensure we have a cached pool ready
475
  try:
476
  stats = ensure_minimum_ia_pool()
477
+ log.debug(f"Bootstrap stats for case {case_id}: {stats}")
478
+ except Exception:
479
+ log.exception("Bootstrap failed inside ensure_case_generated")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
480
 
481
  # Pick authentic from ia_pool deterministically
482
  ia_item = choose_ia_item_for_case(case_id)
 
485
 
486
  # Deterministic mode
487
  case_seed = seed_for_date(case_id)
 
488
  mode = "knowledge" if (case_seed % 2 == 0) else "observation"
489
+ log.info(f"Case {case_id}: mode={mode}")
490
+
491
  style_period = "sourced from Internet Archive; museum catalog reproduction"
492
 
493
  # Load authentic image (prefer cached)
494
  source_url = ia_item.get("storage_url") or ia_item["download_url"]
495
+ log.info(f"Case {case_id}: authentic source={source_url}")
496
  auth_img = download_image_to_pil(source_url)
497
 
498
  images_urls: List[str] = []
499
  signature_crops: List[str] = []
500
 
501
  # Save authentic as image #1
502
+ url1 = save_image_return_url(auth_img, f"hidden_stroke/{case_id}/images/img_1.jpg")
503
+ images_urls.append(url1)
504
+ log.debug(f"Case {case_id}: saved authentic -> {url1}")
505
+
506
  # Macro crop for signature area
507
  crop1 = crop_signature_macro(auth_img, 512)
508
+ crop1_url = save_image_return_url(crop1, f"hidden_stroke/{case_id}/signature_crops/crop_1.jpg", quality=88)
509
+ signature_crops.append(crop1_url)
510
+ log.debug(f"Case {case_id}: saved authentic crop -> {crop1_url}")
511
 
512
  if mode == "knowledge":
 
513
  for idx in [2, 3]:
514
+ images_urls.append(images_urls[0])
515
  signature_crops.append(signature_crops[0])
516
  else:
 
517
  for i in range(2):
518
  forg_prompt = """
519
  Create a near-identical variant of the provided painting.
 
521
  Only introduce a subtle change in signature micro-geometry (baseline alignment, stroke overlap order, or curve spacing).
522
  No annotations. Differences must be visible only at macro zoom.
523
  """
524
+ log.info(f"Case {case_id}: generating forgery {i+1}")
525
  resp = client.models.generate_content(
526
  model=GENERATION_MODEL,
527
  contents=[forg_prompt, auth_img],
 
533
  f_img = pil_from_inline_image_part(p)
534
  break
535
  if f_img is None:
536
+ log.warning("Gemini returned no image; falling back to copy of authentic")
537
  f_img = auth_img.copy()
538
 
539
  url = save_image_return_url(f_img, f"hidden_stroke/{case_id}/images/img_{i+2}.jpg")
 
541
  crop = crop_signature_macro(f_img, 512)
542
  c_url = save_image_return_url(crop, f"hidden_stroke/{case_id}/signature_crops/crop_{i+2}.jpg", quality=88)
543
  signature_crops.append(c_url)
544
+ log.debug(f"Case {case_id}: forgery saved -> {url}; crop -> {c_url}")
545
 
546
+ # === Gemini: Case brief + metadata + ledger + solution ===
547
  title = ia_item.get("title") or "Untitled"
548
  creator = ia_item.get("creator") or ""
549
  date = ia_item.get("date") or ""
550
  rights = ia_item.get("rights") or ""
551
  licenseurl = ia_item.get("licenseurl") or ""
552
+ log.info(f"Case {case_id}: prompting metadata with title='{title}' creator='{creator}' date='{date}'")
553
 
554
  meta_prompt = f"""
555
  You are generating a daily case for a noir art investigation game.
 
588
  "explanation": "A few sentences that justify the authentic pick without listing spoilers."
589
  }}
590
  }}
 
 
 
 
 
 
 
591
  """
592
  meta_resp = client.models.generate_content(
593
  model=CATEGORY_MODEL,
594
  contents=[meta_prompt]
595
  )
596
  raw_text = meta_resp.text.strip()
597
+ log.debug(f"Case {case_id}: raw meta JSON text len={len(raw_text)}")
598
  try:
599
  meta_json = json.loads(raw_text)
600
  except Exception:
 
616
  flags_metadata = solution.get("flags_metadata", [])
617
  flags_financial = solution.get("flags_financial", [])
618
  explanation = solution.get("explanation", "The authentic work aligns with period-accurate details; the others contain subtle contradictions.")
619
+ log.info(f"Case {case_id}: answer_index={answer_index}, meta_count={len(metadata)}")
620
 
621
  if len(metadata) != 3:
622
+ log.error("Gemini did not return exactly 3 metadata bundles")
623
  raise RuntimeError("Expected exactly 3 metadata bundles.")
624
 
625
  public = {
 
654
  cref = case_ref(case_id)
655
  cref.child("public").set(public)
656
  cref.child("solution").set(solution_doc)
657
+ log.info(f"Case {case_id}: generated and stored")
658
  return public
659
 
660
  # -----------------------------------------------------------------------------
 
675
  "status": "active"
676
  }
677
  sessions_ref().child(session_id).set(session_doc)
678
+ log.info(f"New session {session_id} for user={username} case={case_id}")
679
  return session_doc
680
 
681
  def get_session(session_id: str) -> Dict[str, Any]:
 
704
  action["ts"] = datetime.now(timezone.utc).isoformat()
705
  sessions_ref().child(session["session_id"]).child("ip_remaining").set(new_ip)
706
  sessions_ref().child(session["session_id"]).child("actions").push(action)
707
+ log.debug(f"Spend IP: {cost} -> remaining={new_ip}")
708
  return session, {}
709
 
710
  def score_result(correct: bool, session: Dict[str, Any]) -> Dict[str, Any]:
 
748
  rows = int(body.get("rows") or 100)
749
  ingested = 0
750
  errors = 0
751
+ log.info(f"Manual ingest: query='{query}' pages={pages} rows={rows}")
752
 
753
  for page in range(1, pages + 1):
754
  try:
 
768
  ingested += 1
769
  except Exception:
770
  errors += 1
771
+ log.exception(f"Manual ingest failed for {ident}")
772
  continue
773
 
774
  pool_size = len(ia_pool_ref().get() or {})
 
799
  if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
800
  return jsonify({"error": "Forbidden"}), 403
801
  pool = ia_pool_ref().get() or {}
802
+ cached = sum(1 for r in pool.values() if r.get("storage_url"))
803
+ return jsonify({"pool_size": len(pool), "cached": cached})
804
 
805
  # --- Admin: pre-generate today's case (manual) ---
806
  @app.route("/admin/generate-today", methods=["POST"])
 
824
  stats = ensure_minimum_ia_pool(min_items=min_items, rows=rows, max_pages=max_pages)
825
  return jsonify({"ok": True, "stats": stats})
826
  except Exception as e:
827
+ log.exception("bootstrap-now failed")
828
  return jsonify({"ok": False, "error": str(e)}), 500
829
 
830
# --- DEV-ONLY: diagnostics (network + firebase sanity) ---
@app.route("/admin/diagnostics", methods=["GET"])
def diagnostics():
    """Dev-only sanity probe: verifies Internet Archive reachability and Firebase Storage writes.

    Gated behind ALLOW_DEV_DIAGNOSTICS so it is never reachable in a normal
    deployment. Failures are reported inside the JSON payload rather than
    raised, so a single broken dependency does not hide the others.
    """
    if not ALLOW_DEV_DIAGNOSTICS:
        return jsonify({"error": "Disabled. Set ALLOW_DEV_DIAGNOSTICS=1 to enable."}), 403

    report = {
        "info": {
            "bucket": bucket.name,
            "db_url": db_root.path,
            "log_level": LOG_LEVEL,
            "ia_query": DEFAULT_IA_QUERY,
        },
        "ia": {},
        "firebase": {},
    }

    # Probe the IA advanced-search API with the configured query and, if it
    # returns anything, resolve the best image file for the first hit.
    try:
        docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=3, page=1)
        report["ia"]["search_docs"] = [d.get("identifier") for d in docs]
        if docs:
            sample_id = docs[0].get("identifier")
            best = ia_best_image_from_metadata(ia_metadata(sample_id))
            report["ia"]["sample_identifier"] = sample_id
            report["ia"]["best_file"] = (best or {}).get("name")
    except Exception as exc:
        report["ia"]["error"] = str(exc)

    # Try a tiny upload
    try:
        probe_path = f"diag/ping_{uuid.uuid4().hex}.txt"
        report["firebase"]["upload_test"] = upload_bytes_to_storage(b"ping", probe_path, "text/plain")
    except Exception as exc:
        report["firebase"]["error"] = str(exc)

    return jsonify(report)
862
+
863
  # --- Player flow ---
864
  @app.route("/cases/today/start", methods=["POST"])
865
  def start_case():
 
1002
  # 7) MAIN
1003
  # -----------------------------------------------------------------------------
1004
if __name__ == "__main__":
    # Optionally warm the Internet Archive pool before serving traffic
    # (set BOOTSTRAP_IA=0 to skip, e.g. for faster local restarts).
    if os.environ.get("BOOTSTRAP_IA", "1") == "1":
        log.info("Bootstrapping Internet Archive pool...")
        try:
            stats = ensure_minimum_ia_pool()
            log.info(f"Bootstrap complete: {stats}")
        except Exception:
            # Bootstrap is best-effort: the app can still serve cached cases,
            # so log the failure instead of refusing to start.
            log.exception("Bootstrap failed")

    # Never hard-code debug=True for a deployed container: the Werkzeug
    # interactive debugger allows arbitrary code execution if exposed.
    # Debug mode is now opt-in via FLASK_DEBUG=1 for local development only.
    debug_mode = os.environ.get("FLASK_DEBUG", "0") == "1"
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")), debug=debug_mode)