rairo committed on
Commit
3bafcbc
·
verified ·
1 Parent(s): e96bd6d

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +274 -67
main.py CHANGED
@@ -1,8 +1,8 @@
1
  # app.py — Hidden Stroke (AI Noir Investigation) with Internet Archive ingestion
2
  # Flask + Firebase Realtime DB + Firebase Storage + Gemini (exact model names)
3
- # Runs on Hugging Face like your reference app (same envs & init flow)
4
 
5
- import os, io, uuid, json, time, hmac, hashlib, random, traceback, requests
6
  from datetime import datetime, timedelta, timezone
7
  from typing import Dict, Any, Tuple, List, Optional
8
 
@@ -19,7 +19,7 @@ from google import genai
19
  from google.genai import types
20
 
21
  # -----------------------------------------------------------------------------
22
- # 1) CONFIG & INIT (env names EXACTLY as your reference code)
23
  # -----------------------------------------------------------------------------
24
  app = Flask(__name__)
25
  CORS(app)
@@ -71,8 +71,14 @@ LEADERBOARD_TOP_N = 50
71
 
72
  # --- Misc config ---
73
  GAME_SALT = os.environ.get("GAME_SALT", "dev-salt") # for deterministic seeds / HMAC
74
- ADMIN_KEY = os.environ.get("ADMIN_KEY") # optional for admin endpoints
75
- IA_USER_AGENT = os.environ.get("IA_USER_AGENT", "HiddenStrokeBot/1.0 (+https://reddit.com)") # polite UA
 
 
 
 
 
 
76
 
77
  # -----------------------------------------------------------------------------
78
  # 2) UTILS
@@ -142,7 +148,6 @@ def http_get_bytes(url: str) -> bytes:
142
 
143
  def ia_advanced_search(query: str, rows: int, page: int) -> List[dict]:
144
  # Internet Archive Advanced Search (no key required)
145
- # docs: /advancedsearch.php?q=...&rows=...&page=...&output=json
146
  url = "https://archive.org/advancedsearch.php"
147
  params = {"q": query, "rows": rows, "page": page, "output": "json"}
148
  data = http_get_json(url, params=params)
@@ -153,30 +158,23 @@ def ia_metadata(identifier: str) -> dict:
153
  return http_get_json(url)
154
 
155
  def ia_best_image_from_metadata(meta: dict) -> Optional[dict]:
156
- # Pick the largest suitable image file from /metadata result
157
  files = meta.get("files", []) or []
158
- best = None
159
- best_pixels = -1
160
  for f in files:
161
- name = f.get("name", "")
162
  fmt = (f.get("format") or "").lower()
163
  if any(x in fmt for x in ["jpeg", "jpg", "png", "tiff", "image"]):
164
- # width/height sometimes present
165
  w = int(f.get("width") or 0)
166
  h = int(f.get("height") or 0)
167
  if w and h:
168
  px = w * h
169
  else:
170
- px = int(f.get("size") or 0) # fallback by bytes
171
  if px > best_pixels:
172
- best_pixels = px
173
- best = f
174
  return best
175
 
176
  def ingest_ia_doc(doc: dict) -> Optional[dict]:
177
- """
178
- Given a doc from advancedsearch, fetch /metadata and store best image into ia_pool.
179
- """
180
  identifier = doc.get("identifier")
181
  if not identifier:
182
  return None
@@ -185,11 +183,12 @@ def ingest_ia_doc(doc: dict) -> Optional[dict]:
185
  if not best:
186
  return None
187
 
188
- title = (meta.get("metadata", {}) or {}).get("title", "") or doc.get("title", "")
189
- date = (meta.get("metadata", {}) or {}).get("date", "") or doc.get("date", "")
190
- creator = (meta.get("metadata", {}) or {}).get("creator", "") or doc.get("creator", "")
191
- rights = (meta.get("metadata", {}) or {}).get("rights", "") or doc.get("rights", "")
192
- licenseurl = (meta.get("metadata", {}) or {}).get("licenseurl", "") or doc.get("licenseurl", "")
 
193
 
194
  download_url = f"https://archive.org/download/{identifier}/{best['name']}"
195
  record = {
@@ -216,17 +215,15 @@ def choose_ia_item_for_case(case_id: str) -> Optional[dict]:
216
  return None
217
  identifiers = sorted(pool.keys())
218
  case_seed = seed_for_date(case_id)
219
- idx = case_seed % len(identifiers)
220
- ident = identifiers[idx]
221
  return pool[ident]
222
 
223
  def download_image_to_pil(url: str) -> Image.Image:
224
  data = http_get_bytes(url)
225
- img = Image.open(io.BytesIO(data)).convert("RGB")
226
- return img
227
 
228
  def crop_signature_macro(img: Image.Image, size: int = 512) -> Image.Image:
229
- # simple lower-right macro crop (if smaller, clamp)
230
  w, h = img.size
231
  cw = min(size, w)
232
  ch = min(size, h)
@@ -235,27 +232,209 @@ def crop_signature_macro(img: Image.Image, size: int = 512) -> Image.Image:
235
  return img.crop((left, top, left + cw, top + ch))
236
 
237
  # -----------------------------------------------------------------------------
238
- # 3) CASE GENERATION (now uses IA for the authentic image)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  # -----------------------------------------------------------------------------
240
  def ensure_case_generated(case_id: str) -> Dict[str, Any]:
241
  existing_public = case_ref(case_id).child("public").get()
242
  if existing_public:
243
  return existing_public
244
 
245
- # Make sure we have a minimum pool of cached images ready
246
  try:
247
- ensure_minimum_ia_pool()
 
 
248
  except Exception as e:
249
  print("Bootstrap warning:", e)
250
 
251
-
252
- # Ensure we have at least some IA records; if not, auto-ingest a default set (one page)
253
  pool = ia_pool_ref().get() or {}
254
  if not pool:
255
  try:
256
- # Default query targets well-known museum collections with images
257
- default_query = '(collection:(metropolitanmuseum OR smithsonian OR getty OR artic) AND mediatype:image)'
258
- docs = ia_advanced_search(default_query, rows=100, page=1)
259
  for d in docs:
260
  try:
261
  ingest_ia_doc(d)
@@ -267,22 +446,18 @@ def ensure_case_generated(case_id: str) -> Dict[str, Any]:
267
  # Pick authentic from ia_pool deterministically
268
  ia_item = choose_ia_item_for_case(case_id)
269
  if not ia_item:
270
- # absolute fallback (rare)
271
  raise RuntimeError("No IA items available. Ingest needed.")
272
 
273
  # Deterministic mode
274
  case_seed = seed_for_date(case_id)
275
  rng = random.Random(case_seed)
276
  mode = "knowledge" if (case_seed % 2 == 0) else "observation"
277
-
278
- # Style label (flavor text only)
279
  style_period = "sourced from Internet Archive; museum catalog reproduction"
280
 
281
- # Download authentic image
282
  source_url = ia_item.get("storage_url") or ia_item["download_url"]
283
  auth_img = download_image_to_pil(source_url)
284
 
285
-
286
  images_urls: List[str] = []
287
  signature_crops: List[str] = []
288
 
@@ -299,10 +474,10 @@ def ensure_case_generated(case_id: str) -> Dict[str, Any]:
299
  if mode == "knowledge":
300
  # Use the same authentic visual for all three; differences come from metadata only
301
  for idx in [2, 3]:
302
- images_urls.append(images_urls[0]) # same URL OK (client treats as separate cards)
303
  signature_crops.append(signature_crops[0])
304
  else:
305
- # observation: generate 2 subtle variants (near-identical; tweak signature micro-geometry)
306
  for i in range(2):
307
  forg_prompt = """
308
  Create a near-identical variant of the provided painting.
@@ -330,7 +505,6 @@ No annotations. Differences must be visible only at macro zoom.
330
  signature_crops.append(c_url)
331
 
332
  # === Gemini: Case brief + 3 metadata bundles + ledger + solution ===
333
- # Feed IA title/creator/year so the authentic bundle aligns with reality.
334
  title = ia_item.get("title") or "Untitled"
335
  creator = ia_item.get("creator") or ""
336
  date = ia_item.get("date") or ""
@@ -353,7 +527,7 @@ TASK:
353
  1) Create a short, punchy "case_brief" (2–4 sentences) explaining why the artifact matters and why fraud is suspected — NO SPOILERS.
354
  2) Prepare THREE metadata bundles for images A,B,C with NEARLY IDENTICAL fields.
355
  Ensure exactly ONE bundle is AUTHENTIC and that it corresponds to the above authentic context.
356
- The other two are FORGERIES that are almost correct but contain subtle, reality-checkable anomalies.
357
  3) Provide a concise "ledger_summary" describing a believable ownership/payment trail.
358
  4) Provide the solution with: "answer_index" (0 for A, 1 for B, 2 for C) and detailed flags for signature/metadata/financial, plus an "explanation".
359
 
@@ -392,9 +566,11 @@ CONSTRAINTS:
392
  except Exception:
393
  cleaned = raw_text
394
  if "```" in raw_text:
395
- cleaned = raw_text.split("```")[1]
396
- if cleaned.lower().startswith("json"):
397
- cleaned = cleaned.split("\n", 1)[1]
 
 
398
  meta_json = json.loads(cleaned)
399
 
400
  case_brief = meta_json.get("case_brief", "A resurfaced portrait raises questions—its paper trail glitters a little too perfectly.")
@@ -417,7 +593,7 @@ CONSTRAINTS:
417
  "style_period": style_period,
418
  "images": images_urls,
419
  "signature_crops": signature_crops,
420
- "metadata": metadata, # sanitized (no answer)
421
  "ledger_summary": ledger_summary,
422
  "timer_seconds": TIMER_SECONDS,
423
  "initial_ip": INITIAL_IP,
@@ -445,7 +621,7 @@ CONSTRAINTS:
445
  return public
446
 
447
  # -----------------------------------------------------------------------------
448
- # 4) SESSIONS, TOOLS, GUESS, LEADERBOARD (same behavior as before)
449
  # -----------------------------------------------------------------------------
450
  def create_session(user_id: str, username: str, case_id: str) -> Dict[str, Any]:
451
  session_id = str(uuid.uuid4())
@@ -515,21 +691,20 @@ def upsert_leaderboard(case_id: str, user_id: str, username: str, score: int):
515
  leaderboard_ref(case_id).set(top)
516
 
517
  # -----------------------------------------------------------------------------
518
- # 5) ROUTES
519
  # -----------------------------------------------------------------------------
520
  @app.route("/health", methods=["GET"])
521
  def health():
522
  return jsonify({"ok": True, "time": datetime.now(timezone.utc).isoformat()})
523
 
524
- # --- Admin: Internet Archive ingestion ---
525
  @app.route("/admin/ingest-ia", methods=["POST"])
526
  def admin_ingest_ia():
527
  if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
528
  return jsonify({"error": "Forbidden"}), 403
529
 
530
  body = request.get_json() or {}
531
- # Example default: a few reputable museum collections with images
532
- query = body.get("query") or '(collection:(metropolitanmuseum OR smithsonian OR getty OR artic) AND mediatype:image)'
533
  pages = int(body.get("pages") or 2)
534
  rows = int(body.get("rows") or 100)
535
  ingested = 0
@@ -538,14 +713,13 @@ def admin_ingest_ia():
538
  for page in range(1, pages + 1):
539
  try:
540
  docs = ia_advanced_search(query, rows=rows, page=page)
541
- except Exception as e:
542
  errors += 1
543
  continue
544
  for d in docs:
545
  ident = d.get("identifier")
546
  if not ident:
547
  continue
548
- # skip if already ingested
549
  if ia_pool_ref().child(ident).get():
550
  continue
551
  try:
@@ -559,6 +733,26 @@ def admin_ingest_ia():
559
  pool_size = len(ia_pool_ref().get() or {})
560
  return jsonify({"ok": True, "ingested": ingested, "errors": errors, "pool_size": pool_size})
561
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
562
  @app.route("/admin/ia-pool/stats", methods=["GET"])
563
  def ia_pool_stats():
564
  if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
@@ -566,7 +760,7 @@ def ia_pool_stats():
566
  pool = ia_pool_ref().get() or {}
567
  return jsonify({"pool_size": len(pool)})
568
 
569
- # --- Admin: pre-generate today's case (optional) ---
570
  @app.route("/admin/generate-today", methods=["POST"])
571
  def admin_generate_today():
572
  if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
@@ -575,6 +769,21 @@ def admin_generate_today():
575
  public = ensure_case_generated(case_id)
576
  return jsonify({"generated": True, "case_id": case_id, "mode": public.get("mode")})
577
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
578
  # --- Player flow ---
579
  @app.route("/cases/today/start", methods=["POST"])
580
  def start_case():
@@ -586,18 +795,14 @@ def start_case():
586
  existing = sessions_ref().order_by_child("user_id").equal_to(user_id).get()
587
  sess = None
588
  if existing:
589
- for sid, sdoc in existing.items():
590
  if sdoc.get("case_id") == case_id and sdoc.get("status") == "active":
591
  sess = sdoc
592
  break
593
  if not sess:
594
  sess = create_session(user_id, username, case_id)
595
 
596
- payload = {
597
- "session_id": sess["session_id"],
598
- "case": public
599
- }
600
- return jsonify(payload)
601
 
602
  @app.route("/cases/<case_id>/tool/signature", methods=["POST"])
603
  def tool_signature(case_id):
@@ -707,7 +912,7 @@ def submit_guess(case_id):
707
  def leaderboard_daily():
708
  case_id = utc_today_str()
709
  top = leaderboard_ref(case_id).get() or []
710
- user_id, username = extract_user_from_headers(request)
711
  me = plays_ref(case_id).child(user_id).get() or {}
712
  rank = None
713
  if top:
@@ -718,14 +923,16 @@ def leaderboard_daily():
718
  return jsonify({"case_id": case_id, "top": top, "me": {"score": me.get("score"), "rank": rank}})
719
 
720
  # -----------------------------------------------------------------------------
721
- # 6) MAIN
722
  # -----------------------------------------------------------------------------
723
  if __name__ == "__main__":
724
  # Optional: pre-warm pool on boot so you’re ready before first request
725
  if os.environ.get("BOOTSTRAP_IA", "1") == "1":
726
  print("Bootstrapping Internet Archive pool...")
727
- stats = ensure_minimum_ia_pool()
728
- print("Bootstrap complete:", stats)
 
 
 
729
 
730
  app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")), debug=True)
731
-
 
1
  # main.py — Hidden Stroke (AI Noir Investigation) with Internet Archive ingestion
2
  # Flask + Firebase Realtime DB + Firebase Storage + Gemini (exact model names)
3
+ # Runs on Hugging Face (envs: FIREBASE, Firebase_DB, Firebase_Storage, Gemini, optional GAME_SALT/ADMIN_KEY)
4
 
5
+ import os, io, uuid, json, hmac, hashlib, random, traceback, requests
6
  from datetime import datetime, timedelta, timezone
7
  from typing import Dict, Any, Tuple, List, Optional
8
 
 
19
  from google.genai import types
20
 
21
  # -----------------------------------------------------------------------------
22
+ # 1) CONFIG & INIT
23
  # -----------------------------------------------------------------------------
24
  app = Flask(__name__)
25
  CORS(app)
 
71
 
72
  # --- Misc config ---
73
  GAME_SALT = os.environ.get("GAME_SALT", "dev-salt") # for deterministic seeds / HMAC
74
+ ADMIN_KEY = os.environ.get("ADMIN_KEY") # optional for admin endpoints
75
+ IA_USER_AGENT = os.environ.get("IA_USER_AGENT", "HiddenStrokeBot/1.0 (+https://reddit.com)")
76
+ MIN_IA_POOL = int(os.environ.get("MIN_IA_POOL", "60"))
77
+ DEFAULT_IA_QUERY = os.environ.get(
78
+ "IA_QUERY",
79
+ '(collection:(metropolitanmuseum OR smithsonian OR getty OR artic) AND mediatype:image)'
80
+ )
81
+ ALLOW_DEV_BOOTSTRAP = os.environ.get("ALLOW_DEV_BOOTSTRAP", "0") == "1"
82
 
83
  # -----------------------------------------------------------------------------
84
  # 2) UTILS
 
148
 
149
  def ia_advanced_search(query: str, rows: int, page: int) -> List[dict]:
150
  # Internet Archive Advanced Search (no key required)
 
151
  url = "https://archive.org/advancedsearch.php"
152
  params = {"q": query, "rows": rows, "page": page, "output": "json"}
153
  data = http_get_json(url, params=params)
 
158
  return http_get_json(url)
159
 
160
# Substrings of a file's "format" field that mark it as a candidate image.
_IA_IMAGE_FORMAT_HINTS = ("jpeg", "jpg", "png", "tiff", "image")
# Substrings that mark a container/bundle rather than a single raster image.
# Such formats can still contain "image" or "tiff" in their name and would
# otherwise win on byte size, then fail to open as an image downstream.
_IA_CONTAINER_FORMAT_HINTS = ("pdf", "zip", "tar")


def _ia_int(value) -> int:
    """Best-effort integer conversion; missing or malformed values become 0."""
    try:
        return int(float(value or 0))
    except (TypeError, ValueError, OverflowError):
        return 0


def ia_best_image_from_metadata(meta: dict) -> Optional[dict]:
    """Pick the largest image-like file entry from a /metadata response.

    Args:
        meta: Parsed metadata document; only its ``"files"`` list is read.

    Returns:
        The file dict with the highest score, or ``None`` when no entry
        qualifies. The score is ``width * height`` when both are present and
        positive, otherwise the ``size`` field (bytes) as a rough stand-in.

    Entries are skipped when they are not dicts, have no ``name`` (the caller
    builds the download URL from it), do not look like an image format, or
    look like a container format.
    """
    best, best_score = None, -1
    for entry in meta.get("files", []) or []:
        if not isinstance(entry, dict) or not entry.get("name"):
            continue
        fmt = (entry.get("format") or "").lower()
        if not any(hint in fmt for hint in _IA_IMAGE_FORMAT_HINTS):
            continue
        if any(hint in fmt for hint in _IA_CONTAINER_FORMAT_HINTS):
            continue
        width = _ia_int(entry.get("width"))
        height = _ia_int(entry.get("height"))
        if width > 0 and height > 0:
            score = width * height
        else:
            # Dimensions are not always present; fall back to byte size.
            score = _ia_int(entry.get("size"))
        if score > best_score:
            best_score, best = score, entry
    return best
175
 
176
  def ingest_ia_doc(doc: dict) -> Optional[dict]:
177
+ """Fetch /metadata and store best image entry into ia_pool."""
 
 
178
  identifier = doc.get("identifier")
179
  if not identifier:
180
  return None
 
183
  if not best:
184
  return None
185
 
186
+ md = meta.get("metadata", {}) or {}
187
+ title = md.get("title", "") or doc.get("title", "")
188
+ date = md.get("date", "") or doc.get("date", "")
189
+ creator = md.get("creator", "") or doc.get("creator", "")
190
+ rights = md.get("rights", "") or doc.get("rights", "")
191
+ licenseurl = md.get("licenseurl", "") or doc.get("licenseurl", "")
192
 
193
  download_url = f"https://archive.org/download/{identifier}/{best['name']}"
194
  record = {
 
215
  return None
216
  identifiers = sorted(pool.keys())
217
  case_seed = seed_for_date(case_id)
218
+ ident = identifiers[case_seed % len(identifiers)]
 
219
  return pool[ident]
220
 
221
def download_image_to_pil(url: str) -> Image.Image:
    """Fetch the bytes at `url` and decode them into an RGB PIL image."""
    payload = http_get_bytes(url)
    stream = io.BytesIO(payload)
    decoded = Image.open(stream)
    return decoded.convert("RGB")
 
224
 
225
  def crop_signature_macro(img: Image.Image, size: int = 512) -> Image.Image:
226
+ """Lower-right macro crop."""
227
  w, h = img.size
228
  cw = min(size, w)
229
  ch = min(size, h)
 
232
  return img.crop((left, top, left + cw, top + ch))
233
 
234
  # -----------------------------------------------------------------------------
235
+ # 3) IA -> Firebase Storage caching + Zero-admin bootstrap
236
+ # -----------------------------------------------------------------------------
237
def _resize_if_needed(img: Image.Image, max_dim: int = 4096) -> Image.Image:
    """Downscale `img` so its longest side is at most `max_dim`, keeping aspect ratio.

    Args:
        img: Source image; only `.size` and `.resize` are used.
        max_dim: Maximum allowed length, in pixels, of the longer side.

    Returns:
        `img` itself when it already fits, otherwise a new LANCZOS-resampled image.

    Raises:
        ValueError: If `max_dim` is smaller than 1.
    """
    if max_dim < 1:
        raise ValueError(f"max_dim must be >= 1, got {max_dim}")

    w, h = img.size
    longest = max(w, h)
    if longest <= max_dim:
        return img

    scale = max_dim / longest
    # The short side is clamped to 1px: for very elongated images the
    # truncated value could otherwise be 0, which resize() rejects.
    if w >= h:
        new_w, new_h = max_dim, max(1, int(h * scale))
    else:
        new_w, new_h = max(1, int(w * scale)), max_dim
    return img.resize((new_w, new_h), Image.LANCZOS)
248
+
249
def _encode_jpeg(img: Image.Image, quality: int) -> bytes:
    """Serialize `img` as an optimized JPEG and return the raw bytes."""
    buf = io.BytesIO()
    img.save(buf, format="JPEG", quality=quality, optimize=True)
    return buf.getvalue()


def cache_single_ia_identifier(
    identifier: str,
    overwrite: bool = False,
    max_dim: int = 4096,
    jpeg_quality: int = 90,
    skip_if_restricted: bool = True,
) -> dict:
    """Cache one ia_pool item's image and signature crop in Firebase Storage.

    Downloads the item's image, downsizes it to at most `max_dim` on its
    longest side, uploads the image and a 512px lower-right crop as JPEGs,
    and updates the ia_pool record with the resulting URLs, paths and
    dimensions.

    Args:
        identifier: Key of the record under ia_pool.
        overwrite: Re-cache even if the record already has a `storage_url`.
        max_dim: Longest-side limit applied before upload.
        jpeg_quality: JPEG quality; clamped to the 1..100 range.
        skip_if_restricted: Skip records whose `rights` text mentions
            "in copyright" or "all rights reserved".

    Returns:
        A dict with `identifier` and `stored`. On success it also carries
        `storage_url`, `signature_crop_url`, `width` and `height`; otherwise a
        `reason` (`not_in_pool`, `restricted_rights`, `already_cached`,
        `missing_source_url` or `download_failed: ...`).

    Errors raised while encoding, uploading or updating the record are not
    caught here and propagate to the caller.
    """
    rec_ref = ia_pool_ref().child(identifier)
    rec = rec_ref.get() or {}
    if not rec:
        return {"identifier": identifier, "stored": False, "reason": "not_in_pool"}

    rights = (rec.get("rights") or "").lower()
    if skip_if_restricted and ("in copyright" in rights or "all rights reserved" in rights):
        return {"identifier": identifier, "stored": False, "reason": "restricted_rights"}

    if rec.get("storage_url") and not overwrite:
        return {"identifier": identifier, "stored": False, "reason": "already_cached", "storage_url": rec["storage_url"]}

    # Prefer the existing cached copy as source, then fall back to IA.
    # dict.fromkeys de-duplicates while keeping order, so the same URL is
    # never fetched twice when both fields point at the same place.
    sources = list(dict.fromkeys(
        url for url in (rec.get("storage_url"), rec.get("download_url")) if url
    ))
    if not sources:
        return {"identifier": identifier, "stored": False, "reason": "missing_source_url"}

    img = None
    last_error = None
    for url in sources:
        try:
            img = download_image_to_pil(url)
            break
        except Exception as exc:  # any fetch/decode failure: try the next source
            last_error = exc
    if img is None:
        return {"identifier": identifier, "stored": False, "reason": f"download_failed: {last_error}"}

    img = _resize_if_needed(img, max_dim=max_dim)
    w, h = img.size
    quality = min(100, max(1, int(jpeg_quality)))

    # Upload the (possibly downsized) full image.
    img_path = f"ia_cache/{identifier}/original.jpg"
    storage_url = upload_bytes_to_storage(_encode_jpeg(img, quality), img_path, "image/jpeg")

    # Upload the signature macro crop.
    crop = crop_signature_macro(img, 512)
    crop_path = f"ia_cache/{identifier}/signature_crop.jpg"
    signature_crop_url = upload_bytes_to_storage(_encode_jpeg(crop, quality), crop_path, "image/jpeg")

    rec_ref.update({
        "storage_url": storage_url,
        "signature_crop_url": signature_crop_url,
        "image_path": img_path,
        "crop_path": crop_path,
        "width": w,
        "height": h,
        "cached_at": datetime.now(timezone.utc).isoformat(),
    })

    return {
        "identifier": identifier,
        "stored": True,
        "storage_url": storage_url,
        "signature_crop_url": signature_crop_url,
        "width": w,
        "height": h,
    }
325
+
326
def batch_cache_ia_pool(
    limit: int = 100,
    overwrite: bool = False,
    randomize: bool = True,
    min_width: int = 800,
    min_height: int = 800,
    max_dim: int = 4096,
    jpeg_quality: int = 90,
    skip_if_restricted: bool = True,
) -> dict:
    """Cache up to `limit` ia_pool items into Firebase Storage.

    Args:
        limit: Maximum number of items to attempt; negative values mean 0.
        overwrite: Also re-process items that already have a `storage_url`.
        randomize: Shuffle the candidates before applying `limit`.
        min_width: Skip items whose recorded width is known and smaller.
        min_height: Skip items whose recorded height is known and smaller.
        max_dim: Forwarded to `cache_single_ia_identifier`.
        jpeg_quality: Forwarded to `cache_single_ia_identifier`.
        skip_if_restricted: Forwarded to `cache_single_ia_identifier`.

    Returns:
        `{"ok", "processed", "stored", "skipped", "results"}`, where
        `results` holds one per-item dict. An item that raises is reported as
        `stored: False` with an `error: ...` reason and counted as skipped,
        so one failure does not abort the rest of the batch.
    """
    pool = ia_pool_ref().get() or {}
    if not pool:
        return {"ok": True, "processed": 0, "stored": 0, "skipped": 0, "results": []}

    def _dim(value) -> int:
        # Stored dimensions may be missing or malformed; treat those as unknown.
        try:
            return int(value or 0)
        except (TypeError, ValueError):
            return 0

    candidates = []
    for ident, rec in pool.items():
        if not isinstance(rec, dict):
            continue
        if not overwrite and rec.get("storage_url"):
            continue
        w = _dim(rec.get("width"))
        h = _dim(rec.get("height"))
        # The size filter applies only when both dimensions are known.
        if (w and h) and (w < min_width or h < min_height):
            continue
        candidates.append(ident)

    if randomize:
        random.shuffle(candidates)
    candidates = candidates[:max(0, limit)]

    results, stored, skipped = [], 0, 0
    for ident in candidates:
        try:
            res = cache_single_ia_identifier(
                ident,
                overwrite=overwrite,
                max_dim=max_dim,
                jpeg_quality=jpeg_quality,
                skip_if_restricted=skip_if_restricted,
            )
        except Exception as exc:
            # Best-effort batch: record the failure and keep going.
            res = {"identifier": ident, "stored": False, "reason": f"error: {exc}"}
        results.append(res)
        if res.get("stored"):
            stored += 1
        else:
            skipped += 1

    return {"ok": True, "processed": len(candidates), "stored": stored, "skipped": skipped, "results": results}
370
+
371
def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pages: int = 5) -> dict:
    """Zero-admin bootstrap of the Internet Archive pool.

    - If ia_pool has fewer than `min_items` records, pull pages from IA
      Advanced Search (using DEFAULT_IA_QUERY) and ingest new identifiers.
    - Then cache images until `min_items` records have a `storage_url`.

    Args:
        min_items: Target number of pool records and of cached records.
        rows: Search results requested per page.
        max_pages: Maximum number of search pages to walk.

    Returns:
        `{"ok", "had", "added", "cached", "final_size"}`: pool size at entry,
        records ingested, images cached by this call, and pool size at exit.

    Search and per-document ingest failures are logged and tolerated.
    NOTE(review): caching runs synchronously, so the first call on an
    uncached pool can take a while.
    """
    pool = ia_pool_ref().get() or {}
    have = len(pool)
    added = 0
    cached = 0

    if have < min_items:
        # Track known identifiers locally rather than one remote read per doc.
        known = set(pool)
        page = 1
        while have + added < min_items and page <= max_pages:
            try:
                docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=rows, page=page)
            except Exception as e:
                print("IA search failed on page", page, e)
                break
            if not docs:
                break
            for d in docs:
                ident = d.get("identifier")
                if not ident or ident in known:
                    continue
                try:
                    rec = ingest_ia_doc(d)
                except Exception as e:
                    print("IA ingest failed for", ident, e)
                    continue
                if rec:
                    known.add(ident)
                    added += 1
            page += 1
        # Re-read so the caching step sees the records just ingested.
        pool = ia_pool_ref().get() or {}

    # Count cached records, not pool size: a full pool with nothing cached
    # must still trigger caching.
    cached_now = sum(
        1 for rec in pool.values() if isinstance(rec, dict) and rec.get("storage_url")
    )
    need_cache = max(0, min_items - cached_now)
    if need_cache:
        res = batch_cache_ia_pool(limit=need_cache, randomize=True)
        cached = res.get("stored", 0)

    # Caching only updates existing records, so the pool size is unchanged.
    return {"ok": True, "had": have, "added": added, "cached": cached, "final_size": len(pool)}
416
+
417
+ # -----------------------------------------------------------------------------
418
+ # 4) CASE GENERATION (uses IA for authentic image, Gemini for forgeries/meta)
419
  # -----------------------------------------------------------------------------
420
  def ensure_case_generated(case_id: str) -> Dict[str, Any]:
421
  existing_public = case_ref(case_id).child("public").get()
422
  if existing_public:
423
  return existing_public
424
 
425
+ # Ensure we have a cached pool ready
426
  try:
427
+ stats = ensure_minimum_ia_pool()
428
+ if stats.get("added") or stats.get("cached"):
429
+ print("Bootstrap:", stats)
430
  except Exception as e:
431
  print("Bootstrap warning:", e)
432
 
433
+ # Fallback ingest if pool is empty
 
434
  pool = ia_pool_ref().get() or {}
435
  if not pool:
436
  try:
437
+ docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=100, page=1)
 
 
438
  for d in docs:
439
  try:
440
  ingest_ia_doc(d)
 
446
  # Pick authentic from ia_pool deterministically
447
  ia_item = choose_ia_item_for_case(case_id)
448
  if not ia_item:
 
449
  raise RuntimeError("No IA items available. Ingest needed.")
450
 
451
  # Deterministic mode
452
  case_seed = seed_for_date(case_id)
453
  rng = random.Random(case_seed)
454
  mode = "knowledge" if (case_seed % 2 == 0) else "observation"
 
 
455
  style_period = "sourced from Internet Archive; museum catalog reproduction"
456
 
457
+ # Load authentic image (prefer cached)
458
  source_url = ia_item.get("storage_url") or ia_item["download_url"]
459
  auth_img = download_image_to_pil(source_url)
460
 
 
461
  images_urls: List[str] = []
462
  signature_crops: List[str] = []
463
 
 
474
  if mode == "knowledge":
475
  # Use the same authentic visual for all three; differences come from metadata only
476
  for idx in [2, 3]:
477
+ images_urls.append(images_urls[0]) # same URL is ok
478
  signature_crops.append(signature_crops[0])
479
  else:
480
+ # observation: two subtle variants (signature micro-geometry)
481
  for i in range(2):
482
  forg_prompt = """
483
  Create a near-identical variant of the provided painting.
 
505
  signature_crops.append(c_url)
506
 
507
  # === Gemini: Case brief + 3 metadata bundles + ledger + solution ===
 
508
  title = ia_item.get("title") or "Untitled"
509
  creator = ia_item.get("creator") or ""
510
  date = ia_item.get("date") or ""
 
527
  1) Create a short, punchy "case_brief" (2–4 sentences) explaining why the artifact matters and why fraud is suspected — NO SPOILERS.
528
  2) Prepare THREE metadata bundles for images A,B,C with NEARLY IDENTICAL fields.
529
  Ensure exactly ONE bundle is AUTHENTIC and that it corresponds to the above authentic context.
530
+ The other two are FORGERIES with subtle, reality-checkable anomalies.
531
  3) Provide a concise "ledger_summary" describing a believable ownership/payment trail.
532
  4) Provide the solution with: "answer_index" (0 for A, 1 for B, 2 for C) and detailed flags for signature/metadata/financial, plus an "explanation".
533
 
 
566
  except Exception:
567
  cleaned = raw_text
568
  if "```" in raw_text:
569
+ parts = raw_text.split("```")
570
+ if len(parts) >= 2:
571
+ cleaned = parts[1]
572
+ if cleaned.lower().startswith("json"):
573
+ cleaned = cleaned.split("\n", 1)[1]
574
  meta_json = json.loads(cleaned)
575
 
576
  case_brief = meta_json.get("case_brief", "A resurfaced portrait raises questions—its paper trail glitters a little too perfectly.")
 
593
  "style_period": style_period,
594
  "images": images_urls,
595
  "signature_crops": signature_crops,
596
+ "metadata": metadata,
597
  "ledger_summary": ledger_summary,
598
  "timer_seconds": TIMER_SECONDS,
599
  "initial_ip": INITIAL_IP,
 
621
  return public
622
 
623
  # -----------------------------------------------------------------------------
624
+ # 5) SESSIONS, TOOLS, GUESS, LEADERBOARD
625
  # -----------------------------------------------------------------------------
626
  def create_session(user_id: str, username: str, case_id: str) -> Dict[str, Any]:
627
  session_id = str(uuid.uuid4())
 
691
  leaderboard_ref(case_id).set(top)
692
 
693
  # -----------------------------------------------------------------------------
694
+ # 6) ROUTES
695
  # -----------------------------------------------------------------------------
696
@app.route("/health", methods=["GET"])
def health():
    """Liveness probe: reports ok plus the current UTC time in ISO-8601."""
    now_utc = datetime.now(timezone.utc)
    body = {"ok": True, "time": now_utc.isoformat()}
    return jsonify(body)
699
 
700
+ # --- Admin: Internet Archive ingestion (manual) ---
701
  @app.route("/admin/ingest-ia", methods=["POST"])
702
  def admin_ingest_ia():
703
  if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
704
  return jsonify({"error": "Forbidden"}), 403
705
 
706
  body = request.get_json() or {}
707
+ query = body.get("query") or DEFAULT_IA_QUERY
 
708
  pages = int(body.get("pages") or 2)
709
  rows = int(body.get("rows") or 100)
710
  ingested = 0
 
713
  for page in range(1, pages + 1):
714
  try:
715
  docs = ia_advanced_search(query, rows=rows, page=page)
716
+ except Exception:
717
  errors += 1
718
  continue
719
  for d in docs:
720
  ident = d.get("identifier")
721
  if not ident:
722
  continue
 
723
  if ia_pool_ref().child(ident).get():
724
  continue
725
  try:
 
733
  pool_size = len(ia_pool_ref().get() or {})
734
  return jsonify({"ok": True, "ingested": ingested, "errors": errors, "pool_size": pool_size})
735
 
736
+ # --- Admin: Cache IA images to Firebase Storage (manual) ---
737
@app.route("/admin/cache-ia", methods=["POST"])
def admin_cache_ia():
    """Admin endpoint: cache IA pool images into Firebase Storage.

    Requires the `X-Admin-Key` header to match ADMIN_KEY. The optional JSON
    body may set `limit`, `overwrite`, `randomize`, `min_width`,
    `min_height`, `max_dim`, `jpeg_quality` and `skip_if_restricted`; a
    missing or unparsable body means all defaults.

    Returns:
        403 when the key is missing or wrong, 400 when the body is not a JSON
        object or a numeric option is not an integer, otherwise the
        `batch_cache_ia_pool` summary as JSON.
    """
    supplied_key = request.headers.get("X-Admin-Key") or ""
    # Constant-time comparison so response timing does not leak the key.
    if not ADMIN_KEY or not hmac.compare_digest(
        supplied_key.encode("utf-8"), ADMIN_KEY.encode("utf-8")
    ):
        return jsonify({"error": "Forbidden"}), 403

    # silent=True: an empty or non-JSON body yields None instead of an error.
    cfg = request.get_json(silent=True) or {}
    if not isinstance(cfg, dict):
        return jsonify({"error": "JSON body must be an object"}), 400

    def _flag(name: str, default: bool) -> bool:
        # bool("false") is True, so string flags are parsed explicitly.
        raw = cfg.get(name, default)
        if isinstance(raw, str):
            return raw.strip().lower() in ("1", "true", "yes", "on")
        return bool(raw)

    try:
        options = {
            "limit": int(cfg.get("limit", 100)),
            "min_width": int(cfg.get("min_width", 800)),
            "min_height": int(cfg.get("min_height", 800)),
            "max_dim": int(cfg.get("max_dim", 4096)),
            "jpeg_quality": int(cfg.get("jpeg_quality", 90)),
        }
    except (TypeError, ValueError):
        return jsonify({"error": "limit, min_width, min_height, max_dim and jpeg_quality must be integers"}), 400

    out = batch_cache_ia_pool(
        overwrite=_flag("overwrite", False),
        randomize=_flag("randomize", True),
        skip_if_restricted=_flag("skip_if_restricted", True),
        **options,
    )
    return jsonify(out)
754
+
755
+ # --- Admin: pool stats ---
756
  @app.route("/admin/ia-pool/stats", methods=["GET"])
757
  def ia_pool_stats():
758
  if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
 
760
  pool = ia_pool_ref().get() or {}
761
  return jsonify({"pool_size": len(pool)})
762
 
763
+ # --- Admin: pre-generate today's case (manual) ---
764
  @app.route("/admin/generate-today", methods=["POST"])
765
  def admin_generate_today():
766
  if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
 
769
  public = ensure_case_generated(case_id)
770
  return jsonify({"generated": True, "case_id": case_id, "mode": public.get("mode")})
771
 
772
+ # --- DEV-ONLY: panic button bootstrap (no auth; gated by env) ---
773
@app.route("/admin/bootstrap-now", methods=["POST"])
def admin_bootstrap_now():
    """DEV-ONLY endpoint: run the IA pool bootstrap on demand.

    There is no authentication; the route is enabled only when
    ALLOW_DEV_BOOTSTRAP is set. The optional JSON body may set `min_items`,
    `rows` and `max_pages`; a missing or unparsable body means defaults.

    Returns:
        403 when disabled, 400 when an option is not an integer, 500 with the
        error text when the bootstrap raises, otherwise `{"ok", "stats"}`.
    """
    if not ALLOW_DEV_BOOTSTRAP:
        return jsonify({"error": "Disabled. Set ALLOW_DEV_BOOTSTRAP=1 to enable."}), 403

    # silent=True so a bare POST with no JSON body still works.
    cfg = request.get_json(silent=True) or {}
    if not isinstance(cfg, dict):
        cfg = {}

    try:
        min_items = int(cfg.get("min_items", MIN_IA_POOL))
        rows = int(cfg.get("rows", 100))
        max_pages = int(cfg.get("max_pages", 5))
    except (TypeError, ValueError):
        return jsonify({"ok": False, "error": "min_items, rows and max_pages must be integers"}), 400

    try:
        stats = ensure_minimum_ia_pool(min_items=min_items, rows=rows, max_pages=max_pages)
    except Exception as e:
        return jsonify({"ok": False, "error": str(e)}), 500
    return jsonify({"ok": True, "stats": stats})
786
+
787
  # --- Player flow ---
788
  @app.route("/cases/today/start", methods=["POST"])
789
  def start_case():
 
795
  existing = sessions_ref().order_by_child("user_id").equal_to(user_id).get()
796
  sess = None
797
  if existing:
798
+ for _, sdoc in existing.items():
799
  if sdoc.get("case_id") == case_id and sdoc.get("status") == "active":
800
  sess = sdoc
801
  break
802
  if not sess:
803
  sess = create_session(user_id, username, case_id)
804
 
805
+ return jsonify({"session_id": sess["session_id"], "case": public})
 
 
 
 
806
 
807
  @app.route("/cases/<case_id>/tool/signature", methods=["POST"])
808
  def tool_signature(case_id):
 
912
  def leaderboard_daily():
913
  case_id = utc_today_str()
914
  top = leaderboard_ref(case_id).get() or []
915
+ user_id, _ = extract_user_from_headers(request)
916
  me = plays_ref(case_id).child(user_id).get() or {}
917
  rank = None
918
  if top:
 
923
  return jsonify({"case_id": case_id, "top": top, "me": {"score": me.get("score"), "rank": rank}})
924
 
925
  # -----------------------------------------------------------------------------
926
+ # 7) MAIN
927
  # -----------------------------------------------------------------------------
928
if __name__ == "__main__":
    # Optional: pre-warm the pool on boot so it is ready before the first
    # request. Set BOOTSTRAP_IA to anything other than "1" to skip.
    if os.environ.get("BOOTSTRAP_IA", "1") == "1":
        print("Bootstrapping Internet Archive pool...")
        try:
            stats = ensure_minimum_ia_pool()
            print("Bootstrap complete:", stats)
        except Exception as e:
            # A failed pre-warm must not stop the server from starting.
            print("Bootstrap failed:", e)

    # Debug mode is opt-in via FLASK_DEBUG=1. The server binds to 0.0.0.0,
    # and debug mode enables the interactive debugger, which must not be
    # reachable from outside. Its reloader would also run the bootstrap twice.
    debug_enabled = os.environ.get("FLASK_DEBUG", "0") == "1"
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")), debug=debug_enabled)