rairo committed on
Commit
6ebaf2b
·
verified ·
1 Parent(s): 3bafcbc

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +151 -76
main.py CHANGED
@@ -1,6 +1,8 @@
1
- # app.py — Hidden Stroke (AI Noir Investigation) with Internet Archive ingestion
2
- # Flask + Firebase Realtime DB + Firebase Storage + Gemini (exact model names)
3
- # Runs on Hugging Face (envs: FIREBASE, Firebase_DB, Firebase_Storage, Gemini, optional GAME_SALT/ADMIN_KEY)
 
 
4
 
5
  import os, io, uuid, json, hmac, hashlib, random, traceback, requests
6
  from datetime import datetime, timedelta, timezone
@@ -10,6 +12,15 @@ from flask import Flask, request, jsonify
10
  from flask_cors import CORS
11
  from PIL import Image
12
 
 
 
 
 
 
 
 
 
 
13
  # ---------------- Firebase Admin (Realtime DB + Storage) ----------------
14
  import firebase_admin
15
  from firebase_admin import credentials, db, storage
@@ -43,9 +54,9 @@ try:
43
  })
44
  bucket = storage.bucket()
45
  db_root = db.reference("/")
46
- print("Firebase Realtime DB + Storage initialized.")
47
  except Exception as e:
48
- print(f"FATAL: Firebase init failed: {e}")
49
  raise
50
 
51
  # --- Gemini ---
@@ -54,9 +65,9 @@ try:
54
  if not GEMINI_API_KEY:
55
  raise ValueError("The 'Gemini' environment variable is not set.")
56
  client = genai.Client(api_key=GEMINI_API_KEY)
57
- print("Gemini client initialized.")
58
  except Exception as e:
59
- print(f"FATAL: Gemini init failed: {e}")
60
  raise
61
 
62
  # --- Models (exact names) ---
@@ -70,8 +81,8 @@ TOOL_COSTS = {"signature": 1, "metadata": 1, "financial": 2}
70
  LEADERBOARD_TOP_N = 50
71
 
72
  # --- Misc config ---
73
- GAME_SALT = os.environ.get("GAME_SALT", "dev-salt") # for deterministic seeds / HMAC
74
- ADMIN_KEY = os.environ.get("ADMIN_KEY") # optional for admin endpoints
75
  IA_USER_AGENT = os.environ.get("IA_USER_AGENT", "HiddenStrokeBot/1.0 (+https://reddit.com)")
76
  MIN_IA_POOL = int(os.environ.get("MIN_IA_POOL", "60"))
77
  DEFAULT_IA_QUERY = os.environ.get(
@@ -79,6 +90,7 @@ DEFAULT_IA_QUERY = os.environ.get(
79
  '(collection:(metropolitanmuseum OR smithsonian OR getty OR artic) AND mediatype:image)'
80
  )
81
  ALLOW_DEV_BOOTSTRAP = os.environ.get("ALLOW_DEV_BOOTSTRAP", "0") == "1"
 
82
 
83
  # -----------------------------------------------------------------------------
84
  # 2) UTILS
@@ -105,10 +117,13 @@ def hmac_hex(s: str) -> str:
105
  return hmac.new(GAME_SALT.encode(), s.encode(), hashlib.sha256).hexdigest()
106
 
107
def upload_bytes_to_storage(data: bytes, path: str, content_type: str) -> str:
    """Write raw bytes to Firebase Storage at *path* and return a public URL."""
    target = bucket.blob(path)
    target.upload_from_string(data, content_type=content_type)
    # Expose the object publicly so game clients can fetch it by URL directly.
    target.make_public()
    return target.public_url
 
 
112
 
113
  def pil_from_inline_image_part(part) -> Image.Image:
114
  image_bytes = part.inline_data.data
@@ -135,27 +150,42 @@ def fifty_fifty_mode(case_seed: int) -> str:
135
  return "knowledge" if (case_seed % 2 == 0) else "observation"
136
 
137
def http_get_json(url: str, params: Optional[dict] = None) -> dict:
    """GET *url* with the project User-Agent and return the parsed JSON body.

    Args:
        url: Fully-qualified URL to request.
        params: Optional query parameters forwarded to ``requests.get``.
            (Annotation fixed: the default is None, so the type is Optional.)

    Returns:
        The decoded JSON response as a dict.

    Raises:
        requests.HTTPError: If the response status is not 2xx.
    """
    headers = {"User-Agent": IA_USER_AGENT}
    r = requests.get(url, params=params, headers=headers, timeout=30)
    r.raise_for_status()
    return r.json()
142
 
143
def http_get_bytes(url: str) -> bytes:
    """Fetch *url* and return the raw response body, raising on HTTP errors."""
    resp = requests.get(
        url,
        headers={"User-Agent": IA_USER_AGENT},
        timeout=60,
    )
    resp.raise_for_status()
    return resp.content
148
 
149
def ia_advanced_search(query: str, rows: int, page: int) -> List[dict]:
    """Query the Internet Archive Advanced Search API (no API key required)."""
    payload = http_get_json(
        "https://archive.org/advancedsearch.php",
        params={"q": query, "rows": rows, "page": page, "output": "json"},
    )
    response = payload.get("response", {})
    return response.get("docs", [])
 
 
 
 
 
 
155
 
156
def ia_metadata(identifier: str) -> dict:
    """Fetch the Internet Archive metadata document for *identifier*."""
    return http_get_json(f"https://archive.org/metadata/{identifier}")
 
 
 
 
 
 
159
 
160
  def ia_best_image_from_metadata(meta: dict) -> Optional[dict]:
161
  files = meta.get("files", []) or []
@@ -165,12 +195,13 @@ def ia_best_image_from_metadata(meta: dict) -> Optional[dict]:
165
  if any(x in fmt for x in ["jpeg", "jpg", "png", "tiff", "image"]):
166
  w = int(f.get("width") or 0)
167
  h = int(f.get("height") or 0)
168
- if w and h:
169
- px = w * h
170
- else:
171
- px = int(f.get("size") or 0)
172
  if px > best_pixels:
173
  best_pixels, best = px, f
 
 
 
 
174
  return best
175
 
176
  def ingest_ia_doc(doc: dict) -> Optional[dict]:
@@ -178,9 +209,11 @@ def ingest_ia_doc(doc: dict) -> Optional[dict]:
178
  identifier = doc.get("identifier")
179
  if not identifier:
180
  return None
 
181
  meta = ia_metadata(identifier)
182
  best = ia_best_image_from_metadata(meta)
183
  if not best:
 
184
  return None
185
 
186
  md = meta.get("metadata", {}) or {}
@@ -207,28 +240,33 @@ def ingest_ia_doc(doc: dict) -> Optional[dict]:
207
  "source": "internet_archive"
208
  }
209
  ia_pool_ref().child(identifier).set(record)
 
210
  return record
211
 
212
def choose_ia_item_for_case(case_id: str) -> Optional[dict]:
    """Deterministically pick one pooled IA record for the given case id.

    The same case_id always maps to the same item (seeded by date), so every
    player sees the same daily artwork. Returns None when the pool is empty.
    """
    pool = ia_pool_ref().get() or {}
    if not pool:
        return None
    # Sorting makes the modular index stable regardless of dict ordering.
    ordered = sorted(pool)
    chosen = ordered[seed_for_date(case_id) % len(ordered)]
    return pool[chosen]
220
 
221
def download_image_to_pil(url: str) -> Image.Image:
    """Download *url* and decode it into an RGB PIL image."""
    raw = http_get_bytes(url)
    decoded = Image.open(io.BytesIO(raw))
    # Force RGB so downstream JPEG saves and Gemini calls get a uniform mode.
    return decoded.convert("RGB")
 
 
224
 
225
def crop_signature_macro(img: Image.Image, size: int = 512) -> Image.Image:
    """Crop a macro view of the lower-right corner, where signatures sit.

    The window is at most ``size`` x ``size`` and is clamped to the image
    bounds, so images smaller than ``size`` are returned whole.
    """
    width, height = img.size
    crop_w, crop_h = min(size, width), min(size, height)
    # Anchor the window to the bottom-right corner of the image.
    x0 = max(0, width - crop_w)
    y0 = max(0, height - crop_h)
    return img.crop((x0, y0, x0 + crop_w, y0 + crop_h))
233
 
234
  # -----------------------------------------------------------------------------
@@ -244,6 +282,7 @@ def _resize_if_needed(img: Image.Image, max_dim: int = 4096) -> Image.Image:
244
  else:
245
  new_h = max_dim
246
  new_w = int(w * (max_dim / h))
 
247
  return img.resize((new_w, new_h), Image.LANCZOS)
248
 
249
  def cache_single_ia_identifier(
@@ -253,10 +292,6 @@ def cache_single_ia_identifier(
253
  jpeg_quality: int = 90,
254
  skip_if_restricted: bool = True,
255
  ) -> dict:
256
- """
257
- Download one IA item from ia_pool, upload image + signature macro crop to Firebase Storage,
258
- and update the ia_pool record with storage URLs & dimensions.
259
- """
260
  rec_ref = ia_pool_ref().child(identifier)
261
  rec = rec_ref.get() or {}
262
  if not rec:
@@ -264,25 +299,31 @@ def cache_single_ia_identifier(
264
 
265
  rights = (rec.get("rights") or "").lower()
266
  if skip_if_restricted and ("in copyright" in rights or "all rights reserved" in rights):
 
267
  return {"identifier": identifier, "stored": False, "reason": "restricted_rights"}
268
 
269
  if rec.get("storage_url") and not overwrite:
 
270
  return {"identifier": identifier, "stored": False, "reason": "already_cached", "storage_url": rec["storage_url"]}
271
 
272
- # Prefer existing cached URL as source; fall back to IA
273
  source_url = rec.get("storage_url") or rec.get("download_url")
274
  if not source_url:
 
275
  return {"identifier": identifier, "stored": False, "reason": "missing_source_url"}
276
 
277
  try:
 
278
  img = download_image_to_pil(source_url)
279
  except Exception as e:
280
- if rec.get("download_url"):
281
  try:
 
282
  img = download_image_to_pil(rec["download_url"])
283
  except Exception as e2:
 
284
  return {"identifier": identifier, "stored": False, "reason": f"download_failed: {e2}"}
285
  else:
 
286
  return {"identifier": identifier, "stored": False, "reason": f"download_failed: {e}"}
287
 
288
  img = _resize_if_needed(img, max_dim=max_dim)
@@ -313,6 +354,7 @@ def cache_single_ia_identifier(
313
  "cached_at": datetime.now(timezone.utc).isoformat()
314
  }
315
  rec_ref.update(rec_update)
 
316
 
317
  return {
318
  "identifier": identifier,
@@ -333,8 +375,8 @@ def batch_cache_ia_pool(
333
  jpeg_quality: int = 90,
334
  skip_if_restricted: bool = True,
335
  ) -> dict:
336
- """Cache up to `limit` uncached IA items into Firebase Storage."""
337
  pool = ia_pool_ref().get() or {}
 
338
  if not pool:
339
  return {"ok": True, "processed": 0, "stored": 0, "skipped": 0, "results": []}
340
 
@@ -344,12 +386,14 @@ def batch_cache_ia_pool(
344
  w = int(rec.get("width") or 0)
345
  h = int(rec.get("height") or 0)
346
  if (w and h) and (w < min_width or h < min_height):
 
347
  continue
348
  candidates.append(ident)
349
 
350
  if randomize:
351
  random.shuffle(candidates)
352
  candidates = candidates[:max(0, limit)]
 
353
 
354
  results, stored, skipped = [], 0, 0
355
  for ident in candidates:
@@ -366,28 +410,26 @@ def batch_cache_ia_pool(
366
  else:
367
  skipped += 1
368
 
 
369
  return {"ok": True, "processed": len(candidates), "stored": stored, "skipped": skipped, "results": results}
370
 
371
  def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pages: int = 5) -> dict:
372
- """
373
- Zero-admin bootstrap:
374
- - If ia_pool has fewer than `min_items`, pull from IA Advanced Search and ingest.
375
- - Then cache enough images to reach `min_items`.
376
- """
377
  pool = ia_pool_ref().get() or {}
378
  have = len(pool)
379
  added = 0
380
  cached = 0
 
381
 
382
  if have < min_items:
383
  page = 1
384
  while have + added < min_items and page <= max_pages:
385
  try:
386
  docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=rows, page=page)
387
- except Exception as e:
388
- print("IA search failed on page", page, e)
389
  break
390
  if not docs:
 
391
  break
392
  for d in docs:
393
  ident = d.get("identifier")
@@ -400,19 +442,25 @@ def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pa
400
  if rec:
401
  added += 1
402
  except Exception:
 
403
  continue
 
 
404
  page += 1
405
 
406
  # Cache up to min_items
407
  pool = ia_pool_ref().get() or {}
408
  have_now = len(pool)
409
  need_cache = max(0, min_items - have_now)
 
410
  if need_cache:
411
  res = batch_cache_ia_pool(limit=need_cache, randomize=True)
412
  cached = res.get("stored", 0)
413
 
414
  final_size = len(ia_pool_ref().get() or {})
415
- return {"ok": True, "had": have, "added": added, "cached": cached, "final_size": final_size}
 
 
416
 
417
  # -----------------------------------------------------------------------------
418
  # 4) CASE GENERATION (uses IA for authentic image, Gemini for forgeries/meta)
@@ -420,28 +468,15 @@ def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pa
420
  def ensure_case_generated(case_id: str) -> Dict[str, Any]:
421
  existing_public = case_ref(case_id).child("public").get()
422
  if existing_public:
 
423
  return existing_public
424
 
425
  # Ensure we have a cached pool ready
426
  try:
427
  stats = ensure_minimum_ia_pool()
428
- if stats.get("added") or stats.get("cached"):
429
- print("Bootstrap:", stats)
430
- except Exception as e:
431
- print("Bootstrap warning:", e)
432
-
433
- # Fallback ingest if pool is empty
434
- pool = ia_pool_ref().get() or {}
435
- if not pool:
436
- try:
437
- docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=100, page=1)
438
- for d in docs:
439
- try:
440
- ingest_ia_doc(d)
441
- except Exception:
442
- continue
443
- except Exception as e:
444
- print("WARNING: IA default ingest failed:", e)
445
 
446
  # Pick authentic from ia_pool deterministically
447
  ia_item = choose_ia_item_for_case(case_id)
@@ -450,34 +485,35 @@ def ensure_case_generated(case_id: str) -> Dict[str, Any]:
450
 
451
  # Deterministic mode
452
  case_seed = seed_for_date(case_id)
453
- rng = random.Random(case_seed)
454
  mode = "knowledge" if (case_seed % 2 == 0) else "observation"
 
 
455
  style_period = "sourced from Internet Archive; museum catalog reproduction"
456
 
457
  # Load authentic image (prefer cached)
458
  source_url = ia_item.get("storage_url") or ia_item["download_url"]
 
459
  auth_img = download_image_to_pil(source_url)
460
 
461
  images_urls: List[str] = []
462
  signature_crops: List[str] = []
463
 
464
  # Save authentic as image #1
465
- images_urls.append(
466
- save_image_return_url(auth_img, f"hidden_stroke/{case_id}/images/img_1.jpg")
467
- )
 
468
  # Macro crop for signature area
469
  crop1 = crop_signature_macro(auth_img, 512)
470
- signature_crops.append(
471
- save_image_return_url(crop1, f"hidden_stroke/{case_id}/signature_crops/crop_1.jpg", quality=88)
472
- )
473
 
474
  if mode == "knowledge":
475
- # Use the same authentic visual for all three; differences come from metadata only
476
  for idx in [2, 3]:
477
- images_urls.append(images_urls[0]) # same URL is ok
478
  signature_crops.append(signature_crops[0])
479
  else:
480
- # observation: two subtle variants (signature micro-geometry)
481
  for i in range(2):
482
  forg_prompt = """
483
  Create a near-identical variant of the provided painting.
@@ -485,6 +521,7 @@ Keep composition, palette, and lighting the same.
485
  Only introduce a subtle change in signature micro-geometry (baseline alignment, stroke overlap order, or curve spacing).
486
  No annotations. Differences must be visible only at macro zoom.
487
  """
 
488
  resp = client.models.generate_content(
489
  model=GENERATION_MODEL,
490
  contents=[forg_prompt, auth_img],
@@ -496,6 +533,7 @@ No annotations. Differences must be visible only at macro zoom.
496
  f_img = pil_from_inline_image_part(p)
497
  break
498
  if f_img is None:
 
499
  f_img = auth_img.copy()
500
 
501
  url = save_image_return_url(f_img, f"hidden_stroke/{case_id}/images/img_{i+2}.jpg")
@@ -503,13 +541,15 @@ No annotations. Differences must be visible only at macro zoom.
503
  crop = crop_signature_macro(f_img, 512)
504
  c_url = save_image_return_url(crop, f"hidden_stroke/{case_id}/signature_crops/crop_{i+2}.jpg", quality=88)
505
  signature_crops.append(c_url)
 
506
 
507
- # === Gemini: Case brief + 3 metadata bundles + ledger + solution ===
508
  title = ia_item.get("title") or "Untitled"
509
  creator = ia_item.get("creator") or ""
510
  date = ia_item.get("date") or ""
511
  rights = ia_item.get("rights") or ""
512
  licenseurl = ia_item.get("licenseurl") or ""
 
513
 
514
  meta_prompt = f"""
515
  You are generating a daily case for a noir art investigation game.
@@ -548,19 +588,13 @@ OUTPUT STRICT JSON with this schema:
548
  "explanation": "A few sentences that justify the authentic pick without listing spoilers."
549
  }}
550
  }}
551
-
552
- CONSTRAINTS:
553
- - Keep all three bundles plausible and near-identical at a glance.
554
- - Anomalies must be subtle and testable (chemistry/ink era, currency introductions, institution timelines, accession formats, etc.).
555
- - If MODE=KNOWLEDGE, the tells should be discoverable via metadata/ledger alone.
556
- - If MODE=OBSERVATION, include at least one signature micro-geometry flag in "flags_signature".
557
- - The authentic bundle should be consistent with the AUTHENTIC CONTEXT.
558
  """
559
  meta_resp = client.models.generate_content(
560
  model=CATEGORY_MODEL,
561
  contents=[meta_prompt]
562
  )
563
  raw_text = meta_resp.text.strip()
 
564
  try:
565
  meta_json = json.loads(raw_text)
566
  except Exception:
@@ -582,8 +616,10 @@ CONSTRAINTS:
582
  flags_metadata = solution.get("flags_metadata", [])
583
  flags_financial = solution.get("flags_financial", [])
584
  explanation = solution.get("explanation", "The authentic work aligns with period-accurate details; the others contain subtle contradictions.")
 
585
 
586
  if len(metadata) != 3:
 
587
  raise RuntimeError("Expected exactly 3 metadata bundles.")
588
 
589
  public = {
@@ -618,6 +654,7 @@ CONSTRAINTS:
618
  cref = case_ref(case_id)
619
  cref.child("public").set(public)
620
  cref.child("solution").set(solution_doc)
 
621
  return public
622
 
623
  # -----------------------------------------------------------------------------
@@ -638,6 +675,7 @@ def create_session(user_id: str, username: str, case_id: str) -> Dict[str, Any]:
638
  "status": "active"
639
  }
640
  sessions_ref().child(session_id).set(session_doc)
 
641
  return session_doc
642
 
643
  def get_session(session_id: str) -> Dict[str, Any]:
@@ -666,6 +704,7 @@ def spend_ip(session: Dict[str, Any], cost: int, action: Dict[str, Any]) -> Tupl
666
  action["ts"] = datetime.now(timezone.utc).isoformat()
667
  sessions_ref().child(session["session_id"]).child("ip_remaining").set(new_ip)
668
  sessions_ref().child(session["session_id"]).child("actions").push(action)
 
669
  return session, {}
670
 
671
  def score_result(correct: bool, session: Dict[str, Any]) -> Dict[str, Any]:
@@ -709,6 +748,7 @@ def admin_ingest_ia():
709
  rows = int(body.get("rows") or 100)
710
  ingested = 0
711
  errors = 0
 
712
 
713
  for page in range(1, pages + 1):
714
  try:
@@ -728,6 +768,7 @@ def admin_ingest_ia():
728
  ingested += 1
729
  except Exception:
730
  errors += 1
 
731
  continue
732
 
733
  pool_size = len(ia_pool_ref().get() or {})
@@ -758,7 +799,8 @@ def ia_pool_stats():
758
  if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
759
  return jsonify({"error": "Forbidden"}), 403
760
  pool = ia_pool_ref().get() or {}
761
- return jsonify({"pool_size": len(pool)})
 
762
 
763
  # --- Admin: pre-generate today's case (manual) ---
764
  @app.route("/admin/generate-today", methods=["POST"])
@@ -782,8 +824,42 @@ def admin_bootstrap_now():
782
  stats = ensure_minimum_ia_pool(min_items=min_items, rows=rows, max_pages=max_pages)
783
  return jsonify({"ok": True, "stats": stats})
784
  except Exception as e:
 
785
  return jsonify({"ok": False, "error": str(e)}), 500
786
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
787
  # --- Player flow ---
788
  @app.route("/cases/today/start", methods=["POST"])
789
  def start_case():
@@ -926,13 +1002,12 @@ def leaderboard_daily():
926
  # 7) MAIN
927
  # -----------------------------------------------------------------------------
928
  if __name__ == "__main__":
929
- # Optional: pre-warm pool on boot so you’re ready before first request
930
  if os.environ.get("BOOTSTRAP_IA", "1") == "1":
931
- print("Bootstrapping Internet Archive pool...")
932
  try:
933
  stats = ensure_minimum_ia_pool()
934
- print("Bootstrap complete:", stats)
935
- except Exception as e:
936
- print("Bootstrap failed:", e)
937
 
938
  app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")), debug=True)
 
1
+ # app.py — Hidden Stroke (AI Noir Investigation) with verbose logging
2
+ # Flask + Firebase Realtime DB + Firebase Storage + Gemini
3
+ # Envs required: FIREBASE, Firebase_DB, Firebase_Storage, Gemini
4
+ # Optional envs: GAME_SALT, ADMIN_KEY, IA_USER_AGENT, MIN_IA_POOL, IA_QUERY,
5
+ # BOOTSTRAP_IA, LOG_LEVEL, ALLOW_DEV_BOOTSTRAP, ALLOW_DEV_DIAGNOSTICS
6
 
7
  import os, io, uuid, json, hmac, hashlib, random, traceback, requests
8
  from datetime import datetime, timedelta, timezone
 
12
  from flask_cors import CORS
13
  from PIL import Image
14
 
15
+ # ----- Logging ---------------------------------------------------------------
16
+ import logging
17
+ LOG_LEVEL = os.environ.get("LOG_LEVEL", "DEBUG").upper()
18
+ logging.basicConfig(
19
+ level=getattr(logging, LOG_LEVEL, logging.DEBUG),
20
+ format="%(asctime)s | %(levelname)s | %(name)s | %(message)s"
21
+ )
22
+ log = logging.getLogger("hidden_stroke")
23
+
24
  # ---------------- Firebase Admin (Realtime DB + Storage) ----------------
25
  import firebase_admin
26
  from firebase_admin import credentials, db, storage
 
54
  })
55
  bucket = storage.bucket()
56
  db_root = db.reference("/")
57
+ log.info("Firebase Realtime DB + Storage initialized.")
58
  except Exception as e:
59
+ log.exception("FATAL: Firebase init failed")
60
  raise
61
 
62
  # --- Gemini ---
 
65
  if not GEMINI_API_KEY:
66
  raise ValueError("The 'Gemini' environment variable is not set.")
67
  client = genai.Client(api_key=GEMINI_API_KEY)
68
+ log.info("Gemini client initialized.")
69
  except Exception as e:
70
+ log.exception("FATAL: Gemini init failed")
71
  raise
72
 
73
  # --- Models (exact names) ---
 
81
  LEADERBOARD_TOP_N = 50
82
 
83
  # --- Misc config ---
84
+ GAME_SALT = os.environ.get("GAME_SALT", "dev-salt")
85
+ ADMIN_KEY = os.environ.get("ADMIN_KEY")
86
  IA_USER_AGENT = os.environ.get("IA_USER_AGENT", "HiddenStrokeBot/1.0 (+https://reddit.com)")
87
  MIN_IA_POOL = int(os.environ.get("MIN_IA_POOL", "60"))
88
  DEFAULT_IA_QUERY = os.environ.get(
 
90
  '(collection:(metropolitanmuseum OR smithsonian OR getty OR artic) AND mediatype:image)'
91
  )
92
  ALLOW_DEV_BOOTSTRAP = os.environ.get("ALLOW_DEV_BOOTSTRAP", "0") == "1"
93
+ ALLOW_DEV_DIAGNOSTICS = os.environ.get("ALLOW_DEV_DIAGNOSTICS", "0") == "1"
94
 
95
  # -----------------------------------------------------------------------------
96
  # 2) UTILS
 
117
  return hmac.new(GAME_SALT.encode(), s.encode(), hashlib.sha256).hexdigest()
118
 
119
def upload_bytes_to_storage(data: bytes, path: str, content_type: str) -> str:
    """Upload raw bytes to Firebase Storage and return the public URL.

    Args:
        data: Encoded file contents to store.
        path: Destination object path inside the bucket.
        content_type: MIME type recorded on the blob.

    Returns:
        The publicly readable URL of the uploaded object.
    """
    # Lazy %-args: the message is only formatted when DEBUG logging is on.
    log.debug("Uploading to Storage: path=%s, content_type=%s, bytes=%d",
              path, content_type, len(data))
    blob = bucket.blob(path)
    blob.upload_from_string(data, content_type=content_type)
    # Game clients fetch these URLs directly, so the object must be public.
    blob.make_public()
    url = blob.public_url
    log.debug("Uploaded: %s", url)
    return url
127
 
128
  def pil_from_inline_image_part(part) -> Image.Image:
129
  image_bytes = part.inline_data.data
 
150
  return "knowledge" if (case_seed % 2 == 0) else "observation"
151
 
152
def http_get_json(url: str, params: Optional[dict] = None) -> dict:
    """GET *url* with the project User-Agent and return the parsed JSON body.

    Args:
        url: Fully-qualified URL to request.
        params: Optional query parameters forwarded to ``requests.get``.
            (Annotation fixed: the default is None, so the type is Optional.)

    Raises:
        requests.HTTPError: If the response status is not 2xx.
    """
    # Lazy %-args avoid eager f-string formatting on every request.
    log.debug("HTTP GET JSON: %s params=%s", url, params)
    headers = {"User-Agent": IA_USER_AGENT}
    r = requests.get(url, params=params, headers=headers, timeout=30)
    log.debug("HTTP %s for %s", r.status_code, r.url)
    r.raise_for_status()
    return r.json()
159
 
160
  def http_get_bytes(url: str) -> bytes:
161
+ log.debug(f"HTTP GET BYTES: {url}")
162
  headers = {"User-Agent": IA_USER_AGENT}
163
  r = requests.get(url, headers=headers, timeout=60)
164
+ log.debug(f"HTTP {r.status_code} for {r.url} bytes={len(r.content)}")
165
  r.raise_for_status()
166
  return r.content
167
 
168
def ia_advanced_search(query: str, rows: int, page: int) -> List[dict]:
    """Query the Internet Archive Advanced Search API (no API key required).

    Args:
        query: Lucene-style IA search query string.
        rows: Number of documents per page.
        page: 1-based page number.

    Returns:
        The list of result docs (possibly empty).

    Raises:
        Exception: Re-raised after logging if the HTTP call or JSON parse fails.
    """
    url = "https://archive.org/advancedsearch.php"
    params = {"q": query, "rows": rows, "page": page, "output": "json"}
    # Keep the try body minimal: only the network call can raise here.
    try:
        data = http_get_json(url, params=params)
    except Exception:
        log.exception("IA advanced search failed")
        raise
    docs = data.get("response", {}).get("docs", [])
    # Lazy %-args instead of an eagerly-formatted f-string.
    log.info("IA search page=%s rows=%s -> %d docs", page, rows, len(docs))
    return docs
179
 
180
  def ia_metadata(identifier: str) -> dict:
181
  url = f"https://archive.org/metadata/{identifier}"
182
+ try:
183
+ meta = http_get_json(url)
184
+ log.debug(f"Fetched metadata for {identifier}, files={len(meta.get('files', []) or [])}")
185
+ return meta
186
+ except Exception:
187
+ log.exception(f"IA metadata fetch failed for {identifier}")
188
+ raise
189
 
190
  def ia_best_image_from_metadata(meta: dict) -> Optional[dict]:
191
  files = meta.get("files", []) or []
 
195
  if any(x in fmt for x in ["jpeg", "jpg", "png", "tiff", "image"]):
196
  w = int(f.get("width") or 0)
197
  h = int(f.get("height") or 0)
198
+ px = w * h if (w and h) else int(f.get("size") or 0)
 
 
 
199
  if px > best_pixels:
200
  best_pixels, best = px, f
201
+ if best:
202
+ log.debug(f"Best image: name={best.get('name')} fmt={best.get('format')} dims={best.get('width')}x{best.get('height')} size={best.get('size')}")
203
+ else:
204
+ log.warning("No suitable image file found in metadata")
205
  return best
206
 
207
  def ingest_ia_doc(doc: dict) -> Optional[dict]:
 
209
  identifier = doc.get("identifier")
210
  if not identifier:
211
  return None
212
+ log.info(f"Ingesting IA identifier={identifier}")
213
  meta = ia_metadata(identifier)
214
  best = ia_best_image_from_metadata(meta)
215
  if not best:
216
+ log.warning(f"Skipping {identifier}: no image file")
217
  return None
218
 
219
  md = meta.get("metadata", {}) or {}
 
240
  "source": "internet_archive"
241
  }
242
  ia_pool_ref().child(identifier).set(record)
243
+ log.info(f"Ingested {identifier} -> ia_pool (title='{title}')")
244
  return record
245
 
246
def choose_ia_item_for_case(case_id: str) -> Optional[dict]:
    """Pick the day's authentic artwork from ia_pool, deterministically.

    Seeded by the case date so every player gets the same item; returns
    None (with a warning) when the pool has nothing to offer.
    """
    records = ia_pool_ref().get() or {}
    if not records:
        log.warning("choose_ia_item_for_case: pool is empty")
        return None
    # Stable sort order + date-seeded modulus => one fixed pick per day.
    ordered = sorted(records)
    ident = ordered[seed_for_date(case_id) % len(ordered)]
    log.info(f"Chosen IA item for case {case_id}: {ident}")
    return records[ident]
256
 
257
  def download_image_to_pil(url: str) -> Image.Image:
258
  data = http_get_bytes(url)
259
+ img = Image.open(io.BytesIO(data)).convert("RGB")
260
+ log.debug(f"Opened image from {url} size={img.size}")
261
+ return img
262
 
263
  def crop_signature_macro(img: Image.Image, size: int = 512) -> Image.Image:
 
264
  w, h = img.size
265
  cw = min(size, w)
266
  ch = min(size, h)
267
  left = max(0, w - cw)
268
  top = max(0, h - ch)
269
+ log.debug(f"Signature crop from ({left},{top}) to ({left+cw},{top+ch})")
270
  return img.crop((left, top, left + cw, top + ch))
271
 
272
  # -----------------------------------------------------------------------------
 
282
  else:
283
  new_h = max_dim
284
  new_w = int(w * (max_dim / h))
285
+ log.debug(f"Resizing image from {w}x{h} to {new_w}x{new_h}")
286
  return img.resize((new_w, new_h), Image.LANCZOS)
287
 
288
  def cache_single_ia_identifier(
 
292
  jpeg_quality: int = 90,
293
  skip_if_restricted: bool = True,
294
  ) -> dict:
 
 
 
 
295
  rec_ref = ia_pool_ref().child(identifier)
296
  rec = rec_ref.get() or {}
297
  if not rec:
 
299
 
300
  rights = (rec.get("rights") or "").lower()
301
  if skip_if_restricted and ("in copyright" in rights or "all rights reserved" in rights):
302
+ log.info(f"Skipping {identifier}: restricted rights")
303
  return {"identifier": identifier, "stored": False, "reason": "restricted_rights"}
304
 
305
  if rec.get("storage_url") and not overwrite:
306
+ log.info(f"Skipping {identifier}: already cached")
307
  return {"identifier": identifier, "stored": False, "reason": "already_cached", "storage_url": rec["storage_url"]}
308
 
 
309
  source_url = rec.get("storage_url") or rec.get("download_url")
310
  if not source_url:
311
+ log.warning(f"{identifier}: missing source_url")
312
  return {"identifier": identifier, "stored": False, "reason": "missing_source_url"}
313
 
314
  try:
315
+ log.info(f"Caching {identifier} from {source_url}")
316
  img = download_image_to_pil(source_url)
317
  except Exception as e:
318
+ if rec.get("download_url") and source_url != rec.get("download_url"):
319
  try:
320
+ log.warning(f"Retrying {identifier} from IA download_url")
321
  img = download_image_to_pil(rec["download_url"])
322
  except Exception as e2:
323
+ log.exception(f"{identifier}: download failed")
324
  return {"identifier": identifier, "stored": False, "reason": f"download_failed: {e2}"}
325
  else:
326
+ log.exception(f"{identifier}: download failed")
327
  return {"identifier": identifier, "stored": False, "reason": f"download_failed: {e}"}
328
 
329
  img = _resize_if_needed(img, max_dim=max_dim)
 
354
  "cached_at": datetime.now(timezone.utc).isoformat()
355
  }
356
  rec_ref.update(rec_update)
357
+ log.info(f"Cached {identifier} -> {storage_url}")
358
 
359
  return {
360
  "identifier": identifier,
 
375
  jpeg_quality: int = 90,
376
  skip_if_restricted: bool = True,
377
  ) -> dict:
 
378
  pool = ia_pool_ref().get() or {}
379
+ log.info(f"batch_cache_ia_pool: pool_size={len(pool)}")
380
  if not pool:
381
  return {"ok": True, "processed": 0, "stored": 0, "skipped": 0, "results": []}
382
 
 
386
  w = int(rec.get("width") or 0)
387
  h = int(rec.get("height") or 0)
388
  if (w and h) and (w < min_width or h < min_height):
389
+ log.debug(f"Skip {ident}: too small {w}x{h}")
390
  continue
391
  candidates.append(ident)
392
 
393
  if randomize:
394
  random.shuffle(candidates)
395
  candidates = candidates[:max(0, limit)]
396
+ log.info(f"Caching candidates: {len(candidates)} (limit={limit})")
397
 
398
  results, stored, skipped = [], 0, 0
399
  for ident in candidates:
 
410
  else:
411
  skipped += 1
412
 
413
+ log.info(f"batch_cache_ia_pool done: processed={len(candidates)} stored={stored} skipped={skipped}")
414
  return {"ok": True, "processed": len(candidates), "stored": stored, "skipped": skipped, "results": results}
415
 
416
  def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pages: int = 5) -> dict:
 
 
 
 
 
417
  pool = ia_pool_ref().get() or {}
418
  have = len(pool)
419
  added = 0
420
  cached = 0
421
+ log.info(f"ensure_minimum_ia_pool: have={have}, target={min_items}")
422
 
423
  if have < min_items:
424
  page = 1
425
  while have + added < min_items and page <= max_pages:
426
  try:
427
  docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=rows, page=page)
428
+ except Exception:
429
+ log.warning(f"IA search failed on page {page}, stopping ingest loop")
430
  break
431
  if not docs:
432
+ log.warning("IA search returned 0 docs; stopping")
433
  break
434
  for d in docs:
435
  ident = d.get("identifier")
 
442
  if rec:
443
  added += 1
444
  except Exception:
445
+ log.exception(f"Failed to ingest {ident}")
446
  continue
447
+ if have + added >= min_items:
448
+ break
449
  page += 1
450
 
451
  # Cache up to min_items
452
  pool = ia_pool_ref().get() or {}
453
  have_now = len(pool)
454
  need_cache = max(0, min_items - have_now)
455
+ log.info(f"ensure_minimum_ia_pool: post-ingest have={have_now}, need_cache={need_cache}")
456
  if need_cache:
457
  res = batch_cache_ia_pool(limit=need_cache, randomize=True)
458
  cached = res.get("stored", 0)
459
 
460
  final_size = len(ia_pool_ref().get() or {})
461
+ stats = {"ok": True, "had": have, "added": added, "cached": cached, "final_size": final_size}
462
+ log.info(f"ensure_minimum_ia_pool: stats={stats}")
463
+ return stats
464
 
465
  # -----------------------------------------------------------------------------
466
  # 4) CASE GENERATION (uses IA for authentic image, Gemini for forgeries/meta)
 
468
  def ensure_case_generated(case_id: str) -> Dict[str, Any]:
469
  existing_public = case_ref(case_id).child("public").get()
470
  if existing_public:
471
+ log.info(f"Case {case_id} already exists")
472
  return existing_public
473
 
474
  # Ensure we have a cached pool ready
475
  try:
476
  stats = ensure_minimum_ia_pool()
477
+ log.debug(f"Bootstrap stats for case {case_id}: {stats}")
478
+ except Exception:
479
+ log.exception("Bootstrap failed inside ensure_case_generated")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
480
 
481
  # Pick authentic from ia_pool deterministically
482
  ia_item = choose_ia_item_for_case(case_id)
 
485
 
486
  # Deterministic mode
487
  case_seed = seed_for_date(case_id)
 
488
  mode = "knowledge" if (case_seed % 2 == 0) else "observation"
489
+ log.info(f"Case {case_id}: mode={mode}")
490
+
491
  style_period = "sourced from Internet Archive; museum catalog reproduction"
492
 
493
  # Load authentic image (prefer cached)
494
  source_url = ia_item.get("storage_url") or ia_item["download_url"]
495
+ log.info(f"Case {case_id}: authentic source={source_url}")
496
  auth_img = download_image_to_pil(source_url)
497
 
498
  images_urls: List[str] = []
499
  signature_crops: List[str] = []
500
 
501
  # Save authentic as image #1
502
+ url1 = save_image_return_url(auth_img, f"hidden_stroke/{case_id}/images/img_1.jpg")
503
+ images_urls.append(url1)
504
+ log.debug(f"Case {case_id}: saved authentic -> {url1}")
505
+
506
  # Macro crop for signature area
507
  crop1 = crop_signature_macro(auth_img, 512)
508
+ crop1_url = save_image_return_url(crop1, f"hidden_stroke/{case_id}/signature_crops/crop_1.jpg", quality=88)
509
+ signature_crops.append(crop1_url)
510
+ log.debug(f"Case {case_id}: saved authentic crop -> {crop1_url}")
511
 
512
  if mode == "knowledge":
 
513
  for idx in [2, 3]:
514
+ images_urls.append(images_urls[0])
515
  signature_crops.append(signature_crops[0])
516
  else:
 
517
  for i in range(2):
518
  forg_prompt = """
519
  Create a near-identical variant of the provided painting.
 
521
  Only introduce a subtle change in signature micro-geometry (baseline alignment, stroke overlap order, or curve spacing).
522
  No annotations. Differences must be visible only at macro zoom.
523
  """
524
+ log.info(f"Case {case_id}: generating forgery {i+1}")
525
  resp = client.models.generate_content(
526
  model=GENERATION_MODEL,
527
  contents=[forg_prompt, auth_img],
 
533
  f_img = pil_from_inline_image_part(p)
534
  break
535
  if f_img is None:
536
+ log.warning("Gemini returned no image; falling back to copy of authentic")
537
  f_img = auth_img.copy()
538
 
539
  url = save_image_return_url(f_img, f"hidden_stroke/{case_id}/images/img_{i+2}.jpg")
 
541
  crop = crop_signature_macro(f_img, 512)
542
  c_url = save_image_return_url(crop, f"hidden_stroke/{case_id}/signature_crops/crop_{i+2}.jpg", quality=88)
543
  signature_crops.append(c_url)
544
+ log.debug(f"Case {case_id}: forgery saved -> {url}; crop -> {c_url}")
545
 
546
+ # === Gemini: Case brief + metadata + ledger + solution ===
547
  title = ia_item.get("title") or "Untitled"
548
  creator = ia_item.get("creator") or ""
549
  date = ia_item.get("date") or ""
550
  rights = ia_item.get("rights") or ""
551
  licenseurl = ia_item.get("licenseurl") or ""
552
+ log.info(f"Case {case_id}: prompting metadata with title='{title}' creator='{creator}' date='{date}'")
553
 
554
  meta_prompt = f"""
555
  You are generating a daily case for a noir art investigation game.
 
588
  "explanation": "A few sentences that justify the authentic pick without listing spoilers."
589
  }}
590
  }}
 
 
 
 
 
 
 
591
  """
592
  meta_resp = client.models.generate_content(
593
  model=CATEGORY_MODEL,
594
  contents=[meta_prompt]
595
  )
596
  raw_text = meta_resp.text.strip()
597
+ log.debug(f"Case {case_id}: raw meta JSON text len={len(raw_text)}")
598
  try:
599
  meta_json = json.loads(raw_text)
600
  except Exception:
 
616
  flags_metadata = solution.get("flags_metadata", [])
617
  flags_financial = solution.get("flags_financial", [])
618
  explanation = solution.get("explanation", "The authentic work aligns with period-accurate details; the others contain subtle contradictions.")
619
+ log.info(f"Case {case_id}: answer_index={answer_index}, meta_count={len(metadata)}")
620
 
621
  if len(metadata) != 3:
622
+ log.error("Gemini did not return exactly 3 metadata bundles")
623
  raise RuntimeError("Expected exactly 3 metadata bundles.")
624
 
625
  public = {
 
654
  cref = case_ref(case_id)
655
  cref.child("public").set(public)
656
  cref.child("solution").set(solution_doc)
657
+ log.info(f"Case {case_id}: generated and stored")
658
  return public
659
 
660
  # -----------------------------------------------------------------------------
 
675
  "status": "active"
676
  }
677
  sessions_ref().child(session_id).set(session_doc)
678
+ log.info(f"New session {session_id} for user={username} case={case_id}")
679
  return session_doc
680
 
681
  def get_session(session_id: str) -> Dict[str, Any]:
 
704
  action["ts"] = datetime.now(timezone.utc).isoformat()
705
  sessions_ref().child(session["session_id"]).child("ip_remaining").set(new_ip)
706
  sessions_ref().child(session["session_id"]).child("actions").push(action)
707
+ log.debug(f"Spend IP: {cost} -> remaining={new_ip}")
708
  return session, {}
709
 
710
  def score_result(correct: bool, session: Dict[str, Any]) -> Dict[str, Any]:
 
748
  rows = int(body.get("rows") or 100)
749
  ingested = 0
750
  errors = 0
751
+ log.info(f"Manual ingest: query='{query}' pages={pages} rows={rows}")
752
 
753
  for page in range(1, pages + 1):
754
  try:
 
768
  ingested += 1
769
  except Exception:
770
  errors += 1
771
+ log.exception(f"Manual ingest failed for {ident}")
772
  continue
773
 
774
  pool_size = len(ia_pool_ref().get() or {})
 
799
  if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
800
  return jsonify({"error": "Forbidden"}), 403
801
  pool = ia_pool_ref().get() or {}
802
+ cached = sum(1 for r in pool.values() if r.get("storage_url"))
803
+ return jsonify({"pool_size": len(pool), "cached": cached})
804
 
805
  # --- Admin: pre-generate today's case (manual) ---
806
  @app.route("/admin/generate-today", methods=["POST"])
 
824
  stats = ensure_minimum_ia_pool(min_items=min_items, rows=rows, max_pages=max_pages)
825
  return jsonify({"ok": True, "stats": stats})
826
  except Exception as e:
827
+ log.exception("bootstrap-now failed")
828
  return jsonify({"ok": False, "error": str(e)}), 500
829
 
830
# --- DEV-ONLY: diagnostics (network + firebase sanity) ---
@app.route("/admin/diagnostics", methods=["GET"])
def diagnostics():
    """Dev-only sanity probe: verifies Internet Archive reachability and Firebase Storage writes.

    Gated behind ALLOW_DEV_DIAGNOSTICS so it is never reachable in a normal
    deployment. Failures are reported inside the JSON payload rather than
    raised, so a single broken dependency does not hide the others.
    """
    if not ALLOW_DEV_DIAGNOSTICS:
        return jsonify({"error": "Disabled. Set ALLOW_DEV_DIAGNOSTICS=1 to enable."}), 403

    report = {
        "info": {
            "bucket": bucket.name,
            "db_url": db_root.path,
            "log_level": LOG_LEVEL,
            "ia_query": DEFAULT_IA_QUERY,
        },
        "ia": {},
        "firebase": {},
    }

    # Probe the IA advanced-search API with the configured query and, if it
    # returns anything, resolve the best image file for the first hit.
    try:
        docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=3, page=1)
        report["ia"]["search_docs"] = [d.get("identifier") for d in docs]
        if docs:
            sample_id = docs[0].get("identifier")
            best = ia_best_image_from_metadata(ia_metadata(sample_id))
            report["ia"]["sample_identifier"] = sample_id
            report["ia"]["best_file"] = (best or {}).get("name")
    except Exception as exc:
        report["ia"]["error"] = str(exc)

    # Try a tiny upload
    try:
        probe_path = f"diag/ping_{uuid.uuid4().hex}.txt"
        report["firebase"]["upload_test"] = upload_bytes_to_storage(b"ping", probe_path, "text/plain")
    except Exception as exc:
        report["firebase"]["error"] = str(exc)

    return jsonify(report)
862
+
863
  # --- Player flow ---
864
  @app.route("/cases/today/start", methods=["POST"])
865
  def start_case():
 
1002
  # 7) MAIN
1003
  # -----------------------------------------------------------------------------
1004
if __name__ == "__main__":
    # Optionally warm the Internet Archive pool before serving traffic
    # (set BOOTSTRAP_IA=0 to skip, e.g. for faster local restarts).
    if os.environ.get("BOOTSTRAP_IA", "1") == "1":
        log.info("Bootstrapping Internet Archive pool...")
        try:
            stats = ensure_minimum_ia_pool()
            log.info(f"Bootstrap complete: {stats}")
        except Exception:
            # Bootstrap is best-effort: the app can still serve cached cases,
            # so log the failure instead of refusing to start.
            log.exception("Bootstrap failed")

    # Never hard-code debug=True for a deployed container: the Werkzeug
    # interactive debugger allows arbitrary code execution if exposed.
    # Debug mode is now opt-in via FLASK_DEBUG=1 for local development only.
    debug_mode = os.environ.get("FLASK_DEBUG", "0") == "1"
    app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")), debug=debug_mode)