Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
# app.py — Hidden Stroke (AI Noir Investigation) with Internet Archive ingestion
|
| 2 |
# Flask + Firebase Realtime DB + Firebase Storage + Gemini (exact model names)
|
| 3 |
-
# Runs on Hugging Face
|
| 4 |
|
| 5 |
-
import os, io, uuid, json,
|
| 6 |
from datetime import datetime, timedelta, timezone
|
| 7 |
from typing import Dict, Any, Tuple, List, Optional
|
| 8 |
|
|
@@ -19,7 +19,7 @@ from google import genai
|
|
| 19 |
from google.genai import types
|
| 20 |
|
| 21 |
# -----------------------------------------------------------------------------
|
| 22 |
-
# 1) CONFIG & INIT
|
| 23 |
# -----------------------------------------------------------------------------
|
| 24 |
app = Flask(__name__)
|
| 25 |
CORS(app)
|
|
@@ -71,8 +71,14 @@ LEADERBOARD_TOP_N = 50
|
|
| 71 |
|
| 72 |
# --- Misc config ---
|
| 73 |
GAME_SALT = os.environ.get("GAME_SALT", "dev-salt") # for deterministic seeds / HMAC
|
| 74 |
-
ADMIN_KEY = os.environ.get("ADMIN_KEY")
|
| 75 |
-
IA_USER_AGENT = os.environ.get("IA_USER_AGENT", "HiddenStrokeBot/1.0 (+https://reddit.com)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
|
| 77 |
# -----------------------------------------------------------------------------
|
| 78 |
# 2) UTILS
|
|
@@ -142,7 +148,6 @@ def http_get_bytes(url: str) -> bytes:
|
|
| 142 |
|
| 143 |
def ia_advanced_search(query: str, rows: int, page: int) -> List[dict]:
|
| 144 |
# Internet Archive Advanced Search (no key required)
|
| 145 |
-
# docs: /advancedsearch.php?q=...&rows=...&page=...&output=json
|
| 146 |
url = "https://archive.org/advancedsearch.php"
|
| 147 |
params = {"q": query, "rows": rows, "page": page, "output": "json"}
|
| 148 |
data = http_get_json(url, params=params)
|
|
@@ -153,30 +158,23 @@ def ia_metadata(identifier: str) -> dict:
|
|
| 153 |
return http_get_json(url)
|
| 154 |
|
| 155 |
def ia_best_image_from_metadata(meta: dict) -> Optional[dict]:
|
| 156 |
-
# Pick the largest suitable image file from /metadata result
|
| 157 |
files = meta.get("files", []) or []
|
| 158 |
-
best = None
|
| 159 |
-
best_pixels = -1
|
| 160 |
for f in files:
|
| 161 |
-
name = f.get("name", "")
|
| 162 |
fmt = (f.get("format") or "").lower()
|
| 163 |
if any(x in fmt for x in ["jpeg", "jpg", "png", "tiff", "image"]):
|
| 164 |
-
# width/height sometimes present
|
| 165 |
w = int(f.get("width") or 0)
|
| 166 |
h = int(f.get("height") or 0)
|
| 167 |
if w and h:
|
| 168 |
px = w * h
|
| 169 |
else:
|
| 170 |
-
px = int(f.get("size") or 0)
|
| 171 |
if px > best_pixels:
|
| 172 |
-
best_pixels = px
|
| 173 |
-
best = f
|
| 174 |
return best
|
| 175 |
|
| 176 |
def ingest_ia_doc(doc: dict) -> Optional[dict]:
|
| 177 |
-
"""
|
| 178 |
-
Given a doc from advancedsearch, fetch /metadata and store best image into ia_pool.
|
| 179 |
-
"""
|
| 180 |
identifier = doc.get("identifier")
|
| 181 |
if not identifier:
|
| 182 |
return None
|
|
@@ -185,11 +183,12 @@ def ingest_ia_doc(doc: dict) -> Optional[dict]:
|
|
| 185 |
if not best:
|
| 186 |
return None
|
| 187 |
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
|
|
|
| 193 |
|
| 194 |
download_url = f"https://archive.org/download/{identifier}/{best['name']}"
|
| 195 |
record = {
|
|
@@ -216,17 +215,15 @@ def choose_ia_item_for_case(case_id: str) -> Optional[dict]:
|
|
| 216 |
return None
|
| 217 |
identifiers = sorted(pool.keys())
|
| 218 |
case_seed = seed_for_date(case_id)
|
| 219 |
-
|
| 220 |
-
ident = identifiers[idx]
|
| 221 |
return pool[ident]
|
| 222 |
|
| 223 |
def download_image_to_pil(url: str) -> Image.Image:
|
| 224 |
data = http_get_bytes(url)
|
| 225 |
-
|
| 226 |
-
return img
|
| 227 |
|
| 228 |
def crop_signature_macro(img: Image.Image, size: int = 512) -> Image.Image:
|
| 229 |
-
|
| 230 |
w, h = img.size
|
| 231 |
cw = min(size, w)
|
| 232 |
ch = min(size, h)
|
|
@@ -235,27 +232,209 @@ def crop_signature_macro(img: Image.Image, size: int = 512) -> Image.Image:
|
|
| 235 |
return img.crop((left, top, left + cw, top + ch))
|
| 236 |
|
| 237 |
# -----------------------------------------------------------------------------
|
| 238 |
-
# 3)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
# -----------------------------------------------------------------------------
|
| 240 |
def ensure_case_generated(case_id: str) -> Dict[str, Any]:
|
| 241 |
existing_public = case_ref(case_id).child("public").get()
|
| 242 |
if existing_public:
|
| 243 |
return existing_public
|
| 244 |
|
| 245 |
-
#
|
| 246 |
try:
|
| 247 |
-
ensure_minimum_ia_pool()
|
|
|
|
|
|
|
| 248 |
except Exception as e:
|
| 249 |
print("Bootstrap warning:", e)
|
| 250 |
|
| 251 |
-
|
| 252 |
-
# Ensure we have at least some IA records; if not, auto-ingest a default set (one page)
|
| 253 |
pool = ia_pool_ref().get() or {}
|
| 254 |
if not pool:
|
| 255 |
try:
|
| 256 |
-
|
| 257 |
-
default_query = '(collection:(metropolitanmuseum OR smithsonian OR getty OR artic) AND mediatype:image)'
|
| 258 |
-
docs = ia_advanced_search(default_query, rows=100, page=1)
|
| 259 |
for d in docs:
|
| 260 |
try:
|
| 261 |
ingest_ia_doc(d)
|
|
@@ -267,22 +446,18 @@ def ensure_case_generated(case_id: str) -> Dict[str, Any]:
|
|
| 267 |
# Pick authentic from ia_pool deterministically
|
| 268 |
ia_item = choose_ia_item_for_case(case_id)
|
| 269 |
if not ia_item:
|
| 270 |
-
# absolute fallback (rare)
|
| 271 |
raise RuntimeError("No IA items available. Ingest needed.")
|
| 272 |
|
| 273 |
# Deterministic mode
|
| 274 |
case_seed = seed_for_date(case_id)
|
| 275 |
rng = random.Random(case_seed)
|
| 276 |
mode = "knowledge" if (case_seed % 2 == 0) else "observation"
|
| 277 |
-
|
| 278 |
-
# Style label (flavor text only)
|
| 279 |
style_period = "sourced from Internet Archive; museum catalog reproduction"
|
| 280 |
|
| 281 |
-
#
|
| 282 |
source_url = ia_item.get("storage_url") or ia_item["download_url"]
|
| 283 |
auth_img = download_image_to_pil(source_url)
|
| 284 |
|
| 285 |
-
|
| 286 |
images_urls: List[str] = []
|
| 287 |
signature_crops: List[str] = []
|
| 288 |
|
|
@@ -299,10 +474,10 @@ def ensure_case_generated(case_id: str) -> Dict[str, Any]:
|
|
| 299 |
if mode == "knowledge":
|
| 300 |
# Use the same authentic visual for all three; differences come from metadata only
|
| 301 |
for idx in [2, 3]:
|
| 302 |
-
images_urls.append(images_urls[0]) # same URL
|
| 303 |
signature_crops.append(signature_crops[0])
|
| 304 |
else:
|
| 305 |
-
# observation:
|
| 306 |
for i in range(2):
|
| 307 |
forg_prompt = """
|
| 308 |
Create a near-identical variant of the provided painting.
|
|
@@ -330,7 +505,6 @@ No annotations. Differences must be visible only at macro zoom.
|
|
| 330 |
signature_crops.append(c_url)
|
| 331 |
|
| 332 |
# === Gemini: Case brief + 3 metadata bundles + ledger + solution ===
|
| 333 |
-
# Feed IA title/creator/year so the authentic bundle aligns with reality.
|
| 334 |
title = ia_item.get("title") or "Untitled"
|
| 335 |
creator = ia_item.get("creator") or ""
|
| 336 |
date = ia_item.get("date") or ""
|
|
@@ -353,7 +527,7 @@ TASK:
|
|
| 353 |
1) Create a short, punchy "case_brief" (2–4 sentences) explaining why the artifact matters and why fraud is suspected — NO SPOILERS.
|
| 354 |
2) Prepare THREE metadata bundles for images A,B,C with NEARLY IDENTICAL fields.
|
| 355 |
Ensure exactly ONE bundle is AUTHENTIC and that it corresponds to the above authentic context.
|
| 356 |
-
The other two are FORGERIES
|
| 357 |
3) Provide a concise "ledger_summary" describing a believable ownership/payment trail.
|
| 358 |
4) Provide the solution with: "answer_index" (0 for A, 1 for B, 2 for C) and detailed flags for signature/metadata/financial, plus an "explanation".
|
| 359 |
|
|
@@ -392,9 +566,11 @@ CONSTRAINTS:
|
|
| 392 |
except Exception:
|
| 393 |
cleaned = raw_text
|
| 394 |
if "```" in raw_text:
|
| 395 |
-
|
| 396 |
-
if
|
| 397 |
-
cleaned =
|
|
|
|
|
|
|
| 398 |
meta_json = json.loads(cleaned)
|
| 399 |
|
| 400 |
case_brief = meta_json.get("case_brief", "A resurfaced portrait raises questions—its paper trail glitters a little too perfectly.")
|
|
@@ -417,7 +593,7 @@ CONSTRAINTS:
|
|
| 417 |
"style_period": style_period,
|
| 418 |
"images": images_urls,
|
| 419 |
"signature_crops": signature_crops,
|
| 420 |
-
"metadata": metadata,
|
| 421 |
"ledger_summary": ledger_summary,
|
| 422 |
"timer_seconds": TIMER_SECONDS,
|
| 423 |
"initial_ip": INITIAL_IP,
|
|
@@ -445,7 +621,7 @@ CONSTRAINTS:
|
|
| 445 |
return public
|
| 446 |
|
| 447 |
# -----------------------------------------------------------------------------
|
| 448 |
-
#
|
| 449 |
# -----------------------------------------------------------------------------
|
| 450 |
def create_session(user_id: str, username: str, case_id: str) -> Dict[str, Any]:
|
| 451 |
session_id = str(uuid.uuid4())
|
|
@@ -515,21 +691,20 @@ def upsert_leaderboard(case_id: str, user_id: str, username: str, score: int):
|
|
| 515 |
leaderboard_ref(case_id).set(top)
|
| 516 |
|
| 517 |
# -----------------------------------------------------------------------------
|
| 518 |
-
#
|
| 519 |
# -----------------------------------------------------------------------------
|
| 520 |
@app.route("/health", methods=["GET"])
|
| 521 |
def health():
|
| 522 |
return jsonify({"ok": True, "time": datetime.now(timezone.utc).isoformat()})
|
| 523 |
|
| 524 |
-
# --- Admin: Internet Archive ingestion ---
|
| 525 |
@app.route("/admin/ingest-ia", methods=["POST"])
|
| 526 |
def admin_ingest_ia():
|
| 527 |
if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
|
| 528 |
return jsonify({"error": "Forbidden"}), 403
|
| 529 |
|
| 530 |
body = request.get_json() or {}
|
| 531 |
-
|
| 532 |
-
query = body.get("query") or '(collection:(metropolitanmuseum OR smithsonian OR getty OR artic) AND mediatype:image)'
|
| 533 |
pages = int(body.get("pages") or 2)
|
| 534 |
rows = int(body.get("rows") or 100)
|
| 535 |
ingested = 0
|
|
@@ -538,14 +713,13 @@ def admin_ingest_ia():
|
|
| 538 |
for page in range(1, pages + 1):
|
| 539 |
try:
|
| 540 |
docs = ia_advanced_search(query, rows=rows, page=page)
|
| 541 |
-
except Exception
|
| 542 |
errors += 1
|
| 543 |
continue
|
| 544 |
for d in docs:
|
| 545 |
ident = d.get("identifier")
|
| 546 |
if not ident:
|
| 547 |
continue
|
| 548 |
-
# skip if already ingested
|
| 549 |
if ia_pool_ref().child(ident).get():
|
| 550 |
continue
|
| 551 |
try:
|
|
@@ -559,6 +733,26 @@ def admin_ingest_ia():
|
|
| 559 |
pool_size = len(ia_pool_ref().get() or {})
|
| 560 |
return jsonify({"ok": True, "ingested": ingested, "errors": errors, "pool_size": pool_size})
|
| 561 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 562 |
@app.route("/admin/ia-pool/stats", methods=["GET"])
|
| 563 |
def ia_pool_stats():
|
| 564 |
if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
|
|
@@ -566,7 +760,7 @@ def ia_pool_stats():
|
|
| 566 |
pool = ia_pool_ref().get() or {}
|
| 567 |
return jsonify({"pool_size": len(pool)})
|
| 568 |
|
| 569 |
-
# --- Admin: pre-generate today's case (
|
| 570 |
@app.route("/admin/generate-today", methods=["POST"])
|
| 571 |
def admin_generate_today():
|
| 572 |
if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
|
|
@@ -575,6 +769,21 @@ def admin_generate_today():
|
|
| 575 |
public = ensure_case_generated(case_id)
|
| 576 |
return jsonify({"generated": True, "case_id": case_id, "mode": public.get("mode")})
|
| 577 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 578 |
# --- Player flow ---
|
| 579 |
@app.route("/cases/today/start", methods=["POST"])
|
| 580 |
def start_case():
|
|
@@ -586,18 +795,14 @@ def start_case():
|
|
| 586 |
existing = sessions_ref().order_by_child("user_id").equal_to(user_id).get()
|
| 587 |
sess = None
|
| 588 |
if existing:
|
| 589 |
-
for
|
| 590 |
if sdoc.get("case_id") == case_id and sdoc.get("status") == "active":
|
| 591 |
sess = sdoc
|
| 592 |
break
|
| 593 |
if not sess:
|
| 594 |
sess = create_session(user_id, username, case_id)
|
| 595 |
|
| 596 |
-
|
| 597 |
-
"session_id": sess["session_id"],
|
| 598 |
-
"case": public
|
| 599 |
-
}
|
| 600 |
-
return jsonify(payload)
|
| 601 |
|
| 602 |
@app.route("/cases/<case_id>/tool/signature", methods=["POST"])
|
| 603 |
def tool_signature(case_id):
|
|
@@ -707,7 +912,7 @@ def submit_guess(case_id):
|
|
| 707 |
def leaderboard_daily():
|
| 708 |
case_id = utc_today_str()
|
| 709 |
top = leaderboard_ref(case_id).get() or []
|
| 710 |
-
user_id,
|
| 711 |
me = plays_ref(case_id).child(user_id).get() or {}
|
| 712 |
rank = None
|
| 713 |
if top:
|
|
@@ -718,14 +923,16 @@ def leaderboard_daily():
|
|
| 718 |
return jsonify({"case_id": case_id, "top": top, "me": {"score": me.get("score"), "rank": rank}})
|
| 719 |
|
| 720 |
# -----------------------------------------------------------------------------
|
| 721 |
-
#
|
| 722 |
# -----------------------------------------------------------------------------
|
| 723 |
if __name__ == "__main__":
|
| 724 |
# Optional: pre-warm pool on boot so you’re ready before first request
|
| 725 |
if os.environ.get("BOOTSTRAP_IA", "1") == "1":
|
| 726 |
print("Bootstrapping Internet Archive pool...")
|
| 727 |
-
|
| 728 |
-
|
|
|
|
|
|
|
|
|
|
| 729 |
|
| 730 |
app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")), debug=True)
|
| 731 |
-
|
|
|
|
| 1 |
# app.py — Hidden Stroke (AI Noir Investigation) with Internet Archive ingestion
|
| 2 |
# Flask + Firebase Realtime DB + Firebase Storage + Gemini (exact model names)
|
| 3 |
+
# Runs on Hugging Face (envs: FIREBASE, Firebase_DB, Firebase_Storage, Gemini, optional GAME_SALT/ADMIN_KEY)
|
| 4 |
|
| 5 |
+
import os, io, uuid, json, hmac, hashlib, random, traceback, requests
|
| 6 |
from datetime import datetime, timedelta, timezone
|
| 7 |
from typing import Dict, Any, Tuple, List, Optional
|
| 8 |
|
|
|
|
| 19 |
from google.genai import types
|
| 20 |
|
| 21 |
# -----------------------------------------------------------------------------
|
| 22 |
+
# 1) CONFIG & INIT
|
| 23 |
# -----------------------------------------------------------------------------
|
| 24 |
app = Flask(__name__)
|
| 25 |
CORS(app)
|
|
|
|
| 71 |
|
| 72 |
# --- Misc config ---
|
| 73 |
GAME_SALT = os.environ.get("GAME_SALT", "dev-salt") # for deterministic seeds / HMAC
|
| 74 |
+
ADMIN_KEY = os.environ.get("ADMIN_KEY") # optional for admin endpoints
|
| 75 |
+
IA_USER_AGENT = os.environ.get("IA_USER_AGENT", "HiddenStrokeBot/1.0 (+https://reddit.com)")
|
| 76 |
+
MIN_IA_POOL = int(os.environ.get("MIN_IA_POOL", "60"))
|
| 77 |
+
DEFAULT_IA_QUERY = os.environ.get(
|
| 78 |
+
"IA_QUERY",
|
| 79 |
+
'(collection:(metropolitanmuseum OR smithsonian OR getty OR artic) AND mediatype:image)'
|
| 80 |
+
)
|
| 81 |
+
ALLOW_DEV_BOOTSTRAP = os.environ.get("ALLOW_DEV_BOOTSTRAP", "0") == "1"
|
| 82 |
|
| 83 |
# -----------------------------------------------------------------------------
|
| 84 |
# 2) UTILS
|
|
|
|
| 148 |
|
| 149 |
def ia_advanced_search(query: str, rows: int, page: int) -> List[dict]:
|
| 150 |
# Internet Archive Advanced Search (no key required)
|
|
|
|
| 151 |
url = "https://archive.org/advancedsearch.php"
|
| 152 |
params = {"q": query, "rows": rows, "page": page, "output": "json"}
|
| 153 |
data = http_get_json(url, params=params)
|
|
|
|
| 158 |
return http_get_json(url)
|
| 159 |
|
| 160 |
def ia_best_image_from_metadata(meta: dict) -> Optional[dict]:
|
|
|
|
| 161 |
files = meta.get("files", []) or []
|
| 162 |
+
best, best_pixels = None, -1
|
|
|
|
| 163 |
for f in files:
|
|
|
|
| 164 |
fmt = (f.get("format") or "").lower()
|
| 165 |
if any(x in fmt for x in ["jpeg", "jpg", "png", "tiff", "image"]):
|
|
|
|
| 166 |
w = int(f.get("width") or 0)
|
| 167 |
h = int(f.get("height") or 0)
|
| 168 |
if w and h:
|
| 169 |
px = w * h
|
| 170 |
else:
|
| 171 |
+
px = int(f.get("size") or 0)
|
| 172 |
if px > best_pixels:
|
| 173 |
+
best_pixels, best = px, f
|
|
|
|
| 174 |
return best
|
| 175 |
|
| 176 |
def ingest_ia_doc(doc: dict) -> Optional[dict]:
|
| 177 |
+
"""Fetch /metadata and store best image entry into ia_pool."""
|
|
|
|
|
|
|
| 178 |
identifier = doc.get("identifier")
|
| 179 |
if not identifier:
|
| 180 |
return None
|
|
|
|
| 183 |
if not best:
|
| 184 |
return None
|
| 185 |
|
| 186 |
+
md = meta.get("metadata", {}) or {}
|
| 187 |
+
title = md.get("title", "") or doc.get("title", "")
|
| 188 |
+
date = md.get("date", "") or doc.get("date", "")
|
| 189 |
+
creator = md.get("creator", "") or doc.get("creator", "")
|
| 190 |
+
rights = md.get("rights", "") or doc.get("rights", "")
|
| 191 |
+
licenseurl = md.get("licenseurl", "") or doc.get("licenseurl", "")
|
| 192 |
|
| 193 |
download_url = f"https://archive.org/download/{identifier}/{best['name']}"
|
| 194 |
record = {
|
|
|
|
| 215 |
return None
|
| 216 |
identifiers = sorted(pool.keys())
|
| 217 |
case_seed = seed_for_date(case_id)
|
| 218 |
+
ident = identifiers[case_seed % len(identifiers)]
|
|
|
|
| 219 |
return pool[ident]
|
| 220 |
|
| 221 |
def download_image_to_pil(url: str) -> Image.Image:
|
| 222 |
data = http_get_bytes(url)
|
| 223 |
+
return Image.open(io.BytesIO(data)).convert("RGB")
|
|
|
|
| 224 |
|
| 225 |
def crop_signature_macro(img: Image.Image, size: int = 512) -> Image.Image:
|
| 226 |
+
"""Lower-right macro crop."""
|
| 227 |
w, h = img.size
|
| 228 |
cw = min(size, w)
|
| 229 |
ch = min(size, h)
|
|
|
|
| 232 |
return img.crop((left, top, left + cw, top + ch))
|
| 233 |
|
| 234 |
# -----------------------------------------------------------------------------
|
| 235 |
+
# 3) IA -> Firebase Storage caching + Zero-admin bootstrap
|
| 236 |
+
# -----------------------------------------------------------------------------
|
| 237 |
+
def _resize_if_needed(img: Image.Image, max_dim: int = 4096) -> Image.Image:
|
| 238 |
+
w, h = img.size
|
| 239 |
+
if max(w, h) <= max_dim:
|
| 240 |
+
return img
|
| 241 |
+
if w >= h:
|
| 242 |
+
new_w = max_dim
|
| 243 |
+
new_h = int(h * (max_dim / w))
|
| 244 |
+
else:
|
| 245 |
+
new_h = max_dim
|
| 246 |
+
new_w = int(w * (max_dim / h))
|
| 247 |
+
return img.resize((new_w, new_h), Image.LANCZOS)
|
| 248 |
+
|
| 249 |
+
def cache_single_ia_identifier(
|
| 250 |
+
identifier: str,
|
| 251 |
+
overwrite: bool = False,
|
| 252 |
+
max_dim: int = 4096,
|
| 253 |
+
jpeg_quality: int = 90,
|
| 254 |
+
skip_if_restricted: bool = True,
|
| 255 |
+
) -> dict:
|
| 256 |
+
"""
|
| 257 |
+
Download one IA item from ia_pool, upload image + signature macro crop to Firebase Storage,
|
| 258 |
+
and update the ia_pool record with storage URLs & dimensions.
|
| 259 |
+
"""
|
| 260 |
+
rec_ref = ia_pool_ref().child(identifier)
|
| 261 |
+
rec = rec_ref.get() or {}
|
| 262 |
+
if not rec:
|
| 263 |
+
return {"identifier": identifier, "stored": False, "reason": "not_in_pool"}
|
| 264 |
+
|
| 265 |
+
rights = (rec.get("rights") or "").lower()
|
| 266 |
+
if skip_if_restricted and ("in copyright" in rights or "all rights reserved" in rights):
|
| 267 |
+
return {"identifier": identifier, "stored": False, "reason": "restricted_rights"}
|
| 268 |
+
|
| 269 |
+
if rec.get("storage_url") and not overwrite:
|
| 270 |
+
return {"identifier": identifier, "stored": False, "reason": "already_cached", "storage_url": rec["storage_url"]}
|
| 271 |
+
|
| 272 |
+
# Prefer existing cached URL as source; fall back to IA
|
| 273 |
+
source_url = rec.get("storage_url") or rec.get("download_url")
|
| 274 |
+
if not source_url:
|
| 275 |
+
return {"identifier": identifier, "stored": False, "reason": "missing_source_url"}
|
| 276 |
+
|
| 277 |
+
try:
|
| 278 |
+
img = download_image_to_pil(source_url)
|
| 279 |
+
except Exception as e:
|
| 280 |
+
if rec.get("download_url"):
|
| 281 |
+
try:
|
| 282 |
+
img = download_image_to_pil(rec["download_url"])
|
| 283 |
+
except Exception as e2:
|
| 284 |
+
return {"identifier": identifier, "stored": False, "reason": f"download_failed: {e2}"}
|
| 285 |
+
else:
|
| 286 |
+
return {"identifier": identifier, "stored": False, "reason": f"download_failed: {e}"}
|
| 287 |
+
|
| 288 |
+
img = _resize_if_needed(img, max_dim=max_dim)
|
| 289 |
+
w, h = img.size
|
| 290 |
+
|
| 291 |
+
# Upload original
|
| 292 |
+
img_bytes = io.BytesIO()
|
| 293 |
+
img.save(img_bytes, format="JPEG", quality=jpeg_quality, optimize=True)
|
| 294 |
+
img_bytes.seek(0)
|
| 295 |
+
img_path = f"ia_cache/{identifier}/original.jpg"
|
| 296 |
+
storage_url = upload_bytes_to_storage(img_bytes.getvalue(), img_path, "image/jpeg")
|
| 297 |
+
|
| 298 |
+
# Upload macro crop
|
| 299 |
+
crop = crop_signature_macro(img, 512)
|
| 300 |
+
crop_bytes = io.BytesIO()
|
| 301 |
+
crop.save(crop_bytes, format="JPEG", quality=jpeg_quality, optimize=True)
|
| 302 |
+
crop_bytes.seek(0)
|
| 303 |
+
crop_path = f"ia_cache/{identifier}/signature_crop.jpg"
|
| 304 |
+
signature_crop_url = upload_bytes_to_storage(crop_bytes.getvalue(), crop_path, "image/jpeg")
|
| 305 |
+
|
| 306 |
+
rec_update = {
|
| 307 |
+
"storage_url": storage_url,
|
| 308 |
+
"signature_crop_url": signature_crop_url,
|
| 309 |
+
"image_path": img_path,
|
| 310 |
+
"crop_path": crop_path,
|
| 311 |
+
"width": w,
|
| 312 |
+
"height": h,
|
| 313 |
+
"cached_at": datetime.now(timezone.utc).isoformat()
|
| 314 |
+
}
|
| 315 |
+
rec_ref.update(rec_update)
|
| 316 |
+
|
| 317 |
+
return {
|
| 318 |
+
"identifier": identifier,
|
| 319 |
+
"stored": True,
|
| 320 |
+
"storage_url": storage_url,
|
| 321 |
+
"signature_crop_url": signature_crop_url,
|
| 322 |
+
"width": w,
|
| 323 |
+
"height": h
|
| 324 |
+
}
|
| 325 |
+
|
| 326 |
+
def batch_cache_ia_pool(
|
| 327 |
+
limit: int = 100,
|
| 328 |
+
overwrite: bool = False,
|
| 329 |
+
randomize: bool = True,
|
| 330 |
+
min_width: int = 800,
|
| 331 |
+
min_height: int = 800,
|
| 332 |
+
max_dim: int = 4096,
|
| 333 |
+
jpeg_quality: int = 90,
|
| 334 |
+
skip_if_restricted: bool = True,
|
| 335 |
+
) -> dict:
|
| 336 |
+
"""Cache up to `limit` uncached IA items into Firebase Storage."""
|
| 337 |
+
pool = ia_pool_ref().get() or {}
|
| 338 |
+
if not pool:
|
| 339 |
+
return {"ok": True, "processed": 0, "stored": 0, "skipped": 0, "results": []}
|
| 340 |
+
|
| 341 |
+
candidates = []
|
| 342 |
+
for ident, rec in pool.items():
|
| 343 |
+
if overwrite or not rec.get("storage_url"):
|
| 344 |
+
w = int(rec.get("width") or 0)
|
| 345 |
+
h = int(rec.get("height") or 0)
|
| 346 |
+
if (w and h) and (w < min_width or h < min_height):
|
| 347 |
+
continue
|
| 348 |
+
candidates.append(ident)
|
| 349 |
+
|
| 350 |
+
if randomize:
|
| 351 |
+
random.shuffle(candidates)
|
| 352 |
+
candidates = candidates[:max(0, limit)]
|
| 353 |
+
|
| 354 |
+
results, stored, skipped = [], 0, 0
|
| 355 |
+
for ident in candidates:
|
| 356 |
+
res = cache_single_ia_identifier(
|
| 357 |
+
ident,
|
| 358 |
+
overwrite=overwrite,
|
| 359 |
+
max_dim=max_dim,
|
| 360 |
+
jpeg_quality=jpeg_quality,
|
| 361 |
+
skip_if_restricted=skip_if_restricted,
|
| 362 |
+
)
|
| 363 |
+
results.append(res)
|
| 364 |
+
if res.get("stored"):
|
| 365 |
+
stored += 1
|
| 366 |
+
else:
|
| 367 |
+
skipped += 1
|
| 368 |
+
|
| 369 |
+
return {"ok": True, "processed": len(candidates), "stored": stored, "skipped": skipped, "results": results}
|
| 370 |
+
|
| 371 |
+
def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pages: int = 5) -> dict:
|
| 372 |
+
"""
|
| 373 |
+
Zero-admin bootstrap:
|
| 374 |
+
- If ia_pool has fewer than `min_items`, pull from IA Advanced Search and ingest.
|
| 375 |
+
- Then cache enough images to reach `min_items`.
|
| 376 |
+
"""
|
| 377 |
+
pool = ia_pool_ref().get() or {}
|
| 378 |
+
have = len(pool)
|
| 379 |
+
added = 0
|
| 380 |
+
cached = 0
|
| 381 |
+
|
| 382 |
+
if have < min_items:
|
| 383 |
+
page = 1
|
| 384 |
+
while have + added < min_items and page <= max_pages:
|
| 385 |
+
try:
|
| 386 |
+
docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=rows, page=page)
|
| 387 |
+
except Exception as e:
|
| 388 |
+
print("IA search failed on page", page, e)
|
| 389 |
+
break
|
| 390 |
+
if not docs:
|
| 391 |
+
break
|
| 392 |
+
for d in docs:
|
| 393 |
+
ident = d.get("identifier")
|
| 394 |
+
if not ident:
|
| 395 |
+
continue
|
| 396 |
+
if ia_pool_ref().child(ident).get():
|
| 397 |
+
continue
|
| 398 |
+
try:
|
| 399 |
+
rec = ingest_ia_doc(d)
|
| 400 |
+
if rec:
|
| 401 |
+
added += 1
|
| 402 |
+
except Exception:
|
| 403 |
+
continue
|
| 404 |
+
page += 1
|
| 405 |
+
|
| 406 |
+
# Cache up to min_items
|
| 407 |
+
pool = ia_pool_ref().get() or {}
|
| 408 |
+
have_now = len(pool)
|
| 409 |
+
need_cache = max(0, min_items - have_now)
|
| 410 |
+
if need_cache:
|
| 411 |
+
res = batch_cache_ia_pool(limit=need_cache, randomize=True)
|
| 412 |
+
cached = res.get("stored", 0)
|
| 413 |
+
|
| 414 |
+
final_size = len(ia_pool_ref().get() or {})
|
| 415 |
+
return {"ok": True, "had": have, "added": added, "cached": cached, "final_size": final_size}
|
| 416 |
+
|
| 417 |
+
# -----------------------------------------------------------------------------
|
| 418 |
+
# 4) CASE GENERATION (uses IA for authentic image, Gemini for forgeries/meta)
|
| 419 |
# -----------------------------------------------------------------------------
|
| 420 |
def ensure_case_generated(case_id: str) -> Dict[str, Any]:
|
| 421 |
existing_public = case_ref(case_id).child("public").get()
|
| 422 |
if existing_public:
|
| 423 |
return existing_public
|
| 424 |
|
| 425 |
+
# Ensure we have a cached pool ready
|
| 426 |
try:
|
| 427 |
+
stats = ensure_minimum_ia_pool()
|
| 428 |
+
if stats.get("added") or stats.get("cached"):
|
| 429 |
+
print("Bootstrap:", stats)
|
| 430 |
except Exception as e:
|
| 431 |
print("Bootstrap warning:", e)
|
| 432 |
|
| 433 |
+
# Fallback ingest if pool is empty
|
|
|
|
| 434 |
pool = ia_pool_ref().get() or {}
|
| 435 |
if not pool:
|
| 436 |
try:
|
| 437 |
+
docs = ia_advanced_search(DEFAULT_IA_QUERY, rows=100, page=1)
|
|
|
|
|
|
|
| 438 |
for d in docs:
|
| 439 |
try:
|
| 440 |
ingest_ia_doc(d)
|
|
|
|
| 446 |
# Pick authentic from ia_pool deterministically
|
| 447 |
ia_item = choose_ia_item_for_case(case_id)
|
| 448 |
if not ia_item:
|
|
|
|
| 449 |
raise RuntimeError("No IA items available. Ingest needed.")
|
| 450 |
|
| 451 |
# Deterministic mode
|
| 452 |
case_seed = seed_for_date(case_id)
|
| 453 |
rng = random.Random(case_seed)
|
| 454 |
mode = "knowledge" if (case_seed % 2 == 0) else "observation"
|
|
|
|
|
|
|
| 455 |
style_period = "sourced from Internet Archive; museum catalog reproduction"
|
| 456 |
|
| 457 |
+
# Load authentic image (prefer cached)
|
| 458 |
source_url = ia_item.get("storage_url") or ia_item["download_url"]
|
| 459 |
auth_img = download_image_to_pil(source_url)
|
| 460 |
|
|
|
|
| 461 |
images_urls: List[str] = []
|
| 462 |
signature_crops: List[str] = []
|
| 463 |
|
|
|
|
| 474 |
if mode == "knowledge":
|
| 475 |
# Use the same authentic visual for all three; differences come from metadata only
|
| 476 |
for idx in [2, 3]:
|
| 477 |
+
images_urls.append(images_urls[0]) # same URL is ok
|
| 478 |
signature_crops.append(signature_crops[0])
|
| 479 |
else:
|
| 480 |
+
# observation: two subtle variants (signature micro-geometry)
|
| 481 |
for i in range(2):
|
| 482 |
forg_prompt = """
|
| 483 |
Create a near-identical variant of the provided painting.
|
|
|
|
| 505 |
signature_crops.append(c_url)
|
| 506 |
|
| 507 |
# === Gemini: Case brief + 3 metadata bundles + ledger + solution ===
|
|
|
|
| 508 |
title = ia_item.get("title") or "Untitled"
|
| 509 |
creator = ia_item.get("creator") or ""
|
| 510 |
date = ia_item.get("date") or ""
|
|
|
|
| 527 |
1) Create a short, punchy "case_brief" (2–4 sentences) explaining why the artifact matters and why fraud is suspected — NO SPOILERS.
|
| 528 |
2) Prepare THREE metadata bundles for images A,B,C with NEARLY IDENTICAL fields.
|
| 529 |
Ensure exactly ONE bundle is AUTHENTIC and that it corresponds to the above authentic context.
|
| 530 |
+
The other two are FORGERIES with subtle, reality-checkable anomalies.
|
| 531 |
3) Provide a concise "ledger_summary" describing a believable ownership/payment trail.
|
| 532 |
4) Provide the solution with: "answer_index" (0 for A, 1 for B, 2 for C) and detailed flags for signature/metadata/financial, plus an "explanation".
|
| 533 |
|
|
|
|
| 566 |
except Exception:
|
| 567 |
cleaned = raw_text
|
| 568 |
if "```" in raw_text:
|
| 569 |
+
parts = raw_text.split("```")
|
| 570 |
+
if len(parts) >= 2:
|
| 571 |
+
cleaned = parts[1]
|
| 572 |
+
if cleaned.lower().startswith("json"):
|
| 573 |
+
cleaned = cleaned.split("\n", 1)[1]
|
| 574 |
meta_json = json.loads(cleaned)
|
| 575 |
|
| 576 |
case_brief = meta_json.get("case_brief", "A resurfaced portrait raises questions—its paper trail glitters a little too perfectly.")
|
|
|
|
| 593 |
"style_period": style_period,
|
| 594 |
"images": images_urls,
|
| 595 |
"signature_crops": signature_crops,
|
| 596 |
+
"metadata": metadata,
|
| 597 |
"ledger_summary": ledger_summary,
|
| 598 |
"timer_seconds": TIMER_SECONDS,
|
| 599 |
"initial_ip": INITIAL_IP,
|
|
|
|
| 621 |
return public
|
| 622 |
|
| 623 |
# -----------------------------------------------------------------------------
|
| 624 |
+
# 5) SESSIONS, TOOLS, GUESS, LEADERBOARD
|
| 625 |
# -----------------------------------------------------------------------------
|
| 626 |
def create_session(user_id: str, username: str, case_id: str) -> Dict[str, Any]:
|
| 627 |
session_id = str(uuid.uuid4())
|
|
|
|
| 691 |
leaderboard_ref(case_id).set(top)
|
| 692 |
|
| 693 |
# -----------------------------------------------------------------------------
|
| 694 |
+
# 6) ROUTES
|
| 695 |
# -----------------------------------------------------------------------------
|
| 696 |
@app.route("/health", methods=["GET"])
|
| 697 |
def health():
|
| 698 |
return jsonify({"ok": True, "time": datetime.now(timezone.utc).isoformat()})
|
| 699 |
|
| 700 |
+
# --- Admin: Internet Archive ingestion (manual) ---
|
| 701 |
@app.route("/admin/ingest-ia", methods=["POST"])
|
| 702 |
def admin_ingest_ia():
|
| 703 |
if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
|
| 704 |
return jsonify({"error": "Forbidden"}), 403
|
| 705 |
|
| 706 |
body = request.get_json() or {}
|
| 707 |
+
query = body.get("query") or DEFAULT_IA_QUERY
|
|
|
|
| 708 |
pages = int(body.get("pages") or 2)
|
| 709 |
rows = int(body.get("rows") or 100)
|
| 710 |
ingested = 0
|
|
|
|
| 713 |
for page in range(1, pages + 1):
|
| 714 |
try:
|
| 715 |
docs = ia_advanced_search(query, rows=rows, page=page)
|
| 716 |
+
except Exception:
|
| 717 |
errors += 1
|
| 718 |
continue
|
| 719 |
for d in docs:
|
| 720 |
ident = d.get("identifier")
|
| 721 |
if not ident:
|
| 722 |
continue
|
|
|
|
| 723 |
if ia_pool_ref().child(ident).get():
|
| 724 |
continue
|
| 725 |
try:
|
|
|
|
| 733 |
pool_size = len(ia_pool_ref().get() or {})
|
| 734 |
return jsonify({"ok": True, "ingested": ingested, "errors": errors, "pool_size": pool_size})
|
| 735 |
|
| 736 |
+
# --- Admin: Cache IA images to Firebase Storage (manual) ---
@app.route("/admin/cache-ia", methods=["POST"])
def admin_cache_ia():
    """Admin endpoint: batch-cache Internet Archive pool images into Firebase Storage.

    Guarded by the X-Admin-Key header; every tuning knob is read from the JSON
    request body and falls back to a sensible default.
    """
    # Shared-secret gate: reject unless the header matches ADMIN_KEY exactly.
    if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
        return jsonify({"error": "Forbidden"}), 403

    cfg = request.get_json() or {}
    # Coerce integer-valued options from the body (or their defaults).
    int_options = {
        name: int(cfg.get(name, default))
        for name, default in (
            ("limit", 100),
            ("min_width", 800),
            ("min_height", 800),
            ("max_dim", 4096),
            ("jpeg_quality", 90),
        )
    }
    # Coerce boolean-valued options the same way.
    bool_options = {
        name: bool(cfg.get(name, default))
        for name, default in (
            ("overwrite", False),
            ("randomize", True),
            ("skip_if_restricted", True),
        )
    }
    result = batch_cache_ia_pool(**int_options, **bool_options)
    return jsonify(result)
|
| 754 |
+
|
| 755 |
+
# --- Admin: pool stats ---
|
| 756 |
@app.route("/admin/ia-pool/stats", methods=["GET"])
|
| 757 |
def ia_pool_stats():
|
| 758 |
if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
|
|
|
|
| 760 |
pool = ia_pool_ref().get() or {}
|
| 761 |
return jsonify({"pool_size": len(pool)})
|
| 762 |
|
| 763 |
+
# --- Admin: pre-generate today's case (manual) ---
|
| 764 |
@app.route("/admin/generate-today", methods=["POST"])
|
| 765 |
def admin_generate_today():
|
| 766 |
if not ADMIN_KEY or request.headers.get("X-Admin-Key") != ADMIN_KEY:
|
|
|
|
| 769 |
public = ensure_case_generated(case_id)
|
| 770 |
return jsonify({"generated": True, "case_id": case_id, "mode": public.get("mode")})
|
| 771 |
|
| 772 |
+
# --- DEV-ONLY: panic button bootstrap (no auth; gated by env) ---
@app.route("/admin/bootstrap-now", methods=["POST"])
def admin_bootstrap_now():
    """Dev-only endpoint: force-fill the IA pool up to a minimum size.

    Carries no admin-key check; the route is a no-op 403 unless
    ALLOW_DEV_BOOTSTRAP is enabled in the environment.
    """
    if not ALLOW_DEV_BOOTSTRAP:
        return jsonify({"error": "Disabled. Set ALLOW_DEV_BOOTSTRAP=1 to enable."}), 403

    body = request.get_json() or {}
    try:
        stats = ensure_minimum_ia_pool(
            min_items=int(body.get("min_items", MIN_IA_POOL)),
            rows=int(body.get("rows", 100)),
            max_pages=int(body.get("max_pages", 5)),
        )
        return jsonify({"ok": True, "stats": stats})
    except Exception as e:
        # Surface the failure to the caller rather than a bare 500 page.
        return jsonify({"ok": False, "error": str(e)}), 500
|
| 786 |
+
|
| 787 |
# --- Player flow ---
|
| 788 |
@app.route("/cases/today/start", methods=["POST"])
|
| 789 |
def start_case():
|
|
|
|
| 795 |
existing = sessions_ref().order_by_child("user_id").equal_to(user_id).get()
|
| 796 |
sess = None
|
| 797 |
if existing:
|
| 798 |
+
for _, sdoc in existing.items():
|
| 799 |
if sdoc.get("case_id") == case_id and sdoc.get("status") == "active":
|
| 800 |
sess = sdoc
|
| 801 |
break
|
| 802 |
if not sess:
|
| 803 |
sess = create_session(user_id, username, case_id)
|
| 804 |
|
| 805 |
+
return jsonify({"session_id": sess["session_id"], "case": public})
|
|
|
|
|
|
|
|
|
|
|
|
|
| 806 |
|
| 807 |
@app.route("/cases/<case_id>/tool/signature", methods=["POST"])
|
| 808 |
def tool_signature(case_id):
|
|
|
|
| 912 |
def leaderboard_daily():
|
| 913 |
case_id = utc_today_str()
|
| 914 |
top = leaderboard_ref(case_id).get() or []
|
| 915 |
+
user_id, _ = extract_user_from_headers(request)
|
| 916 |
me = plays_ref(case_id).child(user_id).get() or {}
|
| 917 |
rank = None
|
| 918 |
if top:
|
|
|
|
| 923 |
return jsonify({"case_id": case_id, "top": top, "me": {"score": me.get("score"), "rank": rank}})
|
| 924 |
|
| 925 |
# -----------------------------------------------------------------------------
|
| 926 |
+
# 7) MAIN
|
| 927 |
# -----------------------------------------------------------------------------
|
| 928 |
if __name__ == "__main__":
|
| 929 |
# Optional: pre-warm pool on boot so you’re ready before first request
|
| 930 |
if os.environ.get("BOOTSTRAP_IA", "1") == "1":
|
| 931 |
print("Bootstrapping Internet Archive pool...")
|
| 932 |
+
try:
|
| 933 |
+
stats = ensure_minimum_ia_pool()
|
| 934 |
+
print("Bootstrap complete:", stats)
|
| 935 |
+
except Exception as e:
|
| 936 |
+
print("Bootstrap failed:", e)
|
| 937 |
|
| 938 |
app.run(host="0.0.0.0", port=int(os.environ.get("PORT", "7860")), debug=True)
|
|
|