Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -4,7 +4,7 @@
|
|
| 4 |
# Optional envs: GAME_SALT, ADMIN_KEY, IA_USER_AGENT, MIN_IA_POOL, IA_QUERY,
|
| 5 |
# BOOTSTRAP_IA, LOG_LEVEL, ALLOW_DEV_BOOTSTRAP, ALLOW_DEV_DIAGNOSTICS
|
| 6 |
|
| 7 |
-
import os, io, uuid, json, hmac, hashlib, random, traceback, requests
|
| 8 |
from datetime import datetime, timedelta, timezone
|
| 9 |
from typing import Dict, Any, Tuple, List, Optional
|
| 10 |
|
|
@@ -55,7 +55,7 @@ try:
|
|
| 55 |
bucket = storage.bucket()
|
| 56 |
db_root = db.reference("/")
|
| 57 |
log.info("Firebase Realtime DB + Storage initialized.")
|
| 58 |
-
except Exception
|
| 59 |
log.exception("FATAL: Firebase init failed")
|
| 60 |
raise
|
| 61 |
|
|
@@ -66,7 +66,7 @@ try:
|
|
| 66 |
raise ValueError("The 'Gemini' environment variable is not set.")
|
| 67 |
client = genai.Client(api_key=GEMINI_API_KEY)
|
| 68 |
log.info("Gemini client initialized.")
|
| 69 |
-
except Exception
|
| 70 |
log.exception("FATAL: Gemini init failed")
|
| 71 |
raise
|
| 72 |
|
|
@@ -93,11 +93,8 @@ ALLOW_DEV_BOOTSTRAP = os.environ.get("ALLOW_DEV_BOOTSTRAP", "0") == "1"
|
|
| 93 |
ALLOW_DEV_DIAGNOSTICS = os.environ.get("ALLOW_DEV_DIAGNOSTICS", "0") == "1"
|
| 94 |
|
| 95 |
FALLBACK_IA_QUERIES = [
|
| 96 |
-
# 1) broad (fastest win)
|
| 97 |
'(mediatype:image AND (format:JPEG OR format:PNG))',
|
| 98 |
-
# 2) portraits bias (nice for the game)
|
| 99 |
'(mediatype:image AND (format:JPEG OR format:PNG) AND (subject:portrait OR title:portrait))',
|
| 100 |
-
# 3) slightly narrower but still broad
|
| 101 |
'(mediatype:image AND format:JPEG)',
|
| 102 |
]
|
| 103 |
|
|
@@ -125,6 +122,18 @@ def ia_pool_ref():
|
|
| 125 |
def hmac_hex(s: str) -> str:
    """Return the hexadecimal HMAC-SHA256 digest of ``s`` keyed with ``GAME_SALT``.

    Both the key and the message are encoded with ``str.encode()``'s default
    encoding before being fed to the MAC.
    """
    key_bytes = GAME_SALT.encode()
    message = s.encode()
    mac = hmac.new(key_bytes, message, hashlib.sha256)
    return mac.hexdigest()
|
| 127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
def upload_bytes_to_storage(data: bytes, path: str, content_type: str) -> str:
|
| 129 |
log.debug(f"Uploading to Storage: path={path}, content_type={content_type}, bytes={len(data)}")
|
| 130 |
blob = bucket.blob(path)
|
|
@@ -214,11 +223,12 @@ def ia_best_image_from_metadata(meta: dict) -> Optional[dict]:
|
|
| 214 |
return best
|
| 215 |
|
| 216 |
def ingest_ia_doc(doc: dict) -> Optional[dict]:
|
| 217 |
-
"""Fetch /metadata and store best image entry into ia_pool."""
|
| 218 |
identifier = doc.get("identifier")
|
| 219 |
if not identifier:
|
| 220 |
return None
|
| 221 |
-
|
|
|
|
| 222 |
meta = ia_metadata(identifier)
|
| 223 |
best = ia_best_image_from_metadata(meta)
|
| 224 |
if not best:
|
|
@@ -234,7 +244,8 @@ def ingest_ia_doc(doc: dict) -> Optional[dict]:
|
|
| 234 |
|
| 235 |
download_url = f"https://archive.org/download/{identifier}/{best['name']}"
|
| 236 |
record = {
|
| 237 |
-
"identifier": identifier,
|
|
|
|
| 238 |
"title": title,
|
| 239 |
"date": str(date),
|
| 240 |
"creator": creator,
|
|
@@ -248,8 +259,8 @@ def ingest_ia_doc(doc: dict) -> Optional[dict]:
|
|
| 248 |
"size": best.get("size"),
|
| 249 |
"source": "internet_archive"
|
| 250 |
}
|
| 251 |
-
ia_pool_ref().child(
|
| 252 |
-
log.info(f"Ingested {identifier} -> ia_pool (title='{title}')")
|
| 253 |
return record
|
| 254 |
|
| 255 |
def choose_ia_item_for_case(case_id: str) -> Optional[dict]:
|
|
@@ -257,11 +268,11 @@ def choose_ia_item_for_case(case_id: str) -> Optional[dict]:
|
|
| 257 |
if not pool:
|
| 258 |
log.warning("choose_ia_item_for_case: pool is empty")
|
| 259 |
return None
|
| 260 |
-
|
| 261 |
case_seed = seed_for_date(case_id)
|
| 262 |
-
|
| 263 |
-
log.info(f"Chosen IA
|
| 264 |
-
return pool[
|
| 265 |
|
| 266 |
def download_image_to_pil(url: str) -> Image.Image:
|
| 267 |
data = http_get_bytes(url)
|
|
@@ -295,30 +306,31 @@ def _resize_if_needed(img: Image.Image, max_dim: int = 4096) -> Image.Image:
|
|
| 295 |
return img.resize((new_w, new_h), Image.LANCZOS)
|
| 296 |
|
| 297 |
def cache_single_ia_identifier(
|
| 298 |
-
|
| 299 |
overwrite: bool = False,
|
| 300 |
max_dim: int = 4096,
|
| 301 |
jpeg_quality: int = 90,
|
| 302 |
skip_if_restricted: bool = True,
|
| 303 |
) -> dict:
|
| 304 |
-
rec_ref = ia_pool_ref().child(
|
| 305 |
rec = rec_ref.get() or {}
|
| 306 |
if not rec:
|
| 307 |
-
return {"
|
| 308 |
|
|
|
|
| 309 |
rights = (rec.get("rights") or "").lower()
|
| 310 |
if skip_if_restricted and ("in copyright" in rights or "all rights reserved" in rights):
|
| 311 |
log.info(f"Skipping {identifier}: restricted rights")
|
| 312 |
-
return {"
|
| 313 |
|
| 314 |
if rec.get("storage_url") and not overwrite:
|
| 315 |
log.info(f"Skipping {identifier}: already cached")
|
| 316 |
-
return {"
|
| 317 |
|
| 318 |
source_url = rec.get("storage_url") or rec.get("download_url")
|
| 319 |
if not source_url:
|
| 320 |
log.warning(f"{identifier}: missing source_url")
|
| 321 |
-
return {"
|
| 322 |
|
| 323 |
try:
|
| 324 |
log.info(f"Caching {identifier} from {source_url}")
|
|
@@ -330,10 +342,10 @@ def cache_single_ia_identifier(
|
|
| 330 |
img = download_image_to_pil(rec["download_url"])
|
| 331 |
except Exception as e2:
|
| 332 |
log.exception(f"{identifier}: download failed")
|
| 333 |
-
return {"
|
| 334 |
else:
|
| 335 |
log.exception(f"{identifier}: download failed")
|
| 336 |
-
return {"
|
| 337 |
|
| 338 |
img = _resize_if_needed(img, max_dim=max_dim)
|
| 339 |
w, h = img.size
|
|
@@ -342,7 +354,7 @@ def cache_single_ia_identifier(
|
|
| 342 |
img_bytes = io.BytesIO()
|
| 343 |
img.save(img_bytes, format="JPEG", quality=jpeg_quality, optimize=True)
|
| 344 |
img_bytes.seek(0)
|
| 345 |
-
img_path = f"ia_cache/{
|
| 346 |
storage_url = upload_bytes_to_storage(img_bytes.getvalue(), img_path, "image/jpeg")
|
| 347 |
|
| 348 |
# Upload macro crop
|
|
@@ -350,7 +362,7 @@ def cache_single_ia_identifier(
|
|
| 350 |
crop_bytes = io.BytesIO()
|
| 351 |
crop.save(crop_bytes, format="JPEG", quality=jpeg_quality, optimize=True)
|
| 352 |
crop_bytes.seek(0)
|
| 353 |
-
crop_path = f"ia_cache/{
|
| 354 |
signature_crop_url = upload_bytes_to_storage(crop_bytes.getvalue(), crop_path, "image/jpeg")
|
| 355 |
|
| 356 |
rec_update = {
|
|
@@ -366,7 +378,7 @@ def cache_single_ia_identifier(
|
|
| 366 |
log.info(f"Cached {identifier} -> {storage_url}")
|
| 367 |
|
| 368 |
return {
|
| 369 |
-
"
|
| 370 |
"stored": True,
|
| 371 |
"storage_url": storage_url,
|
| 372 |
"signature_crop_url": signature_crop_url,
|
|
@@ -390,14 +402,14 @@ def batch_cache_ia_pool(
|
|
| 390 |
return {"ok": True, "processed": 0, "stored": 0, "skipped": 0, "results": []}
|
| 391 |
|
| 392 |
candidates = []
|
| 393 |
-
for
|
| 394 |
if overwrite or not rec.get("storage_url"):
|
| 395 |
w = int(rec.get("width") or 0)
|
| 396 |
h = int(rec.get("height") or 0)
|
| 397 |
if (w and h) and (w < min_width or h < min_height):
|
| 398 |
-
log.debug(f"Skip {
|
| 399 |
continue
|
| 400 |
-
candidates.append(
|
| 401 |
|
| 402 |
if randomize:
|
| 403 |
random.shuffle(candidates)
|
|
@@ -405,9 +417,9 @@ def batch_cache_ia_pool(
|
|
| 405 |
log.info(f"Caching candidates: {len(candidates)} (limit={limit})")
|
| 406 |
|
| 407 |
results, stored, skipped = [], 0, 0
|
| 408 |
-
for
|
| 409 |
res = cache_single_ia_identifier(
|
| 410 |
-
|
| 411 |
overwrite=overwrite,
|
| 412 |
max_dim=max_dim,
|
| 413 |
jpeg_quality=jpeg_quality,
|
|
@@ -429,13 +441,11 @@ def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pa
|
|
| 429 |
cached = 0
|
| 430 |
log.info(f"ensure_minimum_ia_pool: have={have}, target={min_items}")
|
| 431 |
|
| 432 |
-
# Decide which queries to try: env IA_QUERY first, then fallbacks
|
| 433 |
candidate_queries = []
|
| 434 |
if DEFAULT_IA_QUERY:
|
| 435 |
candidate_queries.append(DEFAULT_IA_QUERY)
|
| 436 |
candidate_queries.extend([q for q in FALLBACK_IA_QUERIES if q not in candidate_queries])
|
| 437 |
|
| 438 |
-
# Ingest until we reach the target or run out of queries/pages
|
| 439 |
for q in candidate_queries:
|
| 440 |
if have + added >= min_items:
|
| 441 |
break
|
|
@@ -454,7 +464,7 @@ def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pa
|
|
| 454 |
ident = d.get("identifier")
|
| 455 |
if not ident:
|
| 456 |
continue
|
| 457 |
-
if ia_pool_ref().child(ident).get():
|
| 458 |
continue
|
| 459 |
try:
|
| 460 |
rec = ingest_ia_doc(d)
|
|
@@ -467,7 +477,6 @@ def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pa
|
|
| 467 |
break
|
| 468 |
page += 1
|
| 469 |
|
| 470 |
-
# Cache up to min_items (unchanged)
|
| 471 |
pool = ia_pool_ref().get() or {}
|
| 472 |
have_now = len(pool)
|
| 473 |
need_cache = max(0, min_items - have_now)
|
|
@@ -481,7 +490,6 @@ def ensure_minimum_ia_pool(min_items: int = MIN_IA_POOL, rows: int = 100, max_pa
|
|
| 481 |
log.info(f"ensure_minimum_ia_pool: stats={stats}")
|
| 482 |
return stats
|
| 483 |
|
| 484 |
-
|
| 485 |
# -----------------------------------------------------------------------------
|
| 486 |
# 4) CASE GENERATION (uses IA for authentic image, Gemini for forgeries/meta)
|
| 487 |
# -----------------------------------------------------------------------------
|
|
@@ -498,19 +506,16 @@ def ensure_case_generated(case_id: str) -> Dict[str, Any]:
|
|
| 498 |
except Exception:
|
| 499 |
log.exception("Bootstrap failed inside ensure_case_generated")
|
| 500 |
|
| 501 |
-
# Pick authentic from ia_pool deterministically
|
| 502 |
ia_item = choose_ia_item_for_case(case_id)
|
| 503 |
if not ia_item:
|
| 504 |
raise RuntimeError("No IA items available. Ingest needed.")
|
| 505 |
|
| 506 |
-
# Deterministic mode
|
| 507 |
case_seed = seed_for_date(case_id)
|
| 508 |
mode = "knowledge" if (case_seed % 2 == 0) else "observation"
|
| 509 |
log.info(f"Case {case_id}: mode={mode}")
|
| 510 |
|
| 511 |
style_period = "sourced from Internet Archive; museum catalog reproduction"
|
| 512 |
|
| 513 |
-
# Load authentic image (prefer cached)
|
| 514 |
source_url = ia_item.get("storage_url") or ia_item["download_url"]
|
| 515 |
log.info(f"Case {case_id}: authentic source={source_url}")
|
| 516 |
auth_img = download_image_to_pil(source_url)
|
|
@@ -518,19 +523,17 @@ def ensure_case_generated(case_id: str) -> Dict[str, Any]:
|
|
| 518 |
images_urls: List[str] = []
|
| 519 |
signature_crops: List[str] = []
|
| 520 |
|
| 521 |
-
# Save authentic as image #1
|
| 522 |
url1 = save_image_return_url(auth_img, f"hidden_stroke/{case_id}/images/img_1.jpg")
|
| 523 |
images_urls.append(url1)
|
| 524 |
log.debug(f"Case {case_id}: saved authentic -> {url1}")
|
| 525 |
|
| 526 |
-
# Macro crop for signature area
|
| 527 |
crop1 = crop_signature_macro(auth_img, 512)
|
| 528 |
crop1_url = save_image_return_url(crop1, f"hidden_stroke/{case_id}/signature_crops/crop_1.jpg", quality=88)
|
| 529 |
signature_crops.append(crop1_url)
|
| 530 |
log.debug(f"Case {case_id}: saved authentic crop -> {crop1_url}")
|
| 531 |
|
| 532 |
if mode == "knowledge":
|
| 533 |
-
for
|
| 534 |
images_urls.append(images_urls[0])
|
| 535 |
signature_crops.append(signature_crops[0])
|
| 536 |
else:
|
|
@@ -563,7 +566,6 @@ No annotations. Differences must be visible only at macro zoom.
|
|
| 563 |
signature_crops.append(c_url)
|
| 564 |
log.debug(f"Case {case_id}: forgery saved -> {url}; crop -> {c_url}")
|
| 565 |
|
| 566 |
-
# === Gemini: Case brief + metadata + ledger + solution ===
|
| 567 |
title = ia_item.get("title") or "Untitled"
|
| 568 |
creator = ia_item.get("creator") or ""
|
| 569 |
date = ia_item.get("date") or ""
|
|
@@ -780,7 +782,7 @@ def admin_ingest_ia():
|
|
| 780 |
ident = d.get("identifier")
|
| 781 |
if not ident:
|
| 782 |
continue
|
| 783 |
-
if ia_pool_ref().child(ident).get():
|
| 784 |
continue
|
| 785 |
try:
|
| 786 |
rec = ingest_ia_doc(d)
|
|
@@ -858,7 +860,6 @@ def admin_bootstrap_now():
|
|
| 858 |
finally:
|
| 859 |
DEFAULT_IA_QUERY = original_q # restore
|
| 860 |
|
| 861 |
-
|
| 862 |
# --- DEV-ONLY: diagnostics (network + firebase sanity) ---
|
| 863 |
@app.route("/admin/diagnostics", methods=["GET"])
|
| 864 |
def diagnostics():
|
|
@@ -883,7 +884,6 @@ def diagnostics():
|
|
| 883 |
except Exception as e:
|
| 884 |
diag["ia"]["error"] = str(e)
|
| 885 |
|
| 886 |
-
# Try a tiny upload
|
| 887 |
try:
|
| 888 |
tiny = upload_bytes_to_storage(b"ping", f"diag/ping_{uuid.uuid4().hex}.txt", "text/plain")
|
| 889 |
diag["firebase"]["upload_test"] = tiny
|
|
@@ -899,7 +899,6 @@ def start_case():
|
|
| 899 |
case_id = utc_today_str()
|
| 900 |
public = ensure_case_generated(case_id)
|
| 901 |
|
| 902 |
-
# Create/reuse an active session for this user+case
|
| 903 |
existing = sessions_ref().order_by_child("user_id").equal_to(user_id).get()
|
| 904 |
sess = None
|
| 905 |
if existing:
|
|
|
|
| 4 |
# Optional envs: GAME_SALT, ADMIN_KEY, IA_USER_AGENT, MIN_IA_POOL, IA_QUERY,
|
| 5 |
# BOOTSTRAP_IA, LOG_LEVEL, ALLOW_DEV_BOOTSTRAP, ALLOW_DEV_DIAGNOSTICS
|
| 6 |
|
| 7 |
+
import os, io, uuid, json, hmac, hashlib, random, traceback, requests, re, hashlib as _hash
|
| 8 |
from datetime import datetime, timedelta, timezone
|
| 9 |
from typing import Dict, Any, Tuple, List, Optional
|
| 10 |
|
|
|
|
| 55 |
bucket = storage.bucket()
|
| 56 |
db_root = db.reference("/")
|
| 57 |
log.info("Firebase Realtime DB + Storage initialized.")
|
| 58 |
+
except Exception:
|
| 59 |
log.exception("FATAL: Firebase init failed")
|
| 60 |
raise
|
| 61 |
|
|
|
|
| 66 |
raise ValueError("The 'Gemini' environment variable is not set.")
|
| 67 |
client = genai.Client(api_key=GEMINI_API_KEY)
|
| 68 |
log.info("Gemini client initialized.")
|
| 69 |
+
except Exception:
|
| 70 |
log.exception("FATAL: Gemini init failed")
|
| 71 |
raise
|
| 72 |
|
|
|
|
| 93 |
ALLOW_DEV_DIAGNOSTICS = os.environ.get("ALLOW_DEV_DIAGNOSTICS", "0") == "1"
|
| 94 |
|
| 95 |
FALLBACK_IA_QUERIES = [
|
|
|
|
| 96 |
'(mediatype:image AND (format:JPEG OR format:PNG))',
|
|
|
|
| 97 |
'(mediatype:image AND (format:JPEG OR format:PNG) AND (subject:portrait OR title:portrait))',
|
|
|
|
| 98 |
'(mediatype:image AND format:JPEG)',
|
| 99 |
]
|
| 100 |
|
|
|
|
| 122 |
def hmac_hex(s: str) -> str:
    """Return the hexadecimal HMAC-SHA256 digest of ``s`` keyed with ``GAME_SALT``.

    Both the key and the message are encoded with ``str.encode()``'s default
    encoding before being fed to the MAC.
    """
    key_bytes = GAME_SALT.encode()
    message = s.encode()
    mac = hmac.new(key_bytes, message, hashlib.sha256)
    return mac.hexdigest()
|
| 124 |
|
| 125 |
+
# Firebase RTDB key sanitizer (no . $ # [ ] / or control chars)
_FB_BAD = re.compile(r'[.$#\[\]/\x00-\x1F\x7F]')

# Length limits applied by fb_key(). The character cap is the historical
# behaviour; the byte cap is the Realtime Database key-size limit, which is
# measured on the UTF-8 encoding rather than on characters.
_FB_MAX_KEY_CHARS = 700
_FB_MAX_KEY_BYTES = 768
_FB_SUFFIX_LEN = 10  # "__" + 8 hex digits


def fb_key(raw: str) -> str:
    """Convert an arbitrary identifier into a string usable as an RTDB key.

    Every character matched by ``_FB_BAD`` is replaced with ``_`` and the
    result is capped at ``_FB_MAX_KEY_CHARS`` characters. If the sanitized
    text differs from ``raw``, ``"__"`` plus the first 8 hex digits of the
    SHA-1 of the original text is appended so that distinct identifiers
    which sanitize to the same text still map to different keys.

    In addition to the character cap, the result is trimmed so that its
    UTF-8 encoding (including the suffix) never exceeds
    ``_FB_MAX_KEY_BYTES``. Inputs whose key already fit in that many bytes
    produce exactly the same key as before this guard existed.

    Args:
        raw: The original identifier. ``None`` is treated as empty text.

    Returns:
        A non-empty key. An empty ``raw`` yields the first 8 hex digits of
        ``sha1(b'empty')``.
    """
    text = raw or ''
    safe = _FB_BAD.sub('_', text)
    if len(safe) > _FB_MAX_KEY_CHARS:
        safe = safe[:_FB_MAX_KEY_CHARS]

    # A suffix is only appended when the text was altered, so an unaltered
    # identifier may use the whole byte budget.
    budget = _FB_MAX_KEY_BYTES
    if safe != raw:
        budget -= _FB_SUFFIX_LEN
    encoded = safe.encode('utf-8')
    if len(encoded) > budget:
        # Trimming alters the text, so a suffix will follow: leave room for
        # it. errors='ignore' drops a multi-byte character cut in half.
        keep = _FB_MAX_KEY_BYTES - _FB_SUFFIX_LEN
        safe = encoded[:keep].decode('utf-8', errors='ignore')

    if safe != raw:
        suffix = _hash.sha1(text.encode('utf-8')).hexdigest()[:8]
        safe = f"{safe}__{suffix}"
    return safe or _hash.sha1(b'empty').hexdigest()[:8]
|
| 136 |
+
|
| 137 |
def upload_bytes_to_storage(data: bytes, path: str, content_type: str) -> str:
|
| 138 |
log.debug(f"Uploading to Storage: path={path}, content_type={content_type}, bytes={len(data)}")
|
| 139 |
blob = bucket.blob(path)
|
|
|
|
| 223 |
return best
|
| 224 |
|
| 225 |
def ingest_ia_doc(doc: dict) -> Optional[dict]:
|
| 226 |
+
"""Fetch /metadata and store best image entry into ia_pool (sanitized key)."""
|
| 227 |
identifier = doc.get("identifier")
|
| 228 |
if not identifier:
|
| 229 |
return None
|
| 230 |
+
pool_key = fb_key(identifier)
|
| 231 |
+
log.info(f"Ingesting IA identifier={identifier} -> pool_key={pool_key}")
|
| 232 |
meta = ia_metadata(identifier)
|
| 233 |
best = ia_best_image_from_metadata(meta)
|
| 234 |
if not best:
|
|
|
|
| 244 |
|
| 245 |
download_url = f"https://archive.org/download/{identifier}/{best['name']}"
|
| 246 |
record = {
|
| 247 |
+
"identifier": identifier, # original IA id preserved
|
| 248 |
+
"_pool_key": pool_key, # sanitized RTDB key
|
| 249 |
"title": title,
|
| 250 |
"date": str(date),
|
| 251 |
"creator": creator,
|
|
|
|
| 259 |
"size": best.get("size"),
|
| 260 |
"source": "internet_archive"
|
| 261 |
}
|
| 262 |
+
ia_pool_ref().child(pool_key).set(record)
|
| 263 |
+
log.info(f"Ingested {identifier} -> ia_pool/{pool_key} (title='{title}')")
|
| 264 |
return record
|
| 265 |
|
| 266 |
def choose_ia_item_for_case(case_id: str) -> Optional[dict]:
|
|
|
|
| 268 |
if not pool:
|
| 269 |
log.warning("choose_ia_item_for_case: pool is empty")
|
| 270 |
return None
|
| 271 |
+
keys = sorted(pool.keys())
|
| 272 |
case_seed = seed_for_date(case_id)
|
| 273 |
+
pool_key = keys[case_seed % len(keys)]
|
| 274 |
+
log.info(f"Chosen IA pool_key for case {case_id}: {pool_key}")
|
| 275 |
+
return pool[pool_key]
|
| 276 |
|
| 277 |
def download_image_to_pil(url: str) -> Image.Image:
|
| 278 |
data = http_get_bytes(url)
|
|
|
|
| 306 |
return img.resize((new_w, new_h), Image.LANCZOS)
|
| 307 |
|
| 308 |
def cache_single_ia_identifier(
|
| 309 |
+
pool_key: str,
|
| 310 |
overwrite: bool = False,
|
| 311 |
max_dim: int = 4096,
|
| 312 |
jpeg_quality: int = 90,
|
| 313 |
skip_if_restricted: bool = True,
|
| 314 |
) -> dict:
|
| 315 |
+
rec_ref = ia_pool_ref().child(pool_key)
|
| 316 |
rec = rec_ref.get() or {}
|
| 317 |
if not rec:
|
| 318 |
+
return {"pool_key": pool_key, "stored": False, "reason": "not_in_pool"}
|
| 319 |
|
| 320 |
+
identifier = rec.get("identifier") or pool_key
|
| 321 |
rights = (rec.get("rights") or "").lower()
|
| 322 |
if skip_if_restricted and ("in copyright" in rights or "all rights reserved" in rights):
|
| 323 |
log.info(f"Skipping {identifier}: restricted rights")
|
| 324 |
+
return {"pool_key": pool_key, "stored": False, "reason": "restricted_rights"}
|
| 325 |
|
| 326 |
if rec.get("storage_url") and not overwrite:
|
| 327 |
log.info(f"Skipping {identifier}: already cached")
|
| 328 |
+
return {"pool_key": pool_key, "stored": False, "reason": "already_cached", "storage_url": rec["storage_url"]}
|
| 329 |
|
| 330 |
source_url = rec.get("storage_url") or rec.get("download_url")
|
| 331 |
if not source_url:
|
| 332 |
log.warning(f"{identifier}: missing source_url")
|
| 333 |
+
return {"pool_key": pool_key, "stored": False, "reason": "missing_source_url"}
|
| 334 |
|
| 335 |
try:
|
| 336 |
log.info(f"Caching {identifier} from {source_url}")
|
|
|
|
| 342 |
img = download_image_to_pil(rec["download_url"])
|
| 343 |
except Exception as e2:
|
| 344 |
log.exception(f"{identifier}: download failed")
|
| 345 |
+
return {"pool_key": pool_key, "stored": False, "reason": f"download_failed: {e2}"}
|
| 346 |
else:
|
| 347 |
log.exception(f"{identifier}: download failed")
|
| 348 |
+
return {"pool_key": pool_key, "stored": False, "reason": f"download_failed: {e}"}
|
| 349 |
|
| 350 |
img = _resize_if_needed(img, max_dim=max_dim)
|
| 351 |
w, h = img.size
|
|
|
|
| 354 |
img_bytes = io.BytesIO()
|
| 355 |
img.save(img_bytes, format="JPEG", quality=jpeg_quality, optimize=True)
|
| 356 |
img_bytes.seek(0)
|
| 357 |
+
img_path = f"ia_cache/{pool_key}/original.jpg"
|
| 358 |
storage_url = upload_bytes_to_storage(img_bytes.getvalue(), img_path, "image/jpeg")
|
| 359 |
|
| 360 |
# Upload macro crop
|
|
|
|
| 362 |
crop_bytes = io.BytesIO()
|
| 363 |
crop.save(crop_bytes, format="JPEG", quality=jpeg_quality, optimize=True)
|
| 364 |
crop_bytes.seek(0)
|
| 365 |
+
crop_path = f"ia_cache/{pool_key}/signature_crop.jpg"
|
| 366 |
signature_crop_url = upload_bytes_to_storage(crop_bytes.getvalue(), crop_path, "image/jpeg")
|
| 367 |
|
| 368 |
rec_update = {
|
|
|
|
| 378 |
log.info(f"Cached {identifier} -> {storage_url}")
|
| 379 |
|
| 380 |
return {
|
| 381 |
+
"pool_key": pool_key,
|
| 382 |
"stored": True,
|
| 383 |
"storage_url": storage_url,
|
| 384 |
"signature_crop_url": signature_crop_url,
|
|
|
|
| 402 |
return {"ok": True, "processed": 0, "stored": 0, "skipped": 0, "results": []}
|
| 403 |
|
| 404 |
candidates = []
|
| 405 |
+
for pkey, rec in pool.items():
|
| 406 |
if overwrite or not rec.get("storage_url"):
|
| 407 |
w = int(rec.get("width") or 0)
|
| 408 |
h = int(rec.get("height") or 0)
|
| 409 |
if (w and h) and (w < min_width or h < min_height):
|
| 410 |
+
log.debug(f"Skip {pkey}: too small {w}x{h}")
|
| 411 |
continue
|
| 412 |
+
candidates.append(pkey)
|
| 413 |
|
| 414 |
if randomize:
|
| 415 |
random.shuffle(candidates)
|
|
|
|
| 417 |
log.info(f"Caching candidates: {len(candidates)} (limit={limit})")
|
| 418 |
|
| 419 |
results, stored, skipped = [], 0, 0
|
| 420 |
+
for pkey in candidates:
|
| 421 |
res = cache_single_ia_identifier(
|
| 422 |
+
pkey,
|
| 423 |
overwrite=overwrite,
|
| 424 |
max_dim=max_dim,
|
| 425 |
jpeg_quality=jpeg_quality,
|
|
|
|
| 441 |
cached = 0
|
| 442 |
log.info(f"ensure_minimum_ia_pool: have={have}, target={min_items}")
|
| 443 |
|
|
|
|
| 444 |
candidate_queries = []
|
| 445 |
if DEFAULT_IA_QUERY:
|
| 446 |
candidate_queries.append(DEFAULT_IA_QUERY)
|
| 447 |
candidate_queries.extend([q for q in FALLBACK_IA_QUERIES if q not in candidate_queries])
|
| 448 |
|
|
|
|
| 449 |
for q in candidate_queries:
|
| 450 |
if have + added >= min_items:
|
| 451 |
break
|
|
|
|
| 464 |
ident = d.get("identifier")
|
| 465 |
if not ident:
|
| 466 |
continue
|
| 467 |
+
if ia_pool_ref().child(fb_key(ident)).get():
|
| 468 |
continue
|
| 469 |
try:
|
| 470 |
rec = ingest_ia_doc(d)
|
|
|
|
| 477 |
break
|
| 478 |
page += 1
|
| 479 |
|
|
|
|
| 480 |
pool = ia_pool_ref().get() or {}
|
| 481 |
have_now = len(pool)
|
| 482 |
need_cache = max(0, min_items - have_now)
|
|
|
|
| 490 |
log.info(f"ensure_minimum_ia_pool: stats={stats}")
|
| 491 |
return stats
|
| 492 |
|
|
|
|
| 493 |
# -----------------------------------------------------------------------------
|
| 494 |
# 4) CASE GENERATION (uses IA for authentic image, Gemini for forgeries/meta)
|
| 495 |
# -----------------------------------------------------------------------------
|
|
|
|
| 506 |
except Exception:
|
| 507 |
log.exception("Bootstrap failed inside ensure_case_generated")
|
| 508 |
|
|
|
|
| 509 |
ia_item = choose_ia_item_for_case(case_id)
|
| 510 |
if not ia_item:
|
| 511 |
raise RuntimeError("No IA items available. Ingest needed.")
|
| 512 |
|
|
|
|
| 513 |
case_seed = seed_for_date(case_id)
|
| 514 |
mode = "knowledge" if (case_seed % 2 == 0) else "observation"
|
| 515 |
log.info(f"Case {case_id}: mode={mode}")
|
| 516 |
|
| 517 |
style_period = "sourced from Internet Archive; museum catalog reproduction"
|
| 518 |
|
|
|
|
| 519 |
source_url = ia_item.get("storage_url") or ia_item["download_url"]
|
| 520 |
log.info(f"Case {case_id}: authentic source={source_url}")
|
| 521 |
auth_img = download_image_to_pil(source_url)
|
|
|
|
| 523 |
images_urls: List[str] = []
|
| 524 |
signature_crops: List[str] = []
|
| 525 |
|
|
|
|
| 526 |
url1 = save_image_return_url(auth_img, f"hidden_stroke/{case_id}/images/img_1.jpg")
|
| 527 |
images_urls.append(url1)
|
| 528 |
log.debug(f"Case {case_id}: saved authentic -> {url1}")
|
| 529 |
|
|
|
|
| 530 |
crop1 = crop_signature_macro(auth_img, 512)
|
| 531 |
crop1_url = save_image_return_url(crop1, f"hidden_stroke/{case_id}/signature_crops/crop_1.jpg", quality=88)
|
| 532 |
signature_crops.append(crop1_url)
|
| 533 |
log.debug(f"Case {case_id}: saved authentic crop -> {crop1_url}")
|
| 534 |
|
| 535 |
if mode == "knowledge":
|
| 536 |
+
for _ in [2, 3]:
|
| 537 |
images_urls.append(images_urls[0])
|
| 538 |
signature_crops.append(signature_crops[0])
|
| 539 |
else:
|
|
|
|
| 566 |
signature_crops.append(c_url)
|
| 567 |
log.debug(f"Case {case_id}: forgery saved -> {url}; crop -> {c_url}")
|
| 568 |
|
|
|
|
| 569 |
title = ia_item.get("title") or "Untitled"
|
| 570 |
creator = ia_item.get("creator") or ""
|
| 571 |
date = ia_item.get("date") or ""
|
|
|
|
| 782 |
ident = d.get("identifier")
|
| 783 |
if not ident:
|
| 784 |
continue
|
| 785 |
+
if ia_pool_ref().child(fb_key(ident)).get():
|
| 786 |
continue
|
| 787 |
try:
|
| 788 |
rec = ingest_ia_doc(d)
|
|
|
|
| 860 |
finally:
|
| 861 |
DEFAULT_IA_QUERY = original_q # restore
|
| 862 |
|
|
|
|
| 863 |
# --- DEV-ONLY: diagnostics (network + firebase sanity) ---
|
| 864 |
@app.route("/admin/diagnostics", methods=["GET"])
|
| 865 |
def diagnostics():
|
|
|
|
| 884 |
except Exception as e:
|
| 885 |
diag["ia"]["error"] = str(e)
|
| 886 |
|
|
|
|
| 887 |
try:
|
| 888 |
tiny = upload_bytes_to_storage(b"ping", f"diag/ping_{uuid.uuid4().hex}.txt", "text/plain")
|
| 889 |
diag["firebase"]["upload_test"] = tiny
|
|
|
|
| 899 |
case_id = utc_today_str()
|
| 900 |
public = ensure_case_generated(case_id)
|
| 901 |
|
|
|
|
| 902 |
existing = sessions_ref().order_by_child("user_id").equal_to(user_id).get()
|
| 903 |
sess = None
|
| 904 |
if existing:
|