| import asyncio |
| import io |
| import json |
| import os |
| import tempfile |
| import time |
| from pathlib import Path |
| from urllib.parse import parse_qs, parse_qsl, quote, unquote, urlencode, urlsplit |
|
|
| try: |
| from dotenv import load_dotenv |
|
|
| load_dotenv(Path(__file__).resolve().parent.parent / ".env") |
| except ImportError: |
| pass |
|
|
| import aiohttp |
| import httpx |
| import numpy as np |
| from fastapi import BackgroundTasks, FastAPI, HTTPException, Request |
| from fastapi.responses import FileResponse, Response, StreamingResponse |
| from fastapi.staticfiles import StaticFiles |
| from PIL import Image |
| from pydantic import BaseModel |
|
|
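# Configuration is read from the environment (optionally loaded from ../.env above):
# Turso stores searches and scan results, the Discord webhook receives task
# notifications, and PHPSESSID is the pixiv session cookie used for its ajax API.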
| TURSO_DB_URL = os.getenv("TURSO_DB_URL", "").strip() |
| TURSO_AUTH_TOKEN = os.getenv("TURSO_AUTH_TOKEN_WRITE", "").strip() |
| DISCORD_WEBHOOK_URL = os.getenv("DISCORD_WEBHOOK_URL", "").strip() |
| PHPSESSID = os.getenv("PHPSESSID", "") |
|
|
| IMG_BASE = "https://i.pximg.net/img-original/img/" |
|
|
|
|
| PIXIV_HEADERS = { |
| "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0", |
| "referer": "https://www.pixiv.net/", |
| } |
| AI_TAGS = { |
| "stablediffusion", |
| "ai-generated", |
| "novelai", |
| "novelaidiffusionai", |
| "aiart", |
| "ai", |
| "comfyui", |
| } |
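# Scan results are stored as small integer codes: a detector's 1-based position
# in EXIF_TYPE_ORDER is the value written to pi_scans.exif_type.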
| EXIF_TYPE_ORDER = ("novelai", "sd", "comfy", "mj", "celsys", "photoshop", "stealth") |
| EXIF_TYPE_TO_CODE = {name: idx + 1 for idx, name in enumerate(EXIF_TYPE_ORDER)} |
| POST_SCAN_LIMIT = 64 |
| EXIF_RANGE_LIMIT = 96 |
| FULL_IMAGE_LIMIT = 32 |
| PAGE_SIZE = 60 |
| SEARCH_PAGE_SIZE = 10 |
| THUMB_MAX_AGE = 1800 |
| THUMB_DIR = Path(tempfile.gettempdir()) / "pixif2-thumbs" |
| PAGE_URL_CACHE_MAX_AGE = 1800 |
| WEBP_SCALE = 0.4 |
| WEBP_QUALITY = 82 |
|
|
| app = FastAPI() |
| ACTIVE_TASKS = {} |
| TASK_EVENT_QUEUES = set() |
| PAGE_URL_CACHE = {} |
|
|
| FRONTEND_DIR = os.path.join(os.getcwd(), "frontend") |
|
|
|
|
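# Search ids encode the current time in centiseconds as base-26 (a-z), giving
# short, URL-safe ids that (at equal length) sort in creation order.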
| def base26_time(): |
| x = "" |
| n = int(time.time() * 100) |
| while n: |
| x = chr(97 + n % 26) + x |
| n //= 26 |
| return x |
|
|
|
|
def base26_to_time(value):
    # Decode the leading base-26 timestamp produced by base26_time(); ids like
    # "<prefix>_<username>" carry the timestamp only in the prefix, so stop at
    # the first character outside a-z instead of discarding the whole id.
    n = 0
    for c in value:
        if c < "a" or c > "z":
            break
        n = n * 26 + ord(c) - 97
    return n // 100
|
|
|
|
| def turso_url(): |
| base = TURSO_DB_URL.rstrip("/") |
| if not base.startswith("http"): |
| base = "https://" + base |
| return base |
|
|
|
|
async def turso_execute(stmts, timeout=60):
    # Send a list of "execute" statements through Turso's HTTP pipeline endpoint
    # in a single request; the trailing "close" releases the server-side stream.
    url = turso_url() + "/v2/pipeline"
    headers = {
        "Authorization": f"Bearer {TURSO_AUTH_TOKEN}",
        "Content-Type": "application/json",
    }
    body = {
        "requests": [{"type": "execute", "stmt": s} for s in stmts]
        + [{"type": "close"}]
    }
    async with httpx.AsyncClient(timeout=timeout) as client:
        r = await client.post(url, json=body, headers=headers)
        r.raise_for_status()
        return r.json()


async def turso_batch(stmts):
    # Same pipeline call with a longer timeout for the large INSERT batches
    # written by save_scan_results().
    return await turso_execute(stmts, timeout=120)
|
|
|
|
| async def init_db(): |
| await turso_execute( |
| [ |
| { |
| "sql": "CREATE TABLE IF NOT EXISTS pi_searches (id TEXT PRIMARY KEY, post_ids TEXT)" |
| }, |
| { |
| "sql": "CREATE TABLE IF NOT EXISTS pi_scans (post_id TEXT PRIMARY KEY, url TEXT, exif_type INTEGER)" |
| }, |
| ] |
| ) |
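    # Best-effort cleanup of legacy schema objects; "no such column" / "no such
    # index" errors from Turso simply mean the migration already happened.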
| for sql in ( |
| "DROP INDEX IF EXISTS pi_searches_created_at_idx", |
| "DROP INDEX IF EXISTS pi_search_posts_search_pos_idx", |
| "DROP INDEX IF EXISTS pi_search_posts_post_idx", |
| "DROP TABLE IF EXISTS pi_search_posts", |
| "ALTER TABLE pi_searches DROP COLUMN api_url", |
| "ALTER TABLE pi_searches DROP COLUMN created_at", |
| ): |
| try: |
| await turso_execute([{"sql": sql}]) |
| except httpx.HTTPStatusError as e: |
| text = e.response.text.casefold() |
| if "no such column" not in text and "no such index" not in text: |
| raise |
|
|
|
|
| async def discord_notify(msg): |
| if not DISCORD_WEBHOOK_URL: |
| print("WARN: DISCORD_WEBHOOK_URL not set, skipping notify") |
| return |
| try: |
| async with aiohttp.ClientSession() as session: |
| async with session.post( |
| DISCORD_WEBHOOK_URL.rstrip("/") + "/webhook-forward", |
| json={"content": msg}, |
| timeout=aiohttp.ClientTimeout(total=15), |
| ) as r: |
| if r.status >= 400: |
| body = await r.text() |
| print(f"Discord webhook failed ({r.status}): {body}") |
| except Exception as e: |
| print(f"Discord webhook error: {repr(e)}") |
|
|
|
|
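# Fan a "task finished" event out to every connected SSE client; if a client's
# queue is full, drop its oldest event first so put_nowait never raises.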
| async def publish_task_event(search_id): |
| data = {"id": search_id, "at": int(time.time())} |
| for queue in list(TASK_EVENT_QUEUES): |
| if queue.full(): |
| try: |
| queue.get_nowait() |
| except asyncio.QueueEmpty: |
| pass |
| queue.put_nowait(data) |
|
|
|
|
| async def finish_task(search_id): |
| ACTIVE_TASKS.pop(search_id, None) |
| await publish_task_event(search_id) |
|
|
|
|
def is_ai_post(post):
    # aiType == 2 is pixiv's own "AI-generated" flag; otherwise fall back to
    # checking the post's tag names against AI_TAGS.
    if post.get("aiType") == 2:
        return True
    for tag in post.get("tags") or []:
        if isinstance(tag, str):
            name = tag
        elif isinstance(tag, dict):
            name = tag.get("tag") or tag.get("name") or ""
        else:
            name = ""
        if name and name.casefold() in AI_TAGS:
            return True
    return False
|
|
|
|
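# The submit form accepts either a full pixiv search URL (tags/<kw> path or
# ?word=/?q= query forms) or a bare keyword string; extract the keywords so the
# ajax search API URL can be rebuilt from them.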
| def get_search_keywords(raw): |
| parts = urlsplit(raw) |
| path_parts = [unquote(p) for p in parts.path.split("/") if p] |
| if "tags" in path_parts: |
| idx = path_parts.index("tags") + 1 |
| if idx < len(path_parts): |
| return path_parts[idx] |
| query = parse_qs(parts.query) |
| words = query.get("word") or query.get("q") |
| if words: |
| return words[0] |
| return raw.strip() |
|
|
|
|
| def get_search_params(raw, keywords): |
| params = [] |
| for key, value in parse_qsl(urlsplit(raw).query, keep_blank_values=True): |
| if key in ("p", "q", "word", "type"): |
| continue |
| if key == "s_mode": |
| if value == "tag": |
| value = "s_tag" |
| elif value == "tag_full": |
| value = "s_tag_full" |
| params.append((key, value)) |
| params.append(("word", keywords)) |
| if not any(k == "s_mode" for k, _ in params): |
| params.append(("s_mode", "s_tag")) |
| return urlencode(params) |
|
|
|
|
| def get_search_api_url(raw, keywords): |
| encoded = quote(keywords, safe="") |
| params = get_search_params(raw, keywords) |
| return f"https://www.pixiv.net/ajax/search/artworks/{encoded}?{params}" |
|
|
|
|
| async def save_search(search_id, post_ids): |
| if not post_ids: |
| return |
| stmt = { |
| "sql": "INSERT OR REPLACE INTO pi_searches (id, post_ids) VALUES (?, ?)", |
| "args": [ |
| {"type": "text", "value": search_id}, |
| {"type": "text", "value": json.dumps(post_ids)}, |
| ], |
| } |
| await turso_execute([stmt]) |
|
|
|
|
| def user_search_id(user_id, username): |
| label = str(username or user_id).strip() or str(user_id) |
| return f"{base26_time()}_{label}" |
|
|
|
|
| async def pixiv_user_name(user_id, session): |
| data = await fetch_page(session, f"https://www.pixiv.net/ajax/user/{user_id}") |
| body = data.get("body") or {} |
| return (body.get("name") or body.get("account") or "").strip() |
|
|
|
|
| async def pixiv_user_names(user_ids, phpsessid): |
| cookies = {"PHPSESSID": phpsessid} |
| async with aiohttp.ClientSession(cookies=cookies, headers=PIXIV_HEADERS) as session: |
| async def load_name(uid): |
| try: |
| return uid, await pixiv_user_name(uid, session) |
| except Exception: |
| return uid, "" |
|
|
| return dict(await asyncio.gather(*(load_name(uid) for uid in user_ids))) |
|
|
|
|
| async def pixiv_search_live(url, pages, mode, phpsessid, search_id): |
| keywords = get_search_keywords(url) |
| api_url = get_search_api_url(url, keywords) |
| first_url = f"{api_url}&p=1" |
| cookies = {"PHPSESSID": phpsessid} |
| post_ids = [] |
| seen = set() |
| done = 0 |
| async with aiohttp.ClientSession(cookies=cookies, headers=PIXIV_HEADERS) as session: |
| tasks = [fetch_page(session, f"{api_url}&p={p}") for p in range(1, pages + 1)] |
| for coro in asyncio.as_completed(tasks): |
| data = await coro |
| done += 1 |
| if search_id in ACTIVE_TASKS: |
| ACTIVE_TASKS[search_id].update({"total": pages, "done": done}) |
| if data.get("error"): |
| continue |
| body = data.get("body") or {} |
| posts = (body.get("illustManga") or {}).get("data") or [] |
| if mode == "ai": |
| posts = [p for p in posts if is_ai_post(p)] |
| elif mode == "real": |
| posts = [p for p in posts if not is_ai_post(p)] |
| for post in posts: |
| pid = str(post.get("id") or "") |
| if pid and pid not in seen: |
| seen.add(pid) |
| post_ids.append(pid) |
| if done % 25 == 0: |
| await save_search(search_id, post_ids) |
| await save_search(search_id, post_ids) |
| return post_ids, keywords, first_url |
|
|
|
|
| async def pixiv_user_posts(user_id, phpsessid): |
| cookies = {"PHPSESSID": phpsessid} |
| async with aiohttp.ClientSession(cookies=cookies, headers=PIXIV_HEADERS) as session: |
| data = await fetch_page( |
| session, f"https://www.pixiv.net/ajax/user/{user_id}/profile/all" |
| ) |
| body = data.get("body") or {} |
| posts = list((body.get("illusts") or {}).keys()) |
| username = "" |
| pickup = body.get("pickup") or [] |
| if pickup: |
| username = (pickup[0] or {}).get("userName") or "" |
| if not username: |
| username = await pixiv_user_name(user_id, session) |
| return {"user_id": user_id, "post_ids": posts, "username": username} |
|
|
|
|
| async def fetch_page(session, url): |
| async with session.get(url) as r: |
| return await r.json() |
|
|
|
|
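# /ajax/illust/{id}/pages lists every image in a post with its size variants;
# responses are memoized in PAGE_URL_CACHE for PAGE_URL_CACHE_MAX_AGE seconds
# because the same post is hit repeatedly while scanning and thumbnailing.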
| async def get_post_pages(post_id, session): |
| key = str(post_id) |
| cached = PAGE_URL_CACHE.get(key) |
| now = time.time() |
| if cached and now - cached["time"] < PAGE_URL_CACHE_MAX_AGE: |
| return cached["pages"] |
| data = await fetch_page(session, f"https://www.pixiv.net/ajax/illust/{post_id}/pages") |
| pages = [] |
| for page in data.get("body") or []: |
| urls = page.get("urls") or {} |
| pages.append( |
| { |
| "original": urls.get("original") or "", |
| "regular": urls.get("regular") or "", |
| "small": urls.get("small") or "", |
| "thumb_mini": urls.get("thumb_mini") or "", |
| } |
| ) |
| PAGE_URL_CACHE[key] = {"time": now, "pages": pages} |
| return pages |
|
|
|
|
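# Minimal PNG walker: skip the 8-byte signature, then step through
# length/type/data/CRC chunks and return the payload of the first tEXt or iTXt
# chunk (where image generators embed their parameters), or None if absent.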
| def parse_png_metadata(data): |
| index = 8 |
| while index < len(data): |
| if index + 8 > len(data): |
| break |
| chunk_len = int.from_bytes(data[index : index + 4], "big") |
| chunk_type = data[index + 4 : index + 8].decode("ascii", errors="ignore") |
| index += 8 |
| if chunk_type in ("tEXt", "iTXt"): |
| content = data[index : index + chunk_len] |
| return ( |
| content.replace(b"\0", b"") if chunk_type == "tEXt" else content.strip() |
| ) |
| index += chunk_len + 4 |
| return None |
|
|
|
|
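# Heuristic classification of that first text chunk: NovelAI writes a "Title"
# of "AI generated image", A1111-style Stable Diffusion a "parameters" block,
# ComfyUI embeds JSON workflows, Celsys (Clip Studio) tags "Software", and
# anything else is treated as ordinary editor metadata ("photoshop").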
| def determine_exif_type(metadata): |
| if metadata is None: |
| return None |
| if metadata == b"TitleAI generated image": |
| return "novelai" |
| if metadata.startswith(b"parameter"): |
| return "sd" |
| if b'{"' in metadata: |
| return "comfy" |
| if metadata.startswith(b"SoftwareCelsys"): |
| return "celsys" |
| return "photoshop" |
|
|
|
|
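# "Stealth pnginfo" hides metadata in the least significant bit of the alpha
# channel (column-major, prefixed with a magic string). byteize() repacks those
# bits into bytes; only the compressed-alpha magic "stealth_pngcomp" is checked.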
| def byteize(alpha): |
| alpha = alpha.T.reshape((-1,)) |
| alpha = alpha[: (alpha.shape[0] // 8) * 8] |
| alpha = np.bitwise_and(alpha, 1) |
| alpha = alpha.reshape((-1, 8)) |
| return np.packbits(alpha, axis=1) |
|
|
|
|
| def has_stealth_png_bytes(data): |
| try: |
| image = Image.open(io.BytesIO(data)) |
| if "A" not in image.getbands(): |
| return False |
| alpha = np.array(image.getchannel("A")) |
| arr = byteize(alpha).flatten() |
| magic = b"stealth_pngcomp" |
| return bytes(arr[: len(magic)]) == magic |
| except Exception: |
| return False |
|
|
|
|
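# Scan one post: try cheap ranged requests against each PNG's header first and
# only download full images (to test for stealth pnginfo) when no text chunk
# identifies a generator. Concurrency is bounded by the three semaphores.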
| async def scan_post(post_id, session, post_sem, exif_sem, img_sem): |
| async with post_sem: |
| try: |
| pages = await get_post_pages(post_id, session) |
| image_urls = [p["original"] for p in pages if "png" in p["original"]] |
| for url in image_urls: |
| metadata = await get_exif_range(url, session, exif_sem) |
| exif_type = determine_exif_type(metadata) |
| if exif_type not in ("photoshop", "celsys", None): |
| code = EXIF_TYPE_TO_CODE.get(exif_type) |
| return post_id, url, code |
| for url in image_urls: |
| img_data = await fetch_image(session, url, img_sem) |
| if img_data and has_stealth_png_bytes(img_data): |
| return post_id, url, EXIF_TYPE_TO_CODE.get("stealth") |
| return post_id, None, None |
| except Exception: |
| return post_id, None, None |
|
|
|
|
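# Ranged GET of the first 513 bytes; generator text chunks normally sit right
# after the PNG header, so this is usually enough for parse_png_metadata.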
| async def get_exif_range(url, session, sem): |
| hdrs = {"Referer": "https://www.pixiv.net/", "Range": "bytes=0-512"} |
| if sem: |
| async with sem: |
| async with session.get(url, headers=hdrs) as r: |
| data = await r.read() |
| else: |
| async with session.get(url, headers=hdrs) as r: |
| data = await r.read() |
| return parse_png_metadata(data) |
|
|
|
|
| async def fetch_image(session, url, sem): |
| if sem: |
| async with sem: |
| async with session.get(url) as r: |
| return await r.read() |
| async with session.get(url) as r: |
| return await r.read() |
|
|
|
|
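# Scan a batch of post ids over one shared session. Progress is mirrored into
# ACTIVE_TASKS for /api/progress, and with save_live the partial results are
# flushed to the database every 20 posts so they are queryable mid-scan.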
| async def run_scan(post_ids, phpsessid, task_id=None, save_live=False): |
| post_sem = asyncio.Semaphore(POST_SCAN_LIMIT) |
| exif_sem = asyncio.Semaphore(EXIF_RANGE_LIMIT) |
| img_sem = asyncio.Semaphore(FULL_IMAGE_LIMIT) |
| cookies = {"PHPSESSID": phpsessid} |
| results = [] |
| async with aiohttp.ClientSession(cookies=cookies, headers=PIXIV_HEADERS) as session: |
| tasks = [ |
| scan_post(pid, session, post_sem, exif_sem, img_sem) for pid in post_ids |
| ] |
| pending = [] |
| for coro in asyncio.as_completed(tasks): |
| result = await coro |
| results.append(result) |
| pending.append(result) |
| if task_id and task_id in ACTIVE_TASKS: |
| ACTIVE_TASKS[task_id]["done"] = len(results) |
| if save_live and len(pending) >= 20: |
| await save_scan_results(pending) |
| pending = [] |
| if save_live and pending: |
| await save_scan_results(pending) |
| return results |
|
|
|
|
| async def save_scan_results(results): |
| stmts = [] |
| for post_id, url, exif_type in results: |
| short_url = url.replace(IMG_BASE, "", 1) if url else "" |
| stmts.append( |
| { |
| "sql": "INSERT OR REPLACE INTO pi_scans (post_id, url, exif_type) VALUES (?, ?, ?)", |
| "args": [ |
| {"type": "text", "value": str(post_id)}, |
| {"type": "text", "value": short_url}, |
| {"type": "integer", "value": str(exif_type)} |
| if exif_type |
| else {"type": "null"}, |
| ], |
| } |
| ) |
| if stmts: |
| for i in range(0, len(stmts), 200): |
| await turso_batch(stmts[i : i + 200]) |
|
|
|
|
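# Look up existing scan rows in chunks of 500 ids to keep the number of bound
# parameters per statement modest; rows come back as lists of
# {"type": ..., "value": ...} cells.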
| async def get_scanned_post_ids(post_ids): |
| if not post_ids: |
| return {} |
| chunks = [post_ids[i : i + 500] for i in range(0, len(post_ids), 500)] |
| scanned = {} |
| stmts = [] |
| for chunk in chunks: |
| placeholders = ",".join("?" for _ in chunk) |
| stmts.append( |
| { |
| "sql": f"SELECT post_id, url, exif_type FROM pi_scans WHERE post_id IN ({placeholders})", |
| "args": [{"type": "text", "value": str(pid)} for pid in chunk], |
| } |
| ) |
| resp = await turso_execute(stmts) |
| for result in resp.get("results") or []: |
| if "response" not in result: |
| continue |
| rows = result["response"].get("result", {}).get("rows", []) |
| for row in rows: |
| pid = row[0].get("value") |
| url_val = row[1].get("value") if row[1].get("type") != "null" else "" |
| et = row[2].get("value") if row[2].get("type") != "null" else None |
| scanned[pid] = {"url": url_val, "exif_type": int(et) if et else None} |
| return scanned |
|
|
|
|
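# Build frontend items for scanned posts. When exif_types filtering is active,
# code 0 stands for "scanned, nothing found" (NULL exif_type); with no filter,
# only posts with a detected type are returned.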
| def exif_items(post_ids, scanned, exif_types=None): |
| allowed = set(exif_types) if exif_types is not None else None |
| items = [] |
| for pid in post_ids: |
| s = scanned.get(pid) |
| if not s: |
| continue |
| exif_type = s.get("exif_type") |
| if exif_type is None and (allowed is None or 0 not in allowed): |
| continue |
| if exif_type is not None and allowed is not None and exif_type not in allowed: |
| continue |
| items.append( |
| { |
| "post_id": pid, |
| "url": s.get("url"), |
| "exif_type": exif_type, |
| "scanned": True, |
| **image_links(pid, s.get("url")), |
| } |
| ) |
| return items |
|
|
|
|
| def cleanup_thumbs(): |
| THUMB_DIR.mkdir(exist_ok=True) |
| now = time.time() |
| for path in THUMB_DIR.glob("*.webp"): |
| try: |
| if now - path.stat().st_atime > THUMB_MAX_AGE: |
| path.unlink() |
| except OSError: |
| pass |
|
|
|
|
| def page_num_from_url(url): |
| if not url: |
| return 0 |
| name = url.rsplit("/", 1)[-1] |
| if "_p" not in name: |
| return 0 |
| try: |
| return int(name.rsplit("_p", 1)[1].split(".", 1)[0]) |
| except ValueError: |
| return 0 |
|
|
|
|
| def media_type_from_url(url): |
| ext = urlsplit(url).path.rsplit(".", 1)[-1].casefold() |
| if ext in ("jpg", "jpeg"): |
| return "image/jpeg" |
| if ext == "png": |
| return "image/png" |
| if ext == "gif": |
| return "image/gif" |
| if ext == "webp": |
| return "image/webp" |
| return "application/octet-stream" |
|
|
|
|
| async def get_pixiv_image_url(post_id, page, size, phpsessid): |
| cookies = {"PHPSESSID": phpsessid} |
| async with aiohttp.ClientSession(cookies=cookies, headers=PIXIV_HEADERS) as session: |
| pages = await get_post_pages(post_id, session) |
| if not pages: |
| raise HTTPException(status_code=404, detail="image not found") |
| page = min(max(page, 0), len(pages) - 1) |
| urls = pages[page] |
| if size in ("full", "orig"): |
| url = urls.get("original") or urls.get("regular") or urls.get("small") |
| else: |
| url = urls.get("regular") or urls.get("small") or urls.get("original") |
| if not url: |
| raise HTTPException(status_code=404, detail="image not found") |
| return url |
|
|
|
|
| async def fetch_pixiv_bytes(url, phpsessid): |
| cookies = {"PHPSESSID": phpsessid} |
| async with aiohttp.ClientSession(cookies=cookies, headers=PIXIV_HEADERS) as session: |
| async with session.get(url) as r: |
| if r.status >= 400: |
| raise HTTPException(status_code=r.status, detail="image fetch failed") |
| return await r.read(), r.headers.get("Content-Type") or media_type_from_url(url) |
|
|
|
|
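# Downscale a pixiv image to a cached on-disk WebP; the filename encodes post,
# page, kind and scale, and os.utime refreshes the access time so frequently
# served files survive cleanup_thumbs()'s atime-based expiry.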
| async def create_webp(post_id, image_url, phpsessid, page=0, kind="t"): |
| cleanup_thumbs() |
| scale_tag = int(WEBP_SCALE * 100) |
| out = THUMB_DIR / f"{post_id}_p{page}_{kind}{scale_tag}.webp" |
| if out.exists(): |
| os.utime(out, None) |
| return out |
| data, _ = await fetch_pixiv_bytes(image_url, phpsessid) |
| if not data: |
| raise HTTPException(status_code=404, detail="image not found") |
| image = Image.open(io.BytesIO(data)) |
| image = image.resize( |
| (max(int(image.width * WEBP_SCALE), 1), max(int(image.height * WEBP_SCALE), 1)) |
| ) |
| if image.mode not in ("RGB", "RGBA"): |
| image = image.convert("RGB") |
| image.save(out, "WEBP", quality=WEBP_QUALITY) |
| return out |
|
|
|
|
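# Every image URL handed to the frontend goes through this app's /api/i proxy:
# "v" serves the downscaled WebP preview and "o" streams the original as an
# attachment. The page number is parsed from the "_pN" suffix of the stored URL.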
| def image_links(post_id, url): |
| page = page_num_from_url(url) |
| suffix = f"?p={page}" |
| pid = quote(str(post_id), safe="") |
| webp_url = f"/api/i/{pid}/v{suffix}" |
| return { |
| "image_url": webp_url, |
| "preview_url": webp_url, |
| "download_url": f"/api/i/{pid}/o{suffix}", |
| "full_image_url": webp_url, |
| "page": page, |
| } |
|
|
|
|
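# Background task bodies: each registers itself in ACTIVE_TASKS for
# /api/progress, reports start and completion to Discord, and always clears its
# entry via finish_task() so /api/events subscribers see the state change.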
| async def bg_search_task(search_id, url, pages, mode, phpsessid): |
| ACTIVE_TASKS[search_id] = { |
| "type": "search", |
| "phase": "searching", |
| "total": pages, |
| "done": 0, |
| } |
| await discord_notify(f"`{search_id}` started") |
| try: |
| post_ids, _, _ = await pixiv_search_live(url, pages, mode, phpsessid, search_id) |
| await discord_notify(f"`{search_id}` completed - {len(post_ids)} posts found") |
| except Exception as e: |
| await discord_notify(f"`{search_id}` failed: {e}") |
| finally: |
| await finish_task(search_id) |
|
|
|
|
| async def bg_user_task(search_id, user_id, phpsessid): |
| ACTIVE_TASKS[search_id] = { |
| "type": "user_search", |
| "phase": "searching", |
| "total": 1, |
| "done": 0, |
| } |
| await discord_notify(f"`{search_id}` started (user {user_id})") |
| try: |
| result = await pixiv_user_posts(user_id, phpsessid) |
| post_ids = list(dict.fromkeys(result["post_ids"])) |
| ACTIVE_TASKS[search_id]["done"] = 1 |
| if not post_ids: |
| await discord_notify(f"`{search_id}` completed - no posts, not saved") |
| return |
| await save_search(search_id, post_ids) |
| already = await get_scanned_post_ids(post_ids) |
| to_scan = [pid for pid in post_ids if pid not in already] |
| if to_scan: |
| ACTIVE_TASKS[search_id].update( |
| {"phase": "scanning", "total": len(to_scan), "done": 0} |
| ) |
| await run_scan(to_scan, phpsessid, task_id=search_id, save_live=True) |
| await discord_notify( |
| f"`{search_id}` completed - {len(post_ids)} posts from user {user_id}" |
| ) |
| except Exception as e: |
| await discord_notify(f"`{search_id}` failed: {e}") |
| finally: |
| await finish_task(search_id) |
|
|
|
|
| async def bg_user_batch_task(jobs, phpsessid): |
| await asyncio.gather( |
| *(bg_user_task(search_id, user_id, phpsessid) for search_id, user_id in jobs) |
| ) |
|
|
|
|
| async def bg_scan_task(search_id, post_ids, phpsessid): |
| ACTIVE_TASKS[search_id] = { |
| "type": "scan", |
| "phase": "scanning", |
| "total": len(post_ids), |
| "done": 0, |
| } |
| await discord_notify(f"`{search_id}` scan started ({len(post_ids)} posts)") |
| try: |
| results = await run_scan(post_ids, phpsessid, task_id=search_id, save_live=True) |
| found = sum(1 for _, url, _ in results if url) |
| await discord_notify( |
| f"`{search_id}` scan completed - {found}/{len(post_ids)} have exif" |
| ) |
| except Exception as e: |
| await discord_notify(f"`{search_id}` scan failed: {e}") |
| finally: |
| await finish_task(search_id) |
|
|
|
|
| async def bg_search_and_scan_task(search_id, url, pages, mode, phpsessid): |
| ACTIVE_TASKS[search_id] = { |
| "type": "search+scan", |
| "phase": "searching", |
| "total": pages, |
| "done": 0, |
| } |
| await discord_notify(f"`{search_id}` search+scan started") |
| try: |
| post_ids, _, _ = await pixiv_search_live(url, pages, mode, phpsessid, search_id) |
| await discord_notify( |
| f"`{search_id}` search done - {len(post_ids)} posts, scanning..." |
| ) |
| already = await get_scanned_post_ids(post_ids) |
| to_scan = [pid for pid in post_ids if pid not in already] |
| if to_scan: |
| ACTIVE_TASKS[search_id].update( |
| {"phase": "scanning", "total": len(to_scan), "done": 0} |
| ) |
| results = await run_scan( |
| to_scan, phpsessid, task_id=search_id, save_live=True |
| ) |
| found = sum(1 for _, url, _ in results if url) |
| await discord_notify( |
| f"`{search_id}` scan completed - {found}/{len(to_scan)} new exif" |
| ) |
| else: |
| await discord_notify(f"`{search_id}` all {len(post_ids)} already scanned") |
| except Exception as e: |
| await discord_notify(f"`{search_id}` failed: {e}") |
| finally: |
| await finish_task(search_id) |
|
|
|
|
| class SearchRequest(BaseModel): |
| url: str |
| pages: int = 30 |
| mode: str = "ai" |
| action: str = "search" |
|
|
|
|
| class UserSearchRequest(BaseModel): |
| user_ids: list |
| action: str = "search" |
|
|
|
|
| class ScanRequest(BaseModel): |
| search_id: str |
|
|
|
|
| class RenameRequest(BaseModel): |
| new_id: str |
|
|
|
|
| @app.on_event("startup") |
| async def startup(): |
| if not TURSO_DB_URL: |
| print("WARN: TURSO_DB_URL not set, skipping DB init") |
| return |
| try: |
| await init_db() |
| except Exception as e: |
| print(f"WARN: DB init failed ({e}), will retry on first request") |
|
|
|
|
| @app.post("/api/submit") |
| async def submit_search(req: SearchRequest, bg: BackgroundTasks): |
| search_id = base26_time() |
| phpsessid = PHPSESSID |
| bg.add_task( |
| bg_search_and_scan_task, search_id, req.url, req.pages, req.mode, phpsessid |
| ) |
| return {"id": search_id, "status": "started"} |
|
|
|
|
| @app.post("/api/submit_users") |
| async def submit_users(req: UserSearchRequest, bg: BackgroundTasks): |
| phpsessid = PHPSESSID |
| user_ids = list(dict.fromkeys(int(u) for u in req.user_ids)) |
| names = await pixiv_user_names(user_ids, phpsessid) |
| jobs = [(user_search_id(uid, names.get(uid)), uid) for uid in user_ids] |
| bg.add_task(bg_user_batch_task, jobs, phpsessid) |
| return {"ids": [search_id for search_id, _ in jobs], "status": "started"} |
|
|
|
|
| @app.post("/api/scan") |
| async def scan_search(req: ScanRequest, bg: BackgroundTasks): |
| phpsessid = PHPSESSID |
| if req.search_id in ACTIVE_TASKS: |
| return {"status": "active", **ACTIVE_TASKS[req.search_id]} |
| resp = await turso_execute( |
| [ |
| { |
| "sql": "SELECT post_ids FROM pi_searches WHERE id = ?", |
| "args": [{"type": "text", "value": req.search_id}], |
| } |
| ] |
| ) |
| results = resp.get("results") or [] |
| if not results or "response" not in results[0]: |
| return {"error": "not found"} |
| rows = results[0]["response"].get("result", {}).get("rows", []) |
| if not rows: |
| return {"error": "not found"} |
| post_ids = json.loads(rows[0][0].get("value", "[]")) |
| already = await get_scanned_post_ids(post_ids) |
| to_scan = [pid for pid in post_ids if pid not in already] |
| if not to_scan: |
| return {"status": "already_scanned", "count": len(post_ids)} |
| bg.add_task(bg_scan_task, req.search_id, to_scan, phpsessid) |
| return {"status": "scanning", "total": len(post_ids), "to_scan": len(to_scan)} |
|
|
|
|
| @app.get("/api/searches") |
| async def list_searches(page: int = 1): |
| page = max(page, 1) |
| offset = (page - 1) * SEARCH_PAGE_SIZE |
| resp = await turso_execute( |
| [ |
| { |
| "sql": "SELECT COUNT(*) FROM pi_searches WHERE post_ids != '[]'" |
| }, |
| { |
| "sql": "SELECT id, post_ids FROM pi_searches WHERE post_ids != '[]' " |
| "ORDER BY id DESC LIMIT ? OFFSET ?", |
| "args": [ |
| {"type": "integer", "value": str(SEARCH_PAGE_SIZE)}, |
| {"type": "integer", "value": str(offset)}, |
| ], |
| }, |
| ] |
| ) |
| results = resp.get("results") or [] |
| if len(results) < 2 or "response" not in results[1]: |
| return {"items": [], "total": 0, "page": page, "pages": 1} |
| count_rows = results[0].get("response", {}).get("result", {}).get("rows", []) |
| total = int(count_rows[0][0].get("value", "0")) if count_rows else 0 |
| rows = results[1]["response"].get("result", {}).get("rows", []) |
| search_posts = [] |
| all_post_ids = [] |
| seen = set() |
| for row in rows: |
| post_ids = json.loads(row[1].get("value", "[]")) |
| search_posts.append(post_ids) |
| for pid in post_ids: |
| if pid not in seen: |
| seen.add(pid) |
| all_post_ids.append(pid) |
| scanned = await get_scanned_post_ids(all_post_ids) |
| items = [] |
| for row, post_ids in zip(rows, search_posts): |
| search_id = row[0].get("value") |
| items.append( |
| { |
| "id": search_id, |
| "created_at": base26_to_time(search_id), |
| "found_exif": sum( |
| 1 for pid in post_ids if scanned.get(pid, {}).get("exif_type") |
| ), |
| "total_searched": len(post_ids), |
| } |
| ) |
| return { |
| "items": items, |
| "total": total, |
| "page": page, |
| "page_size": SEARCH_PAGE_SIZE, |
| "pages": max((total + SEARCH_PAGE_SIZE - 1) // SEARCH_PAGE_SIZE, 1), |
| } |
|
|
|
|
| @app.get("/api/search/{search_id}") |
| async def get_search(search_id: str): |
| resp = await turso_execute( |
| [ |
| { |
| "sql": "SELECT id, post_ids FROM pi_searches WHERE id = ?", |
| "args": [{"type": "text", "value": search_id}], |
| } |
| ] |
| ) |
| results = resp.get("results") or [] |
| if not results or "response" not in results[0]: |
| return {"error": "not found"} |
| rows = results[0]["response"].get("result", {}).get("rows", []) |
| if not rows: |
| return {"error": "not found"} |
| row = rows[0] |
| post_ids = json.loads(row[1].get("value", "[]")) |
| scanned = await get_scanned_post_ids(post_ids) |
| return { |
| "id": row[0].get("value"), |
| "post_ids": post_ids, |
| "created_at": base26_to_time(row[0].get("value")), |
| "scanned": scanned, |
| } |
|
|
|
|
| @app.get("/api/results/{search_id}") |
| async def get_results(search_id: str, page: int = 1, exif_types: str = ""): |
| resp = await turso_execute( |
| [ |
| { |
| "sql": "SELECT post_ids FROM pi_searches WHERE id = ?", |
| "args": [{"type": "text", "value": search_id}], |
| } |
| ] |
| ) |
| results = resp.get("results") or [] |
| if not results or "response" not in results[0]: |
| return {"error": "not found"} |
| rows = results[0]["response"].get("result", {}).get("rows", []) |
| if not rows: |
| return {"error": "not found"} |
| post_ids = json.loads(rows[0][0].get("value", "[]")) |
| scanned = await get_scanned_post_ids(post_ids) |
| allowed = [ |
| int(x) |
| for x in exif_types.split(",") |
| if x.isdigit() and int(x) in (*EXIF_TYPE_TO_CODE.values(), 0) |
| ] |
| source = exif_items(post_ids, scanned, allowed if exif_types != "" else None) |
| page = max(page, 1) |
| total = len(source) |
| start = (page - 1) * PAGE_SIZE |
| items = source[start : start + PAGE_SIZE] |
| return { |
| "search_id": search_id, |
| "items": items, |
| "total": total, |
| "page": page, |
| "page_size": PAGE_SIZE, |
| "pages": max((total + PAGE_SIZE - 1) // PAGE_SIZE, 1), |
| "raw_total": len(post_ids), |
| "scanned_count": len(scanned), |
| } |
|
|
|
|
| @app.get("/api/i/{post_id}/t") |
| async def get_image_thumb(post_id: str, p=0): |
| p = int(p or 0) |
| image_url = await get_pixiv_image_url(post_id, p, "thumb", PHPSESSID) |
| path = await create_webp(post_id, image_url, PHPSESSID, p, "t") |
| return FileResponse( |
| path, |
| media_type="image/webp", |
| headers={"Cache-Control": f"public, max-age={THUMB_MAX_AGE}"}, |
| ) |
|
|
|
|
| @app.get("/api/i/{post_id}/v") |
| async def get_image_preview(post_id: str, p=0): |
| p = int(p or 0) |
| image_url = await get_pixiv_image_url(post_id, p, "full", PHPSESSID) |
| path = await create_webp(post_id, image_url, PHPSESSID, p, "v") |
| return FileResponse( |
| path, |
| media_type="image/webp", |
| headers={"Cache-Control": f"public, max-age={THUMB_MAX_AGE}"}, |
| ) |
|
|
|
|
| @app.get("/api/i/{post_id}/o") |
| async def get_image_original(post_id: str, p=0): |
| p = int(p or 0) |
| image_url = await get_pixiv_image_url(post_id, p, "orig", PHPSESSID) |
| data, content_type = await fetch_pixiv_bytes(image_url, PHPSESSID) |
| filename = urlsplit(image_url).path.rsplit("/", 1)[-1] or f"{post_id}_p{p}.png" |
| return Response( |
| data, |
| media_type=content_type, |
| headers={ |
| "Cache-Control": f"public, max-age={THUMB_MAX_AGE}", |
| "Content-Disposition": f'attachment; filename="{filename}"', |
| }, |
| ) |
|
|
|
|
| @app.get("/api/image/{post_id}/thumb") |
| async def get_long_image_thumb(post_id: str, page: int = 0, p=None): |
| return await get_image_thumb(post_id, page if p is None else p) |
|
|
|
|
| @app.get("/api/image/{post_id}/full") |
| async def get_long_image_full(post_id: str, page: int = 0, p=None): |
| return await get_image_preview(post_id, page if p is None else p) |
|
|
|
|
| @app.get("/api/thumb/{post_id}") |
| async def get_thumb(post_id: str): |
| return await get_image_thumb(post_id) |
|
|
|
|
| @app.delete("/api/search/{search_id}") |
| async def delete_search(search_id: str): |
| await turso_execute( |
| [ |
| { |
| "sql": "DELETE FROM pi_searches WHERE id = ?", |
| "args": [{"type": "text", "value": search_id}], |
| } |
| ] |
| ) |
| return {"status": "deleted"} |
|
|
|
|
| @app.patch("/api/search/{search_id}") |
| async def rename_search(search_id: str, req: RenameRequest): |
| resp = await turso_execute( |
| [ |
| { |
| "sql": "SELECT post_ids FROM pi_searches WHERE id = ?", |
| "args": [{"type": "text", "value": search_id}], |
| } |
| ] |
| ) |
| results = resp.get("results") or [] |
| if not results or "response" not in results[0]: |
| return {"error": "not found"} |
| rows = results[0]["response"].get("result", {}).get("rows", []) |
| if not rows: |
| return {"error": "not found"} |
    # Rename via a single UPDATE so the search row is never left deleted if
    # writing the new id fails.
    await turso_execute(
        [
            {
                "sql": "UPDATE pi_searches SET id = ? WHERE id = ?",
                "args": [
                    {"type": "text", "value": req.new_id},
                    {"type": "text", "value": search_id},
                ],
            }
        ]
    )
| return {"status": "renamed", "new_id": req.new_id} |
|
|
|
|
| @app.get("/api/progress") |
| async def get_progress(): |
| return [{"id": k, **v} for k, v in ACTIVE_TASKS.items()] |
|
|
|
|
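# Server-sent events: each connected client gets a bounded queue that
# publish_task_event() pushes into; a keepalive comment line is emitted after
# every 25 seconds of inactivity.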
| @app.get("/api/events") |
| async def events(request: Request): |
| queue = asyncio.Queue(maxsize=16) |
| TASK_EVENT_QUEUES.add(queue) |
|
|
| async def stream(): |
| try: |
| while not await request.is_disconnected(): |
| try: |
| data = await asyncio.wait_for(queue.get(), timeout=25) |
| yield f"data: {json.dumps(data)}\n\n" |
| except asyncio.TimeoutError: |
| yield ": keepalive\n\n" |
| finally: |
| TASK_EVENT_QUEUES.discard(queue) |
|
|
| return StreamingResponse(stream(), media_type="text/event-stream") |
|
|
|
|
| app.mount("/", StaticFiles(directory=FRONTEND_DIR, html=True), name="frontend") |
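# Local development sketch (assumes this module is saved as main.py and uvicorn
# is installed): uvicorn main:app --reload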
|
|