q6 committed on
Commit
c5ec522
·
1 Parent(s): ec9ced9

Revert "Index stuff"

Browse files

This reverts commit 969779a878fab17e2689543fe6421198e22b5c72.

Files changed (4) hide show
  1. backend/app.py +87 -195
  2. frontend/app.js +50 -25
  3. frontend/index.html +6 -8
  4. frontend/style.css +14 -0
backend/app.py CHANGED
@@ -53,8 +53,6 @@ PAGE_SIZE = 60
53
  THUMB_MAX_AGE = 1800
54
  THUMB_DIR = Path(tempfile.gettempdir()) / "pixif2-thumbs"
55
  PAGE_URL_CACHE_MAX_AGE = 1800
56
- SCAN_LOOKUP_CHUNK = 500
57
- PROGRESS_MILESTONES = (25, 50, 75)
58
 
59
  app = FastAPI()
60
  ACTIVE_TASKS = {}
@@ -129,19 +127,13 @@ async def init_db():
129
  {
130
  "sql": "CREATE TABLE IF NOT EXISTS pi_scans (post_id TEXT PRIMARY KEY, url TEXT, exif_type INTEGER)"
131
  },
132
- {
133
- "sql": "CREATE TABLE IF NOT EXISTS pi_search_posts (search_id TEXT NOT NULL, post_id TEXT NOT NULL, pos INTEGER NOT NULL, PRIMARY KEY (search_id, post_id))"
134
- },
135
- {
136
- "sql": "CREATE INDEX IF NOT EXISTS pi_search_posts_search_pos_idx ON pi_search_posts (search_id, pos)"
137
- },
138
- {
139
- "sql": "CREATE INDEX IF NOT EXISTS pi_search_posts_post_idx ON pi_search_posts (post_id)"
140
- },
141
  ]
142
  )
143
  for sql in (
144
  "DROP INDEX IF EXISTS pi_searches_created_at_idx",
 
 
 
145
  "ALTER TABLE pi_searches DROP COLUMN api_url",
146
  "ALTER TABLE pi_searches DROP COLUMN created_at",
147
  ):
@@ -171,38 +163,6 @@ async def discord_notify(msg):
171
  print(f"Discord webhook error: {repr(e)}")
172
 
173
 
174
- async def start_task(search_id, task_type, phase, total):
175
- ACTIVE_TASKS[search_id] = {
176
- "type": task_type,
177
- "phase": phase,
178
- "total": total,
179
- "done": 0,
180
- "sent": set(),
181
- }
182
- await discord_notify(f"`{search_id}` started")
183
-
184
-
185
- async def update_task(search_id, done=None, total=None, phase=None):
186
- task = ACTIVE_TASKS.get(search_id)
187
- if not task:
188
- return
189
- if done is not None:
190
- task["done"] = done
191
- if total is not None:
192
- task["total"] = total
193
- if phase is not None and task.get("phase") != phase:
194
- task["phase"] = phase
195
- task["sent"] = set()
196
- total = task.get("total") or 0
197
- if total <= 0:
198
- return
199
- pct = int(task.get("done", 0) * 100 / total)
200
- for mark in PROGRESS_MILESTONES:
201
- if pct >= mark and mark not in task["sent"]:
202
- task["sent"].add(mark)
203
- await discord_notify(f"`{search_id}` {mark}% {task['phase']}")
204
-
205
-
206
  def is_ai_post(post):
207
  if post.get("aiType") == 2:
208
  return True
@@ -266,48 +226,6 @@ async def save_search(search_id, post_ids):
266
  ],
267
  }
268
  await turso_execute([stmt])
269
- await save_search_posts(search_id, post_ids)
270
-
271
-
272
- async def save_search_posts(search_id, post_ids):
273
- stmts = []
274
- for pos, post_id in enumerate(post_ids):
275
- stmts.append(
276
- {
277
- "sql": "INSERT OR IGNORE INTO pi_search_posts (search_id, post_id, pos) VALUES (?, ?, ?)",
278
- "args": [
279
- {"type": "text", "value": search_id},
280
- {"type": "text", "value": str(post_id)},
281
- {"type": "integer", "value": str(pos)},
282
- ],
283
- }
284
- )
285
- for i in range(0, len(stmts), 200):
286
- await turso_batch(stmts[i : i + 200])
287
-
288
-
289
- async def ensure_search_posts(search_id, post_ids):
290
- resp = await turso_execute(
291
- [
292
- {
293
- "sql": "SELECT COUNT(*) FROM pi_search_posts WHERE search_id = ?",
294
- "args": [{"type": "text", "value": search_id}],
295
- }
296
- ]
297
- )
298
- rows = (resp.get("results") or [{}])[0].get("response", {}).get("result", {}).get("rows", [])
299
- count = int(rows[0][0].get("value", "0")) if rows else 0
300
- if count == len(post_ids):
301
- return
302
- await turso_execute(
303
- [
304
- {
305
- "sql": "DELETE FROM pi_search_posts WHERE search_id = ?",
306
- "args": [{"type": "text", "value": search_id}],
307
- }
308
- ]
309
- )
310
- await save_search_posts(search_id, post_ids)
311
 
312
 
313
  async def pixiv_search_live(url, pages, mode, phpsessid, search_id):
@@ -324,7 +242,8 @@ async def pixiv_search_live(url, pages, mode, phpsessid, search_id):
324
  for coro in asyncio.as_completed(tasks):
325
  data = await coro
326
  done += 1
327
- await update_task(search_id, done=done, total=pages)
 
328
  if data.get("error"):
329
  continue
330
  body = data.get("body") or {}
@@ -344,7 +263,7 @@ async def pixiv_search_live(url, pages, mode, phpsessid, search_id):
344
  return post_ids, keywords, first_url
345
 
346
 
347
- async def pixiv_user_posts(user_ids, phpsessid, task_id=None):
348
  cookies = {"PHPSESSID": phpsessid}
349
  results = []
350
  async with aiohttp.ClientSession(cookies=cookies, headers=PIXIV_HEADERS) as session:
@@ -364,8 +283,6 @@ async def pixiv_user_posts(user_ids, phpsessid, task_id=None):
364
  )
365
  username = (udata.get("body") or {}).get("name") or ""
366
  results.append({"user_id": uid, "post_ids": posts, "username": username})
367
- if task_id:
368
- await update_task(task_id, done=len(results), total=len(user_ids))
369
  return results
370
 
371
 
@@ -504,8 +421,8 @@ async def run_scan(post_ids, phpsessid, task_id=None, save_live=False):
504
  result = await coro
505
  results.append(result)
506
  pending.append(result)
507
- if task_id:
508
- await update_task(task_id, done=len(results), total=len(post_ids))
509
  if save_live and len(pending) >= 20:
510
  await save_scan_results(pending)
511
  pending = []
@@ -538,19 +455,15 @@ async def save_scan_results(results):
538
  async def get_scanned_post_ids(post_ids):
539
  if not post_ids:
540
  return {}
541
- chunks = [
542
- post_ids[i : i + SCAN_LOOKUP_CHUNK]
543
- for i in range(0, len(post_ids), SCAN_LOOKUP_CHUNK)
544
- ]
545
-
546
- async def fetch_chunk(chunk):
547
  placeholders = ",".join("?" for _ in chunk)
548
  stmt = {
549
  "sql": f"SELECT post_id, url, exif_type FROM pi_scans WHERE post_id IN ({placeholders})",
550
  "args": [{"type": "text", "value": str(pid)} for pid in chunk],
551
  }
552
  resp = await turso_execute([stmt])
553
- out = {}
554
  results = resp.get("results") or []
555
  if results and "response" in results[0]:
556
  rows = results[0]["response"].get("result", {}).get("rows", [])
@@ -558,12 +471,7 @@ async def get_scanned_post_ids(post_ids):
558
  pid = row[0].get("value")
559
  url_val = row[1].get("value") if row[1].get("type") != "null" else ""
560
  et = row[2].get("value") if row[2].get("type") != "null" else None
561
- out[pid] = {"url": url_val, "exif_type": int(et) if et else None}
562
- return out
563
-
564
- scanned = {}
565
- for result in await asyncio.gather(*(fetch_chunk(chunk) for chunk in chunks)):
566
- scanned.update(result)
567
  return scanned
568
 
569
 
@@ -649,8 +557,7 @@ async def fetch_pixiv_bytes(url, phpsessid):
649
 
650
  async def create_webp(post_id, image_url, phpsessid, page=0, kind="t"):
651
  cleanup_thumbs()
652
- cache_kind = "v33" if kind == "v" else "t10"
653
- out = THUMB_DIR / f"{post_id}_p{page}_{cache_kind}.webp"
654
  if out.exists():
655
  os.utime(out, None)
656
  return out
@@ -659,9 +566,9 @@ async def create_webp(post_id, image_url, phpsessid, page=0, kind="t"):
659
  raise HTTPException(status_code=404, detail="image not found")
660
  image = Image.open(io.BytesIO(data))
661
  if kind == "v":
662
- image = image.resize((max(image.width // 3, 1), max(image.height // 3, 1)))
663
  else:
664
- image = image.resize((max(image.width // 10, 1), max(image.height // 10, 1)))
665
  if image.mode not in ("RGB", "RGBA"):
666
  image = image.convert("RGB")
667
  image.save(out, "WEBP", quality=82 if kind == "v" else 72)
@@ -682,7 +589,13 @@ def image_links(post_id, url):
682
 
683
 
684
  async def bg_search_task(search_id, url, pages, mode, phpsessid):
685
- await start_task(search_id, "search", "searching", pages)
 
 
 
 
 
 
686
  try:
687
  post_ids, _, _ = await pixiv_search_live(url, pages, mode, phpsessid, search_id)
688
  await discord_notify(f"`{search_id}` completed - {len(post_ids)} posts found")
@@ -693,14 +606,27 @@ async def bg_search_task(search_id, url, pages, mode, phpsessid):
693
 
694
 
695
  async def bg_user_task(search_id, user_ids, phpsessid):
696
- await start_task(search_id, "user_search", "searching", len(user_ids))
 
 
 
 
 
 
697
  try:
698
- results = await pixiv_user_posts(user_ids, phpsessid, search_id)
699
  all_post_ids = []
700
  for r in results:
701
  all_post_ids.extend(r["post_ids"])
702
  all_post_ids = list(dict.fromkeys(all_post_ids))
703
  await save_search(search_id, all_post_ids)
 
 
 
 
 
 
 
704
  await discord_notify(
705
  f"`{search_id}` completed - {len(all_post_ids)} posts from {len(user_ids)} users"
706
  )
@@ -711,12 +637,18 @@ async def bg_user_task(search_id, user_ids, phpsessid):
711
 
712
 
713
  async def bg_scan_task(search_id, post_ids, phpsessid):
714
- await start_task(search_id, "scan", "scanning", len(post_ids))
 
 
 
 
 
 
715
  try:
716
  results = await run_scan(post_ids, phpsessid, task_id=search_id, save_live=True)
717
  found = sum(1 for _, url, _ in results if url)
718
  await discord_notify(
719
- f"`{search_id}` completed - {found}/{len(post_ids)} have exif"
720
  )
721
  except Exception as e:
722
  await discord_notify(f"`{search_id}` scan failed: {e}")
@@ -725,22 +657,33 @@ async def bg_scan_task(search_id, post_ids, phpsessid):
725
 
726
 
727
  async def bg_search_and_scan_task(search_id, url, pages, mode, phpsessid):
728
- await start_task(search_id, "search+scan", "searching", pages)
 
 
 
 
 
 
729
  try:
730
  post_ids, _, _ = await pixiv_search_live(url, pages, mode, phpsessid, search_id)
 
 
 
731
  already = await get_scanned_post_ids(post_ids)
732
  to_scan = [pid for pid in post_ids if pid not in already]
733
  if to_scan:
734
- await update_task(search_id, done=0, total=len(to_scan), phase="scanning")
 
 
735
  results = await run_scan(
736
  to_scan, phpsessid, task_id=search_id, save_live=True
737
  )
738
  found = sum(1 for _, url, _ in results if url)
739
  await discord_notify(
740
- f"`{search_id}` completed - {len(post_ids)} posts, {found}/{len(to_scan)} new exif"
741
  )
742
  else:
743
- await discord_notify(f"`{search_id}` completed - all {len(post_ids)} already scanned")
744
  except Exception as e:
745
  await discord_notify(f"`{search_id}` failed: {e}")
746
  finally:
@@ -782,12 +725,9 @@ async def startup():
782
  async def submit_search(req: SearchRequest, bg: BackgroundTasks):
783
  search_id = base26_time()
784
  phpsessid = PHPSESSID
785
- if req.action == "search":
786
- bg.add_task(bg_search_task, search_id, req.url, req.pages, req.mode, phpsessid)
787
- elif req.action == "scan_and_search":
788
- bg.add_task(
789
- bg_search_and_scan_task, search_id, req.url, req.pages, req.mode, phpsessid
790
- )
791
  return {"id": search_id, "status": "started"}
792
 
793
 
@@ -803,6 +743,8 @@ async def submit_users(req: UserSearchRequest, bg: BackgroundTasks):
803
  @app.post("/api/scan")
804
  async def scan_search(req: ScanRequest, bg: BackgroundTasks):
805
  phpsessid = PHPSESSID
 
 
806
  resp = await turso_execute(
807
  [
808
  {
@@ -895,63 +837,25 @@ async def get_results(search_id: str, page: int = 1, exif_only: bool = True):
895
  if not rows:
896
  return {"error": "not found"}
897
  post_ids = json.loads(rows[0][0].get("value", "[]"))
898
- await ensure_search_posts(search_id, post_ids)
899
- page = max(page, 1)
900
- offset = (page - 1) * PAGE_SIZE
901
- args = [{"type": "text", "value": search_id}]
902
- if exif_only:
903
- total_sql = "SELECT COUNT(*) FROM pi_search_posts p JOIN pi_scans s ON s.post_id = p.post_id WHERE p.search_id = ? AND s.exif_type IS NOT NULL"
904
- item_sql = "SELECT p.post_id, s.url, s.exif_type, 1 FROM pi_search_posts p JOIN pi_scans s ON s.post_id = p.post_id WHERE p.search_id = ? AND s.exif_type IS NOT NULL ORDER BY p.pos LIMIT ? OFFSET ?"
905
- else:
906
- total_sql = "SELECT COUNT(*) FROM pi_search_posts WHERE search_id = ?"
907
- item_sql = "SELECT p.post_id, s.url, s.exif_type, CASE WHEN s.post_id IS NULL THEN 0 ELSE 1 END FROM pi_search_posts p LEFT JOIN pi_scans s ON s.post_id = p.post_id WHERE p.search_id = ? ORDER BY p.pos LIMIT ? OFFSET ?"
908
- resp = await turso_execute(
909
- [
910
- {"sql": total_sql, "args": args},
911
- {
912
- "sql": "SELECT COUNT(*) FROM pi_search_posts p JOIN pi_scans s ON s.post_id = p.post_id WHERE p.search_id = ?",
913
- "args": args,
914
- },
915
- {
916
- "sql": item_sql,
917
- "args": args
918
- + [
919
- {"type": "integer", "value": str(PAGE_SIZE)},
920
- {"type": "integer", "value": str(offset)},
921
- ],
922
- },
923
- ]
924
- )
925
- result_rows = [
926
- r.get("response", {}).get("result", {}).get("rows", [])
927
- for r in resp.get("results", [])
928
- ]
929
- total = (
930
- int(result_rows[0][0][0].get("value", "0"))
931
- if result_rows and result_rows[0]
932
- else 0
933
- )
934
- scanned_count = (
935
- int(result_rows[1][0][0].get("value", "0"))
936
- if len(result_rows) > 1 and result_rows[1]
937
- else 0
938
- )
939
- items = []
940
- item_rows = result_rows[2] if len(result_rows) > 2 else []
941
- for row in item_rows:
942
- pid = row[0].get("value")
943
- url = row[1].get("value") if row[1].get("type") != "null" else ""
944
- exif_type = row[2].get("value") if row[2].get("type") != "null" else None
945
- scanned = row[3].get("value") == "1"
946
- items.append(
947
  {
948
  "post_id": pid,
949
- "url": url,
950
- "exif_type": int(exif_type) if exif_type else None,
951
- "scanned": scanned,
952
- **image_links(pid, url),
953
  }
954
- )
 
 
 
 
 
 
955
  return {
956
  "search_id": search_id,
957
  "items": items,
@@ -960,7 +864,7 @@ async def get_results(search_id: str, page: int = 1, exif_only: bool = True):
960
  "page_size": PAGE_SIZE,
961
  "pages": max((total + PAGE_SIZE - 1) // PAGE_SIZE, 1),
962
  "raw_total": len(post_ids),
963
- "scanned_count": scanned_count,
964
  }
965
 
966
 
@@ -1023,10 +927,6 @@ async def get_thumb(post_id: str):
1023
  async def delete_search(search_id: str):
1024
  await turso_execute(
1025
  [
1026
- {
1027
- "sql": "DELETE FROM pi_search_posts WHERE search_id = ?",
1028
- "args": [{"type": "text", "value": search_id}],
1029
- },
1030
  {
1031
  "sql": "DELETE FROM pi_searches WHERE id = ?",
1032
  "args": [{"type": "text", "value": search_id}],
@@ -1053,14 +953,8 @@ async def rename_search(search_id: str, req: RenameRequest):
1053
  if not rows:
1054
  return {"error": "not found"}
1055
  post_ids_val = rows[0][0].get("value", "[]")
1056
- post_ids = json.loads(post_ids_val)
1057
- await ensure_search_posts(search_id, post_ids)
1058
  await turso_execute(
1059
  [
1060
- {
1061
- "sql": "DELETE FROM pi_search_posts WHERE search_id = ?",
1062
- "args": [{"type": "text", "value": req.new_id}],
1063
- },
1064
  {
1065
  "sql": "DELETE FROM pi_searches WHERE id = ?",
1066
  "args": [{"type": "text", "value": search_id}],
@@ -1072,16 +966,14 @@ async def rename_search(search_id: str, req: RenameRequest):
1072
  {"type": "text", "value": post_ids_val},
1073
  ],
1074
  },
1075
- {
1076
- "sql": "UPDATE pi_search_posts SET search_id = ? WHERE search_id = ?",
1077
- "args": [
1078
- {"type": "text", "value": req.new_id},
1079
- {"type": "text", "value": search_id},
1080
- ],
1081
- },
1082
  ]
1083
  )
1084
  return {"status": "renamed", "new_id": req.new_id}
1085
 
1086
 
 
 
 
 
 
1087
  app.mount("/", StaticFiles(directory=FRONTEND_DIR, html=True), name="frontend")
 
53
  THUMB_MAX_AGE = 1800
54
  THUMB_DIR = Path(tempfile.gettempdir()) / "pixif2-thumbs"
55
  PAGE_URL_CACHE_MAX_AGE = 1800
 
 
56
 
57
  app = FastAPI()
58
  ACTIVE_TASKS = {}
 
127
  {
128
  "sql": "CREATE TABLE IF NOT EXISTS pi_scans (post_id TEXT PRIMARY KEY, url TEXT, exif_type INTEGER)"
129
  },
 
 
 
 
 
 
 
 
 
130
  ]
131
  )
132
  for sql in (
133
  "DROP INDEX IF EXISTS pi_searches_created_at_idx",
134
+ "DROP INDEX IF EXISTS pi_search_posts_search_pos_idx",
135
+ "DROP INDEX IF EXISTS pi_search_posts_post_idx",
136
+ "DROP TABLE IF EXISTS pi_search_posts",
137
  "ALTER TABLE pi_searches DROP COLUMN api_url",
138
  "ALTER TABLE pi_searches DROP COLUMN created_at",
139
  ):
 
163
  print(f"Discord webhook error: {repr(e)}")
164
 
165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  def is_ai_post(post):
167
  if post.get("aiType") == 2:
168
  return True
 
226
  ],
227
  }
228
  await turso_execute([stmt])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
 
230
 
231
  async def pixiv_search_live(url, pages, mode, phpsessid, search_id):
 
242
  for coro in asyncio.as_completed(tasks):
243
  data = await coro
244
  done += 1
245
+ if search_id in ACTIVE_TASKS:
246
+ ACTIVE_TASKS[search_id].update({"total": pages, "done": done})
247
  if data.get("error"):
248
  continue
249
  body = data.get("body") or {}
 
263
  return post_ids, keywords, first_url
264
 
265
 
266
+ async def pixiv_user_posts(user_ids, phpsessid):
267
  cookies = {"PHPSESSID": phpsessid}
268
  results = []
269
  async with aiohttp.ClientSession(cookies=cookies, headers=PIXIV_HEADERS) as session:
 
283
  )
284
  username = (udata.get("body") or {}).get("name") or ""
285
  results.append({"user_id": uid, "post_ids": posts, "username": username})
 
 
286
  return results
287
 
288
 
 
421
  result = await coro
422
  results.append(result)
423
  pending.append(result)
424
+ if task_id and task_id in ACTIVE_TASKS:
425
+ ACTIVE_TASKS[task_id]["done"] = len(results)
426
  if save_live and len(pending) >= 20:
427
  await save_scan_results(pending)
428
  pending = []
 
455
  async def get_scanned_post_ids(post_ids):
456
  if not post_ids:
457
  return {}
458
+ chunks = [post_ids[i : i + 200] for i in range(0, len(post_ids), 200)]
459
+ scanned = {}
460
+ for chunk in chunks:
 
 
 
461
  placeholders = ",".join("?" for _ in chunk)
462
  stmt = {
463
  "sql": f"SELECT post_id, url, exif_type FROM pi_scans WHERE post_id IN ({placeholders})",
464
  "args": [{"type": "text", "value": str(pid)} for pid in chunk],
465
  }
466
  resp = await turso_execute([stmt])
 
467
  results = resp.get("results") or []
468
  if results and "response" in results[0]:
469
  rows = results[0]["response"].get("result", {}).get("rows", [])
 
471
  pid = row[0].get("value")
472
  url_val = row[1].get("value") if row[1].get("type") != "null" else ""
473
  et = row[2].get("value") if row[2].get("type") != "null" else None
474
+ scanned[pid] = {"url": url_val, "exif_type": int(et) if et else None}
 
 
 
 
 
475
  return scanned
476
 
477
 
 
557
 
558
  async def create_webp(post_id, image_url, phpsessid, page=0, kind="t"):
559
  cleanup_thumbs()
560
+ out = THUMB_DIR / f"{post_id}_p{page}_{kind}.webp"
 
561
  if out.exists():
562
  os.utime(out, None)
563
  return out
 
566
  raise HTTPException(status_code=404, detail="image not found")
567
  image = Image.open(io.BytesIO(data))
568
  if kind == "v":
569
+ image = image.resize((max(image.width // 2, 1), max(image.height // 2, 1)))
570
  else:
571
+ image.thumbnail((360, 360))
572
  if image.mode not in ("RGB", "RGBA"):
573
  image = image.convert("RGB")
574
  image.save(out, "WEBP", quality=82 if kind == "v" else 72)
 
589
 
590
 
591
  async def bg_search_task(search_id, url, pages, mode, phpsessid):
592
+ ACTIVE_TASKS[search_id] = {
593
+ "type": "search",
594
+ "phase": "searching",
595
+ "total": pages,
596
+ "done": 0,
597
+ }
598
+ await discord_notify(f"`{search_id}` started")
599
  try:
600
  post_ids, _, _ = await pixiv_search_live(url, pages, mode, phpsessid, search_id)
601
  await discord_notify(f"`{search_id}` completed - {len(post_ids)} posts found")
 
606
 
607
 
608
  async def bg_user_task(search_id, user_ids, phpsessid):
609
+ ACTIVE_TASKS[search_id] = {
610
+ "type": "user_search",
611
+ "phase": "searching",
612
+ "total": len(user_ids),
613
+ "done": 0,
614
+ }
615
+ await discord_notify(f"`{search_id}` started (users)")
616
  try:
617
+ results = await pixiv_user_posts(user_ids, phpsessid)
618
  all_post_ids = []
619
  for r in results:
620
  all_post_ids.extend(r["post_ids"])
621
  all_post_ids = list(dict.fromkeys(all_post_ids))
622
  await save_search(search_id, all_post_ids)
623
+ already = await get_scanned_post_ids(all_post_ids)
624
+ to_scan = [pid for pid in all_post_ids if pid not in already]
625
+ if to_scan:
626
+ ACTIVE_TASKS[search_id].update(
627
+ {"phase": "scanning", "total": len(to_scan), "done": 0}
628
+ )
629
+ await run_scan(to_scan, phpsessid, task_id=search_id, save_live=True)
630
  await discord_notify(
631
  f"`{search_id}` completed - {len(all_post_ids)} posts from {len(user_ids)} users"
632
  )
 
637
 
638
 
639
  async def bg_scan_task(search_id, post_ids, phpsessid):
640
+ ACTIVE_TASKS[search_id] = {
641
+ "type": "scan",
642
+ "phase": "scanning",
643
+ "total": len(post_ids),
644
+ "done": 0,
645
+ }
646
+ await discord_notify(f"`{search_id}` scan started ({len(post_ids)} posts)")
647
  try:
648
  results = await run_scan(post_ids, phpsessid, task_id=search_id, save_live=True)
649
  found = sum(1 for _, url, _ in results if url)
650
  await discord_notify(
651
+ f"`{search_id}` scan completed - {found}/{len(post_ids)} have exif"
652
  )
653
  except Exception as e:
654
  await discord_notify(f"`{search_id}` scan failed: {e}")
 
657
 
658
 
659
  async def bg_search_and_scan_task(search_id, url, pages, mode, phpsessid):
660
+ ACTIVE_TASKS[search_id] = {
661
+ "type": "search+scan",
662
+ "phase": "searching",
663
+ "total": pages,
664
+ "done": 0,
665
+ }
666
+ await discord_notify(f"`{search_id}` search+scan started")
667
  try:
668
  post_ids, _, _ = await pixiv_search_live(url, pages, mode, phpsessid, search_id)
669
+ await discord_notify(
670
+ f"`{search_id}` search done - {len(post_ids)} posts, scanning..."
671
+ )
672
  already = await get_scanned_post_ids(post_ids)
673
  to_scan = [pid for pid in post_ids if pid not in already]
674
  if to_scan:
675
+ ACTIVE_TASKS[search_id].update(
676
+ {"phase": "scanning", "total": len(to_scan), "done": 0}
677
+ )
678
  results = await run_scan(
679
  to_scan, phpsessid, task_id=search_id, save_live=True
680
  )
681
  found = sum(1 for _, url, _ in results if url)
682
  await discord_notify(
683
+ f"`{search_id}` scan completed - {found}/{len(to_scan)} new exif"
684
  )
685
  else:
686
+ await discord_notify(f"`{search_id}` all {len(post_ids)} already scanned")
687
  except Exception as e:
688
  await discord_notify(f"`{search_id}` failed: {e}")
689
  finally:
 
725
  async def submit_search(req: SearchRequest, bg: BackgroundTasks):
726
  search_id = base26_time()
727
  phpsessid = PHPSESSID
728
+ bg.add_task(
729
+ bg_search_and_scan_task, search_id, req.url, req.pages, req.mode, phpsessid
730
+ )
 
 
 
731
  return {"id": search_id, "status": "started"}
732
 
733
 
 
743
  @app.post("/api/scan")
744
  async def scan_search(req: ScanRequest, bg: BackgroundTasks):
745
  phpsessid = PHPSESSID
746
+ if req.search_id in ACTIVE_TASKS:
747
+ return {"status": "active", **ACTIVE_TASKS[req.search_id]}
748
  resp = await turso_execute(
749
  [
750
  {
 
837
  if not rows:
838
  return {"error": "not found"}
839
  post_ids = json.loads(rows[0][0].get("value", "[]"))
840
+ scanned = await get_scanned_post_ids(post_ids)
841
+ source = (
842
+ exif_items(post_ids, scanned)
843
+ if exif_only
844
+ else [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
845
  {
846
  "post_id": pid,
847
+ "url": scanned[pid]["url"] if pid in scanned else None,
848
+ "exif_type": scanned[pid]["exif_type"] if pid in scanned else None,
849
+ "scanned": pid in scanned,
850
+ **image_links(pid, scanned[pid]["url"] if pid in scanned else ""),
851
  }
852
+ for pid in post_ids
853
+ ]
854
+ )
855
+ page = max(page, 1)
856
+ total = len(source)
857
+ start = (page - 1) * PAGE_SIZE
858
+ items = source[start : start + PAGE_SIZE]
859
  return {
860
  "search_id": search_id,
861
  "items": items,
 
864
  "page_size": PAGE_SIZE,
865
  "pages": max((total + PAGE_SIZE - 1) // PAGE_SIZE, 1),
866
  "raw_total": len(post_ids),
867
+ "scanned_count": len(scanned),
868
  }
869
 
870
 
 
927
  async def delete_search(search_id: str):
928
  await turso_execute(
929
  [
 
 
 
 
930
  {
931
  "sql": "DELETE FROM pi_searches WHERE id = ?",
932
  "args": [{"type": "text", "value": search_id}],
 
953
  if not rows:
954
  return {"error": "not found"}
955
  post_ids_val = rows[0][0].get("value", "[]")
 
 
956
  await turso_execute(
957
  [
 
 
 
 
958
  {
959
  "sql": "DELETE FROM pi_searches WHERE id = ?",
960
  "args": [{"type": "text", "value": search_id}],
 
966
  {"type": "text", "value": post_ids_val},
967
  ],
968
  },
 
 
 
 
 
 
 
969
  ]
970
  )
971
  return {"status": "renamed", "new_id": req.new_id}
972
 
973
 
974
+ @app.get("/api/progress")
975
+ async def get_progress():
976
+ return [{"id": k, **v} for k, v in ACTIVE_TASKS.items()]
977
+
978
+
979
  app.mount("/", StaticFiles(directory=FRONTEND_DIR, html=True), name="frontend")
frontend/app.js CHANGED
@@ -17,7 +17,6 @@ $("#btn-submit").addEventListener("click", async () => {
17
  if (!url) return
18
  const pages = parseInt($("#input-pages").value) || 30
19
  const mode = $("#input-mode").value
20
- const action = $("#input-action").value
21
  const status = $("#submit-status")
22
  status.textContent = "Submitting..."
23
  status.className = ""
@@ -38,13 +37,13 @@ $("#btn-submit").addEventListener("click", async () => {
38
  resp = await fetch("/api/submit_users", {
39
  method: "POST",
40
  headers: { "Content-Type": "application/json" },
41
- body: JSON.stringify({ user_ids: userIds, action })
42
  })
43
  } else {
44
  resp = await fetch("/api/submit", {
45
  method: "POST",
46
  headers: { "Content-Type": "application/json" },
47
- body: JSON.stringify({ url, pages, mode, action })
48
  })
49
  }
50
  const data = await resp.json()
@@ -64,6 +63,7 @@ function route() {
64
  const params = new URLSearchParams(qs)
65
  $$(".tab").forEach(t => t.classList.toggle("active", t.dataset.tab === tab))
66
  $$(".panel").forEach(p => p.classList.toggle("active", p.id === tab))
 
67
  if (tab === "explorer") {
68
  if (parts[1]) {
69
  openSearch(decodeURIComponent(parts[1]), parseInt(params.get("page")) || 1, params.get("exif") !== "0")
@@ -85,9 +85,20 @@ async function loadSearches() {
85
  detail.classList.add("hidden")
86
  list.innerHTML = "Loading..."
87
  try {
88
- const searchResp = await fetch("/api/searches")
89
  const data = await searchResp.json()
90
- if (!data.length) { list.innerHTML = "No searches yet"; return }
 
 
 
 
 
 
 
 
 
 
 
91
  const savedHtml = data.map(s => {
92
  const d = new Date(parseInt(s.created_at) * 1000)
93
  const ts = d.toLocaleString()
@@ -100,7 +111,7 @@ async function loadSearches() {
100
  </span>
101
  </div>`
102
  }).join("")
103
- list.innerHTML = savedHtml
104
  list.querySelectorAll(".search-item").forEach(el => {
105
  if (!el.dataset.id) return
106
  el.querySelector(".id").addEventListener("click", () => { location.hash = explorerHash(el.dataset.id, 1, true) })
@@ -132,25 +143,7 @@ async function openSearch(id, page = 1, exifOnly = true) {
132
 
133
  const allScanned = data.scanned_count >= data.raw_total
134
  $("#detail-stats").textContent = `${data.total}/${data.raw_total} shown | ${data.scanned_count}/${data.raw_total} scanned`
135
- const scanBtn = $("#btn-scan")
136
- if (allScanned) {
137
- scanBtn.textContent = "Scanned"
138
- scanBtn.disabled = true
139
- } else {
140
- scanBtn.textContent = `Scan (${data.raw_total - data.scanned_count} remaining)`
141
- scanBtn.disabled = false
142
- scanBtn.onclick = async () => {
143
- scanBtn.disabled = true
144
- scanBtn.textContent = "Scanning..."
145
- const r = await fetch("/api/scan", {
146
- method: "POST",
147
- headers: { "Content-Type": "application/json" },
148
- body: JSON.stringify({ search_id: id })
149
- })
150
- const d = await r.json()
151
- scanBtn.textContent = d.status === "already_scanned" ? "Scanned" : `Scanning ${d.to_scan}...`
152
- }
153
- }
154
 
155
  $("#filter-exif").onchange = () => { location.hash = explorerHash(id, 1, $("#filter-exif").checked) }
156
  $("#btn-back").onclick = () => { location.hash = "#/explorer" }
@@ -161,6 +154,14 @@ async function openSearch(id, page = 1, exifOnly = true) {
161
  }
162
  }
163
 
 
 
 
 
 
 
 
 
164
  function pageSuffix(url) {
165
  if (!url) return ""
166
  const m = url.match(/_p(\d+)\./)
@@ -287,6 +288,30 @@ async function renameSearch(id) {
287
  loadSearches()
288
  }
289
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
290
  window.addEventListener("hashchange", route)
291
  if (!location.hash) location.hash = "#/submit"
292
  route()
 
17
  if (!url) return
18
  const pages = parseInt($("#input-pages").value) || 30
19
  const mode = $("#input-mode").value
 
20
  const status = $("#submit-status")
21
  status.textContent = "Submitting..."
22
  status.className = ""
 
37
  resp = await fetch("/api/submit_users", {
38
  method: "POST",
39
  headers: { "Content-Type": "application/json" },
40
+ body: JSON.stringify({ user_ids: userIds })
41
  })
42
  } else {
43
  resp = await fetch("/api/submit", {
44
  method: "POST",
45
  headers: { "Content-Type": "application/json" },
46
+ body: JSON.stringify({ url, pages, mode })
47
  })
48
  }
49
  const data = await resp.json()
 
63
  const params = new URLSearchParams(qs)
64
  $$(".tab").forEach(t => t.classList.toggle("active", t.dataset.tab === tab))
65
  $$(".panel").forEach(p => p.classList.toggle("active", p.id === tab))
66
+ if (tab === "progress") loadProgress()
67
  if (tab === "explorer") {
68
  if (parts[1]) {
69
  openSearch(decodeURIComponent(parts[1]), parseInt(params.get("page")) || 1, params.get("exif") !== "0")
 
85
  detail.classList.add("hidden")
86
  list.innerHTML = "Loading..."
87
  try {
88
+ const [searchResp, taskResp] = await Promise.all([fetch("/api/searches"), fetch("/api/progress")])
89
  const data = await searchResp.json()
90
+ const tasks = await taskResp.json()
91
+ const active = tasks.filter(t => t.type === "search" || t.type === "search+scan" || t.type === "user_search")
92
+ if (!data.length && !active.length) { list.innerHTML = "No searches yet"; return }
93
+ const activeHtml = active.map(t => {
94
+ const pct = t.total > 0 ? Math.round(t.done / t.total * 100) : 0
95
+ const label = t.total > 0 ? `${t.done}/${t.total}` : "..."
96
+ return `<div class="search-item active-task" data-id="${esc(t.id)}">
97
+ <span class="id">${esc(t.id)}</span>
98
+ <span class="time">${esc(t.type)} ${esc(t.phase)} ${label}</span>
99
+ <div class="mini-bar"><div style="width:${pct}%"></div></div>
100
+ </div>`
101
+ }).join("")
102
  const savedHtml = data.map(s => {
103
  const d = new Date(parseInt(s.created_at) * 1000)
104
  const ts = d.toLocaleString()
 
111
  </span>
112
  </div>`
113
  }).join("")
114
+ list.innerHTML = activeHtml + savedHtml
115
  list.querySelectorAll(".search-item").forEach(el => {
116
  if (!el.dataset.id) return
117
  el.querySelector(".id").addEventListener("click", () => { location.hash = explorerHash(el.dataset.id, 1, true) })
 
143
 
144
  const allScanned = data.scanned_count >= data.raw_total
145
  $("#detail-stats").textContent = `${data.total}/${data.raw_total} shown | ${data.scanned_count}/${data.raw_total} scanned`
146
+ if (!allScanned) resumeScan(id)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  $("#filter-exif").onchange = () => { location.hash = explorerHash(id, 1, $("#filter-exif").checked) }
149
  $("#btn-back").onclick = () => { location.hash = "#/explorer" }
 
154
  }
155
  }
156
 
157
+ async function resumeScan(id) {
158
+ await fetch("/api/scan", {
159
+ method: "POST",
160
+ headers: { "Content-Type": "application/json" },
161
+ body: JSON.stringify({ search_id: id })
162
+ })
163
+ }
164
+
165
  function pageSuffix(url) {
166
  if (!url) return ""
167
  const m = url.match(/_p(\d+)\./)
 
288
  loadSearches()
289
  }
290
 
291
+ async function loadProgress() {
292
+ const el = $("#progress-list")
293
+ try {
294
+ const resp = await fetch("/api/progress")
295
+ const tasks = await resp.json()
296
+ if (!tasks.length) { el.innerHTML = '<div class="progress-empty">No active tasks</div>'; return }
297
+ el.innerHTML = tasks.map(t => {
298
+ const pct = t.total > 0 ? Math.round(t.done / t.total * 100) : 0
299
+ const label = t.total > 0 ? `${t.done} / ${t.total}` : "..."
300
+ return `<div class="progress-item">
301
+ <div class="progress-info">
302
+ <span class="progress-id">${t.id}</span>
303
+ <span class="progress-type">${t.type}</span>
304
+ <span class="progress-phase">${t.phase}</span>
305
+ <span class="progress-label">${label}</span>
306
+ </div>
307
+ <div class="progress-bar-bg"><div class="progress-bar-fill" style="width:${pct}%"></div></div>
308
+ </div>`
309
+ }).join("")
310
+ } catch (e) {
311
+ el.innerHTML = `<div class="progress-empty">Error: ${e.message}</div>`
312
+ }
313
+ }
314
+
315
  window.addEventListener("hashchange", route)
316
  if (!location.hash) location.hash = "#/submit"
317
  route()
frontend/index.html CHANGED
@@ -11,6 +11,7 @@
11
  <div id="app">
12
  <nav>
13
  <button class="tab active" data-tab="submit">Submit</button>
 
14
  <button class="tab" data-tab="explorer">Explorer</button>
15
  </nav>
16
 
@@ -33,18 +34,16 @@
33
  <option value="real">Real Only</option>
34
  </select>
35
  </div>
36
- <div class="form-group">
37
- <label>Action</label>
38
- <select id="input-action">
39
- <option value="search">Search</option>
40
- <option value="scan_and_search" selected>Search & Scan</option>
41
- </select>
42
- </div>
43
  </div>
44
  <button id="btn-submit" class="btn-primary">Submit</button>
45
  <div id="submit-status"></div>
46
  </section>
47
 
 
 
 
 
 
48
  <section id="explorer" class="panel">
49
  <h2>Explorer</h2>
50
  <div id="search-list"></div>
@@ -54,7 +53,6 @@
54
  <span id="detail-title"></span>
55
  <span id="detail-stats"></span>
56
  <label class="check-row"><input type="checkbox" id="filter-exif" checked> Exif Only</label>
57
- <button id="btn-scan" class="btn-primary">Scan</button>
58
  </div>
59
  <div id="pager"></div>
60
  <div id="results-grid"></div>
 
11
  <div id="app">
12
  <nav>
13
  <button class="tab active" data-tab="submit">Submit</button>
14
+ <button class="tab" data-tab="progress">Progress</button>
15
  <button class="tab" data-tab="explorer">Explorer</button>
16
  </nav>
17
 
 
34
  <option value="real">Real Only</option>
35
  </select>
36
  </div>
 
 
 
 
 
 
 
37
  </div>
38
  <button id="btn-submit" class="btn-primary">Submit</button>
39
  <div id="submit-status"></div>
40
  </section>
41
 
42
+ <section id="progress" class="panel">
43
+ <h2>Progress</h2>
44
+ <div id="progress-list"></div>
45
+ </section>
46
+
47
  <section id="explorer" class="panel">
48
  <h2>Explorer</h2>
49
  <div id="search-list"></div>
 
53
  <span id="detail-title"></span>
54
  <span id="detail-stats"></span>
55
  <label class="check-row"><input type="checkbox" id="filter-exif" checked> Exif Only</label>
 
56
  </div>
57
  <div id="pager"></div>
58
  <div id="results-grid"></div>
frontend/style.css CHANGED
@@ -20,6 +20,7 @@ input:focus, select:focus { outline: none; border-color: #6c63ff; }
20
  .btn-secondary:hover { background: #3a3a4e; }
21
  .btn-secondary:disabled { color: #666; cursor: not-allowed; }
22
  #submit-status { margin-top: 1rem; padding: .5rem; border-radius: 4px; font-size: .9rem; }
 
23
  .status-ok { background: #1a3a1a; color: #6f6; }
24
  .status-err { background: #3a1a1a; color: #f66; }
25
  .hidden { display: none; }
@@ -35,6 +36,9 @@ input:focus, select:focus { outline: none; border-color: #6c63ff; }
35
  .btn-icon { background: none; border: 1px solid transparent; color: #666; font-size: 1rem; cursor: pointer; padding: .1rem .4rem; border-radius: 3px; line-height: 1; }
36
  .btn-icon:hover { color: #e0e0e0; border-color: #555; }
37
  .btn-delete:hover { color: #f66; border-color: #f66; }
 
 
 
38
 
39
  .detail-header { display: flex; align-items: center; gap: 1rem; flex-wrap: wrap; margin-bottom: 1rem; }
40
  #detail-title { font-weight: bold; color: #6c63ff; }
@@ -66,6 +70,16 @@ input:focus, select:focus { outline: none; border-color: #6c63ff; }
66
  .not-scanned { color: #555; }
67
  .no-exif { color: #666; }
68
 
 
 
 
 
 
 
 
 
 
 
69
  #viewer { position: fixed; inset: 0; z-index: 10; background: rgba(0, 0, 0, .92); display: flex; flex-direction: column; }
70
  #viewer.hidden { display: none; }
71
  .viewer-bar { display: flex; align-items: center; justify-content: flex-end; gap: .5rem; padding: .75rem; background: #111; border-bottom: 1px solid #333; }
 
20
  .btn-secondary:hover { background: #3a3a4e; }
21
  .btn-secondary:disabled { color: #666; cursor: not-allowed; }
22
  #submit-status { margin-top: 1rem; padding: .5rem; border-radius: 4px; font-size: .9rem; }
23
+ #submit-status a { color: inherit; overflow-wrap: anywhere; }
24
  .status-ok { background: #1a3a1a; color: #6f6; }
25
  .status-err { background: #3a1a1a; color: #f66; }
26
  .hidden { display: none; }
 
36
  .btn-icon { background: none; border: 1px solid transparent; color: #666; font-size: 1rem; cursor: pointer; padding: .1rem .4rem; border-radius: 3px; line-height: 1; }
37
  .btn-icon:hover { color: #e0e0e0; border-color: #555; }
38
  .btn-delete:hover { color: #f66; border-color: #f66; }
39
+ .active-task { border-color: #ffb74d; }
40
+ .mini-bar { width: 120px; height: 4px; background: #333; border-radius: 2px; overflow: hidden; margin-left: .75rem; }
41
+ .mini-bar div { height: 100%; background: #ffb74d; }
42
 
43
  .detail-header { display: flex; align-items: center; gap: 1rem; flex-wrap: wrap; margin-bottom: 1rem; }
44
  #detail-title { font-weight: bold; color: #6c63ff; }
 
70
  .not-scanned { color: #555; }
71
  .no-exif { color: #666; }
72
 
73
+ .progress-empty { color: #666; padding: 1rem 0; }
74
+ .progress-item { background: #1a1a2e; border: 1px solid #333; border-radius: 6px; padding: .75rem 1rem; margin-bottom: .5rem; }
75
+ .progress-info { display: flex; align-items: center; gap: 1rem; margin-bottom: .5rem; font-size: .85rem; }
76
+ .progress-id { color: #6c63ff; font-family: monospace; }
77
+ .progress-type { color: #aaa; }
78
+ .progress-phase { color: #ffb74d; }
79
+ .progress-label { margin-left: auto; color: #ccc; }
80
+ .progress-bar-bg { background: #2a2a3e; border-radius: 3px; height: 6px; overflow: hidden; }
81
+ .progress-bar-fill { background: #6c63ff; height: 100%; transition: width .3s ease; }
82
+
83
  #viewer { position: fixed; inset: 0; z-index: 10; background: rgba(0, 0, 0, .92); display: flex; flex-direction: column; }
84
  #viewer.hidden { display: none; }
85
  .viewer-bar { display: flex; align-items: center; justify-content: flex-end; gap: .5rem; padding: .75rem; background: #111; border-bottom: 1px solid #333; }