q6 committed on
Commit
5d8577a
·
1 Parent(s): bdaff3a

Split user submissions by account

Browse files
Files changed (2) hide show
  1. backend/app.py +65 -37
  2. frontend/app.js +2 -1
backend/app.py CHANGED
@@ -238,6 +238,8 @@ def get_search_api_url(raw, keywords):
238
 
239
 
240
  async def save_search(search_id, post_ids):
 
 
241
  stmt = {
242
  "sql": "INSERT OR REPLACE INTO pi_searches (id, post_ids) VALUES (?, ?)",
243
  "args": [
@@ -248,6 +250,29 @@ async def save_search(search_id, post_ids):
248
  await turso_execute([stmt])
249
 
250
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
251
  async def pixiv_search_live(url, pages, mode, phpsessid, search_id):
252
  keywords = get_search_keywords(url)
253
  api_url = get_search_api_url(url, keywords)
@@ -282,27 +307,21 @@ async def pixiv_search_live(url, pages, mode, phpsessid, search_id):
282
  return post_ids, keywords, first_url
283
 
284
 
285
- async def pixiv_user_posts(user_ids, phpsessid):
286
  cookies = {"PHPSESSID": phpsessid}
287
- results = []
288
  async with aiohttp.ClientSession(cookies=cookies, headers=PIXIV_HEADERS) as session:
289
- for uid in user_ids:
290
- data = await fetch_page(
291
- session, f"https://www.pixiv.net/ajax/user/{uid}/profile/all"
292
- )
293
- body = data.get("body") or {}
294
- posts = list((body.get("illusts") or {}).keys())
295
- username = ""
296
- pickup = body.get("pickup") or []
297
- if pickup:
298
- username = (pickup[0] or {}).get("userName") or ""
299
- if not username:
300
- udata = await fetch_page(
301
- session, f"https://www.pixiv.net/ajax/user/{uid}"
302
- )
303
- username = (udata.get("body") or {}).get("name") or ""
304
- results.append({"user_id": uid, "post_ids": posts, "username": username})
305
- return results
306
 
307
 
308
  async def fetch_page(session, url):
@@ -634,30 +653,31 @@ async def bg_search_task(search_id, url, pages, mode, phpsessid):
634
  await finish_task(search_id)
635
 
636
 
637
- async def bg_user_task(search_id, user_ids, phpsessid):
638
  ACTIVE_TASKS[search_id] = {
639
  "type": "user_search",
640
  "phase": "searching",
641
- "total": len(user_ids),
642
  "done": 0,
643
  }
644
- await discord_notify(f"`{search_id}` started (users)")
645
  try:
646
- results = await pixiv_user_posts(user_ids, phpsessid)
647
- all_post_ids = []
648
- for r in results:
649
- all_post_ids.extend(r["post_ids"])
650
- all_post_ids = list(dict.fromkeys(all_post_ids))
651
- await save_search(search_id, all_post_ids)
652
- already = await get_scanned_post_ids(all_post_ids)
653
- to_scan = [pid for pid in all_post_ids if pid not in already]
 
654
  if to_scan:
655
  ACTIVE_TASKS[search_id].update(
656
  {"phase": "scanning", "total": len(to_scan), "done": 0}
657
  )
658
  await run_scan(to_scan, phpsessid, task_id=search_id, save_live=True)
659
  await discord_notify(
660
- f"`{search_id}` completed - {len(all_post_ids)} posts from {len(user_ids)} users"
661
  )
662
  except Exception as e:
663
  await discord_notify(f"`{search_id}` failed: {e}")
@@ -665,6 +685,12 @@ async def bg_user_task(search_id, user_ids, phpsessid):
665
  await finish_task(search_id)
666
 
667
 
 
 
 
 
 
 
668
  async def bg_scan_task(search_id, post_ids, phpsessid):
669
  ACTIVE_TASKS[search_id] = {
670
  "type": "scan",
@@ -762,11 +788,12 @@ async def submit_search(req: SearchRequest, bg: BackgroundTasks):
762
 
763
  @app.post("/api/submit_users")
764
  async def submit_users(req: UserSearchRequest, bg: BackgroundTasks):
765
- search_id = base26_time()
766
  phpsessid = PHPSESSID
767
- user_ids = [int(u) for u in req.user_ids]
768
- bg.add_task(bg_user_task, search_id, user_ids, phpsessid)
769
- return {"id": search_id, "status": "started"}
 
 
770
 
771
 
772
  @app.post("/api/scan")
@@ -804,10 +831,11 @@ async def list_searches(page: int = 1):
804
  resp = await turso_execute(
805
  [
806
  {
807
- "sql": "SELECT COUNT(*) FROM pi_searches"
808
  },
809
  {
810
- "sql": "SELECT id FROM pi_searches ORDER BY id DESC LIMIT ? OFFSET ?",
 
811
  "args": [
812
  {"type": "integer", "value": str(SEARCH_PAGE_SIZE)},
813
  {"type": "integer", "value": str(offset)},
 
238
 
239
 
240
  async def save_search(search_id, post_ids):
241
+ if not post_ids:
242
+ return
243
  stmt = {
244
  "sql": "INSERT OR REPLACE INTO pi_searches (id, post_ids) VALUES (?, ?)",
245
  "args": [
 
250
  await turso_execute([stmt])
251
 
252
 
253
+ def user_search_id(user_id, username):
254
+ label = str(username or user_id).strip() or str(user_id)
255
+ return f"{base26_time()}_{label}"
256
+
257
+
258
+ async def pixiv_user_name(user_id, session):
259
+ data = await fetch_page(session, f"https://www.pixiv.net/ajax/user/{user_id}")
260
+ body = data.get("body") or {}
261
+ return (body.get("name") or body.get("account") or "").strip()
262
+
263
+
264
+ async def pixiv_user_names(user_ids, phpsessid):
265
+ cookies = {"PHPSESSID": phpsessid}
266
+ async with aiohttp.ClientSession(cookies=cookies, headers=PIXIV_HEADERS) as session:
267
+ async def load_name(uid):
268
+ try:
269
+ return uid, await pixiv_user_name(uid, session)
270
+ except Exception:
271
+ return uid, ""
272
+
273
+ return dict(await asyncio.gather(*(load_name(uid) for uid in user_ids)))
274
+
275
+
276
  async def pixiv_search_live(url, pages, mode, phpsessid, search_id):
277
  keywords = get_search_keywords(url)
278
  api_url = get_search_api_url(url, keywords)
 
307
  return post_ids, keywords, first_url
308
 
309
 
310
+ async def pixiv_user_posts(user_id, phpsessid):
311
  cookies = {"PHPSESSID": phpsessid}
 
312
  async with aiohttp.ClientSession(cookies=cookies, headers=PIXIV_HEADERS) as session:
313
+ data = await fetch_page(
314
+ session, f"https://www.pixiv.net/ajax/user/{user_id}/profile/all"
315
+ )
316
+ body = data.get("body") or {}
317
+ posts = list((body.get("illusts") or {}).keys())
318
+ username = ""
319
+ pickup = body.get("pickup") or []
320
+ if pickup:
321
+ username = (pickup[0] or {}).get("userName") or ""
322
+ if not username:
323
+ username = await pixiv_user_name(user_id, session)
324
+ return {"user_id": user_id, "post_ids": posts, "username": username}
 
 
 
 
 
325
 
326
 
327
  async def fetch_page(session, url):
 
653
  await finish_task(search_id)
654
 
655
 
656
+ async def bg_user_task(search_id, user_id, phpsessid):
657
  ACTIVE_TASKS[search_id] = {
658
  "type": "user_search",
659
  "phase": "searching",
660
+ "total": 1,
661
  "done": 0,
662
  }
663
+ await discord_notify(f"`{search_id}` started (user {user_id})")
664
  try:
665
+ result = await pixiv_user_posts(user_id, phpsessid)
666
+ post_ids = list(dict.fromkeys(result["post_ids"]))
667
+ ACTIVE_TASKS[search_id]["done"] = 1
668
+ if not post_ids:
669
+ await discord_notify(f"`{search_id}` completed - no posts, not saved")
670
+ return
671
+ await save_search(search_id, post_ids)
672
+ already = await get_scanned_post_ids(post_ids)
673
+ to_scan = [pid for pid in post_ids if pid not in already]
674
  if to_scan:
675
  ACTIVE_TASKS[search_id].update(
676
  {"phase": "scanning", "total": len(to_scan), "done": 0}
677
  )
678
  await run_scan(to_scan, phpsessid, task_id=search_id, save_live=True)
679
  await discord_notify(
680
+ f"`{search_id}` completed - {len(post_ids)} posts from user {user_id}"
681
  )
682
  except Exception as e:
683
  await discord_notify(f"`{search_id}` failed: {e}")
 
685
  await finish_task(search_id)
686
 
687
 
688
+ async def bg_user_batch_task(jobs, phpsessid):
689
+ await asyncio.gather(
690
+ *(bg_user_task(search_id, user_id, phpsessid) for search_id, user_id in jobs)
691
+ )
692
+
693
+
694
  async def bg_scan_task(search_id, post_ids, phpsessid):
695
  ACTIVE_TASKS[search_id] = {
696
  "type": "scan",
 
788
 
789
  @app.post("/api/submit_users")
790
  async def submit_users(req: UserSearchRequest, bg: BackgroundTasks):
 
791
  phpsessid = PHPSESSID
792
+ user_ids = list(dict.fromkeys(int(u) for u in req.user_ids))
793
+ names = await pixiv_user_names(user_ids, phpsessid)
794
+ jobs = [(user_search_id(uid, names.get(uid)), uid) for uid in user_ids]
795
+ bg.add_task(bg_user_batch_task, jobs, phpsessid)
796
+ return {"ids": [search_id for search_id, _ in jobs], "status": "started"}
797
 
798
 
799
  @app.post("/api/scan")
 
831
  resp = await turso_execute(
832
  [
833
  {
834
+ "sql": "SELECT COUNT(*) FROM pi_searches WHERE post_ids != '[]'"
835
  },
836
  {
837
+ "sql": "SELECT id FROM pi_searches WHERE post_ids != '[]' "
838
+ "ORDER BY id DESC LIMIT ? OFFSET ?",
839
  "args": [
840
  {"type": "integer", "value": str(SEARCH_PAGE_SIZE)},
841
  {"type": "integer", "value": str(offset)},
frontend/app.js CHANGED
@@ -55,7 +55,8 @@ $("#btn-submit").addEventListener("click", async () => {
55
  })
56
  }
57
  const data = await resp.json()
58
- status.textContent = `Submitted as ${data.id} - you can close this page`
 
59
  status.className = "status-ok"
60
  } catch (e) {
61
  status.textContent = `Error: ${e.message}`
 
55
  })
56
  }
57
  const data = await resp.json()
58
+ const ids = data.ids || [data.id]
59
+ status.textContent = `Submitted as ${ids.join(", ")} - you can close this page`
60
  status.className = "status-ok"
61
  } catch (e) {
62
  status.textContent = `Error: ${e.message}`