q6 committed on
Commit
b165f5f
·
1 Parent(s): 4e1b77d
API/app.py CHANGED
@@ -1,5 +1,6 @@
1
- from fastapi import FastAPI
2
  import aiohttp
 
3
  import asyncio
4
  import time
5
  import uvicorn
@@ -26,11 +27,47 @@ headers = {
26
 
27
  app = FastAPI()
28
 
 
29
  async def fetch_page(session, url):
30
  async with session.get(url) as response:
31
  data = await response.json()
32
  return data
33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  def base26(n):
35
  if n == 0:
36
  return "A"
@@ -45,6 +82,56 @@ def base26(n):
45
  def base26_time():
46
  return base26(int(time.time()))
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def determine_exif_type(metadata):
49
  if metadata is None:
50
  return None
 
1
+ from fastapi import FastAPI, Query
2
  import aiohttp
3
+ import requests
4
  import asyncio
5
  import time
6
  import uvicorn
 
27
 
28
  app = FastAPI()
29
 
30
+
31
  async def fetch_page(session, url):
32
  async with session.get(url) as response:
33
  data = await response.json()
34
  return data
35
 
36
async def search(raw, pages, ai_only=True, cookies=None, headers=None):
    """Collect artwork IDs from a Pixiv tag-search URL.

    Args:
        raw: Search URL (or bare tag). The tag is the path segment that
            follows ``tags/``; any query string on ``raw`` is forwarded
            to the ajax endpoint unchanged.
        pages: Number of result pages to request (pages ``1..pages``).
        ai_only: When true, keep only posts whose ``aiType`` equals 2.
        cookies: Passed to ``aiohttp.ClientSession``.
        headers: Passed to ``aiohttp.ClientSession``.

    Returns:
        ``(post_ids, keywords)`` where ``post_ids`` is a list of the ``id``
        values of the collected posts and ``keywords`` is the
        percent-decoded tag.

    Raises:
        Whatever ``fetch_page`` raises, or ``KeyError`` when a response
        lacks ``body.illustManga.data``.
    """
    # Function-scope stdlib import: avoids depending on the `requests`
    # package merely for its incidental re-export of this function.
    from urllib.parse import unquote

    # Separate the query string first so that a URL such as
    # ".../tags/foo?order=date" (no trailing path segment) does not leak
    # "?order=date" into the tag itself.
    path_part, _, query = raw.partition('?')
    keywords = path_part.split('tags/')[-1].split('/')[0]

    url = f"https://www.pixiv.net/ajax/search/artworks/{keywords}?word={keywords}"
    if query:
        url += f"&{query}"
    if "s_mode" not in url:
        url += "&s_mode=s_tag_full"

    post_ids = []
    prev_first_id = None

    async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
        # All pages are requested concurrently; gather() returns the
        # responses in page order.
        tasks = [fetch_page(session, f"{url}&p={page}") for page in range(1, pages + 1)]
        responses = await asyncio.gather(*tasks)

    for data in responses:
        posts = data['body']['illustManga']['data']
        if ai_only:
            posts = [post for post in posts if post['aiType'] == 2]
        if not posts:
            break
        # Stop when a page starts with the same post as the previous one,
        # i.e. the same page was served again.
        current_first_id = posts[0]['id']
        if prev_first_id and current_first_id == prev_first_id:
            break
        prev_first_id = current_first_id
        post_ids.extend(post['id'] for post in posts)

    return post_ids, unquote(keywords, encoding='utf-8')
70
+
71
  def base26(n):
72
  if n == 0:
73
  return "A"
 
82
  def base26_time():
83
  return base26(int(time.time()))
84
 
85
@app.get("/search")
async def search_endpoint(
    raw: str = Query(..., description="The raw URL to search."),
    pages: int = Query(1, description="Number of pages to fetch."),
    ai_only: bool = Query(True, description="Filter for AI-generated content."),
):
    """HTTP wrapper around ``search``.

    On success returns ``{"post_ids": [...], "filename": "<base26 time>_<keywords>"}``.
    Any exception is reported in the body as ``{"error": "<message>"}``
    rather than propagated.
    """
    try:
        ids, tag = await search(raw, pages, ai_only, cookies=cookies, headers=headers)
        filename = f"{base26_time()}_{tag}"
        return {"post_ids": ids, "filename": filename}
    except Exception as exc:
        return {"error": str(exc)}
96
+
97
@app.get("/user")
async def user(
    user_id: int = Query(..., description="The user ID to fetch.")
):
    """Return the illustration IDs and an output filename for one user.

    Fetches ``/ajax/user/{user_id}/profile/all`` and returns the keys of
    ``body.illusts`` as ``post_ids``. The username is read from the first
    ``body.pickup`` entry; when that entry is absent it is read from
    ``/ajax/user/{user_id}`` instead. ``|`` characters are removed from
    the username before it is appended to the base26 timestamp.

    Returns:
        ``{"post_ids": [...], "filename": "<base26 time>_<username>"}``
    """
    async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
        data = await fetch_page(session, f'https://www.pixiv.net/ajax/user/{user_id}/profile/all')
        posts = list(data["body"]["illusts"].keys())
        try:
            username = data['body']['pickup'][0]['userName']
        except (KeyError, IndexError):
            # Await the request first and index the decoded JSON afterwards:
            # subscription binds tighter than `await`, so indexing the call
            # expression directly would subscript the coroutine object and
            # raise TypeError.
            profile = await fetch_page(session, f"https://www.pixiv.net/ajax/user/{user_id}")
            username = profile['body']['name']

    return {"post_ids": posts, "filename": base26_time() + "_" + username.replace("|", "")}
110
+
111
@app.get("/users")
async def users(
    user_ids: List[int] = Query(..., description="List of user IDs to fetch.", alias="user_ids")
):
    """Return illustration IDs and an output filename for several users.

    Every user is fetched concurrently over one shared session. The result
    is a list with one entry per requested ID, in request order: either
    ``{"post_ids": [...], "filename": "..."}`` or, when anything fails for
    that user, ``{"user_id": <id>, "error": "<message>"}``.
    """
    # NOTE(review): `List` has to be imported from `typing`; that import is
    # not visible in this view of the file - confirm it exists.

    async def fetch_user_data(session, uid):
        # Per-user failures are converted into an error entry so that one
        # bad ID does not abort the whole batch.
        try:
            profile_all = await fetch_page(session, f'https://www.pixiv.net/ajax/user/{uid}/profile/all')
            illust_ids = list(profile_all["body"]["illusts"].keys())
            try:
                name = profile_all['body']['pickup'][0]['userName']
            except (KeyError, IndexError):
                # No pickup entry: read the name from the user endpoint.
                profile = await fetch_page(session, f"https://www.pixiv.net/ajax/user/{uid}")
                name = profile['body']['name']
            return {
                "post_ids": illust_ids,
                "filename": base26_time() + "_" + name.replace("|", ""),
            }
        except Exception as exc:
            return {"user_id": uid, "error": str(exc)}

    async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
        return await asyncio.gather(*[fetch_user_data(session, uid) for uid in user_ids])
134
+
135
  def determine_exif_type(metadata):
136
  if metadata is None:
137
  return None
Client/Extract Pixiv/ai_search.py CHANGED
@@ -13,7 +13,7 @@ params = {
13
  'ai_only': True,
14
  }
15
 
16
- response = requests.get('http://127.0.0.1:8000/search', params=params)
17
 
18
  data = response.json()
19
 
 
13
  'ai_only': True,
14
  }
15
 
16
+ response = requests.get('https://q6-p.hf.space/search', params=params)
17
 
18
  data = response.json()
19
 
Client/Extract Pixiv/user.py CHANGED
@@ -10,7 +10,7 @@ def main():
10
 
11
  user_ids = [int(uid) for uid in user_ids]
12
 
13
- response = requests.get('http://127.0.0.1:8000/users', params={'user_ids': user_ids})
14
  response.raise_for_status()
15
  data = response.json()
16
 
 
10
 
11
  user_ids = [int(uid) for uid in user_ids]
12
 
13
+ response = requests.get('https://q6-p.hf.space/users', params={'user_ids': user_ids})
14
  response.raise_for_status()
15
  data = response.json()
16
 
Client/hunt.py CHANGED
@@ -8,7 +8,7 @@ os.chdir(os.path.dirname(os.path.abspath(__file__)))
8
  db = lmdb.open("db", subdir=True, map_size=10485760)
9
 
10
  valid = [f for f in os.listdir() if f.endswith(".txt")]
11
- print(valid)
12
  for idx, file in enumerate(valid):
13
  print(f"{idx + 1}: {file}")
14
 
@@ -30,6 +30,6 @@ for index in indexs:
30
  with db.begin(write=False) as txn:
31
  post_ids = [post_id for post_id in post_ids if not txn.get(post_id.encode())]
32
 
33
- data = requests.post('http://127.0.0.1:8000/pixif', json={"post_ids": post_ids}).json()
34
 
35
  print(data)
 
8
  db = lmdb.open("db", subdir=True, map_size=10485760)
9
 
10
  valid = [f for f in os.listdir() if f.endswith(".txt")]
11
+
12
  for idx, file in enumerate(valid):
13
  print(f"{idx + 1}: {file}")
14
 
 
30
  with db.begin(write=False) as txn:
31
  post_ids = [post_id for post_id in post_ids if not txn.get(post_id.encode())]
32
 
33
+ data = requests.post('https://q6-p.hf.space/pixif', json={"post_ids": post_ids}).json()
34
 
35
  print(data)