q6 committed on
Commit
3b91e61
1 Parent(s): ad65a09
API/app.py CHANGED
@@ -130,10 +130,10 @@ async def process_post(post_id, session, semaphore):
130
  except Exception:
131
  return post_id, None
132
 
133
- async def fetch_image_bytes(session, url, semaphore):
134
  async with semaphore:
135
  async with session.get(url) as response:
136
- return await response.read()
137
 
138
  @app.get("/allimages")
139
  async def all_images(
@@ -204,17 +204,15 @@ async def pixif_zip(items: PixifZipModel, background_tasks: BackgroundTasks):
204
  connector = aiohttp.TCPConnector(limit=20)
205
  semaphore = asyncio.Semaphore(20)
206
  async with aiohttp.ClientSession(headers=headers, connector=connector) as session:
207
- tasks = {}
208
  for post_id, url in downloads.items():
209
  full_url = url if url.startswith("http") else img_base + url
210
- task = asyncio.create_task(fetch_image_bytes(session, full_url, semaphore))
211
- tasks[task] = post_id
212
 
213
  with zipfile.ZipFile(tmp_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
214
  for task in asyncio.as_completed(tasks):
215
- post_id = tasks[task]
216
  try:
217
- data = await task
218
  except Exception:
219
  continue
220
  if data:
 
130
  except Exception:
131
  return post_id, None
132
 
133
async def fetch_image_bytes(session, url, post_id, semaphore):
    """Download one image and return it together with its post id.

    Args:
        session: an open aiohttp.ClientSession used for the request.
        url: absolute URL of the image to fetch.
        post_id: opaque identifier passed through unchanged so the caller
            can match the downloaded bytes back to the originating post.
        semaphore: asyncio.Semaphore bounding concurrent downloads.

    Returns:
        A ``(post_id, body_bytes)`` tuple with the raw response body.

    Raises:
        aiohttp.ClientResponseError: if the server answers with an HTTP
            error status. The caller's per-task ``except Exception`` path
            treats this as "skip this image".
    """
    async with semaphore:
        async with session.get(url) as response:
            # Fail fast on HTTP errors so an HTML error page is never
            # mistaken for image data and written into the zip archive.
            response.raise_for_status()
            return post_id, await response.read()
137
 
138
  @app.get("/allimages")
139
  async def all_images(
 
204
  connector = aiohttp.TCPConnector(limit=20)
205
  semaphore = asyncio.Semaphore(20)
206
  async with aiohttp.ClientSession(headers=headers, connector=connector) as session:
207
+ tasks = []
208
  for post_id, url in downloads.items():
209
  full_url = url if url.startswith("http") else img_base + url
210
+ tasks.append(asyncio.create_task(fetch_image_bytes(session, full_url, post_id, semaphore)))
 
211
 
212
  with zipfile.ZipFile(tmp_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
213
  for task in asyncio.as_completed(tasks):
 
214
  try:
215
+ post_id, data = await task
216
  except Exception:
217
  continue
218
  if data:
Client/Extract Pixiv/ai_search.py CHANGED
@@ -9,7 +9,7 @@ os.chdir(os.path.dirname(os.path.abspath(__file__)))
9
 
10
  input_url = input("Enter the URL: ")
11
 
12
- pages = 1
13
 
14
  x=''
15
  n=int(time.time()*100)
 
9
 
10
  input_url = input("Enter the URL: ")
11
 
12
+ pages = 15
13
 
14
  x=''
15
  n=int(time.time()*100)
Client/Extract Pixiv/pixiv_api.py CHANGED
@@ -18,6 +18,25 @@ headers = {
18
  'referer': 'https://www.pixiv.net/',
19
  }
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  async def fetch_page(session, url):
22
  async with session.get(url) as response:
23
  data = await response.json()
@@ -35,8 +54,6 @@ async def search(raw, pages, ai_only=True, real_only=True, cookies=None, headers
35
  post_ids = []
36
  tasks = []
37
 
38
- # AI銈ゃ儵銈广儓 OR AI鐢熸垚 OR StableDiffusion OR AI-generated OR NovelAI OR NovelAIDiffusionAI OR AIart
39
- AI_TAGS = 'AI銈ゃ儵銈广儓 AI鐢熸垚 StableDiffusion AI-generated NovelAI NovelAIDiffusionAI AIart AI'.lower().split(' ')
40
  async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
41
  for page in range(1, pages + 1):
42
  page_url = f"{url.strip()}&p={page}"
@@ -44,18 +61,12 @@ async def search(raw, pages, ai_only=True, real_only=True, cookies=None, headers
44
  tasks.append(task)
45
 
46
  responses = await asyncio.gather(*tasks)
47
- posts = []
48
  for data in responses:
 
49
  if ai_only:
50
- for post in data['body']['illustManga']['data']:
51
- if post['aiType'] == 2 or any(tag.lower() in AI_TAGS for tag in post['tags']):
52
- posts.append(post)
53
- if real_only:
54
- for post in data['body']['illustManga']['data']:
55
- if post['aiType'] != 2:
56
- posts.append(post)
57
- if not ai_only and not real_only:
58
- posts = data['body']['illustManga']['data']
59
  if not posts:
60
  break
61
  post_ids.extend([post['id'] for post in posts])
@@ -185,4 +196,4 @@ async def get_pixif_data(post_ids):
185
  results = await asyncio.gather(*tasks)
186
 
187
  image_exifs = {post_id: image_url.replace(img_base, '', 1) for post_id, image_url in results if image_url}
188
- return image_exifs
 
18
  'referer': 'https://www.pixiv.net/',
19
  }
20
 
21
# Canonical (casefolded) tag names that mark a post as AI-generated.
# NOTE(review): the legacy inline tag list also contained the bare tag
# "ai"; it is absent here -- confirm that removal was deliberate.
AI_TAGS = {
    "ai銈ゃ儵銈广儓",
    "ai鐢熸垚",
    "stablediffusion",
    "ai-generated",
    "novelai",
    "novelaidiffusionai",
    "aiart",
}

def is_ai_post(post):
    """Return True when *post* looks AI-generated.

    A post counts as AI when Pixiv itself flags it (``aiType == 2``) or
    when any of its tags matches a known AI tag, case-insensitively.
    A missing/None ``tags`` value and non-string tag entries are
    tolerated and simply ignored.
    """
    if post.get("aiType") == 2:
        return True
    return any(
        isinstance(tag, str) and tag.casefold() in AI_TAGS
        for tag in (post.get("tags") or [])
    )
39
+
40
  async def fetch_page(session, url):
41
  async with session.get(url) as response:
42
  data = await response.json()
 
54
  post_ids = []
55
  tasks = []
56
 
 
 
57
  async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
58
  for page in range(1, pages + 1):
59
  page_url = f"{url.strip()}&p={page}"
 
61
  tasks.append(task)
62
 
63
  responses = await asyncio.gather(*tasks)
 
64
  for data in responses:
65
+ posts = data['body']['illustManga']['data']
66
  if ai_only:
67
+ posts = [post for post in posts if is_ai_post(post)]
68
+ elif real_only:
69
+ posts = [post for post in posts if not is_ai_post(post)]
 
 
 
 
 
 
70
  if not posts:
71
  break
72
  post_ids.extend([post['id'] for post in posts])
 
196
  results = await asyncio.gather(*tasks)
197
 
198
  image_exifs = {post_id: image_url.replace(img_base, '', 1) for post_id, image_url in results if image_url}
199
+ return image_exifs