q6 committed on
Commit
a3d8f57
·
1 Parent(s): 06706ca
Files changed (3) hide show
  1. API/app.py +21 -2
  2. Client/Extract Pixiv/user.py +4 -2
  3. Client/hunt.py +1 -1
API/app.py CHANGED
@@ -194,13 +194,32 @@ async def process_post(post_id, session, semaphore):
194
  data = await fetch_page(session, f"https://www.pixiv.net/ajax/illust/{post_id}/pages")
195
  image_urls = [page['urls']['original'] for page in data['body'] if 'png' in page['urls']['original']]
196
 
197
- tasks = [get_exif(image_url, session) for image_url in image_urls]
198
- exif_data_list = await asyncio.gather(*tasks)
 
 
 
 
 
 
199
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  for image_url, metadata in zip(image_urls, exif_data_list):
201
  exif_type = determine_exif_type(metadata)
202
  if exif_type not in ['photoshop', 'celsys', None]:
203
  return post_id, image_url
 
204
  return post_id, None
205
  except:
206
  return post_id, None
 
194
  data = await fetch_page(session, f"https://www.pixiv.net/ajax/illust/{post_id}/pages")
195
  image_urls = [page['urls']['original'] for page in data['body'] if 'png' in page['urls']['original']]
196
 
197
+ initial_chunks = [
198
+ (0, 1),
199
+ (1, 6),
200
+ (6, 10),
201
+ (10, 21),
202
+ (21, 31),
203
+ (31, 41),
204
+ ]
205
 
206
+ chunks = initial_chunks[:]
207
+ start = 41
208
+ while start < len(image_urls):
209
+ end = min(start + 10, len(image_urls))
210
+ chunks.append((start, end))
211
+ start = end
212
+
213
+ exif_data_list = []
214
+ for s, e in chunks:
215
+ chunk_tasks = [get_exif(image_urls[i], session) for i in range(s, e)]
216
+ exif_data_list.extend(await asyncio.gather(*chunk_tasks))
217
+
218
  for image_url, metadata in zip(image_urls, exif_data_list):
219
  exif_type = determine_exif_type(metadata)
220
  if exif_type not in ['photoshop', 'celsys', None]:
221
  return post_id, image_url
222
+
223
  return post_id, None
224
  except:
225
  return post_id, None
Client/Extract Pixiv/user.py CHANGED
@@ -11,8 +11,10 @@ else:
11
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
12
 
13
  inp = input("Enter User IDs (separated by spaces or commas): ")
14
- while inp != "":
15
- inp += input()
 
 
16
  user_ids = re.findall(r"\d+", inp)
17
 
18
  user_ids = [int(uid) for uid in user_ids]
 
11
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
12
 
13
  inp = input("Enter User IDs (separated by spaces or commas): ")
14
+ inps='1'
15
+ while inps != "":
16
+ inps = input('')
17
+ inp += inps
18
  user_ids = re.findall(r"\d+", inp)
19
 
20
  user_ids = [int(uid) for uid in user_ids]
Client/hunt.py CHANGED
@@ -18,7 +18,7 @@ os.makedirs("images/Stash", exist_ok=True)
18
 
19
  images_cache = os.listdir("images/Stash")
20
 
21
- db = lmdb.open("db", subdir=True, map_size=524288)
22
  valid = [f for f in os.listdir() if f.endswith(".txt")]
23
 
24
  for idx, file in enumerate(valid):
 
18
 
19
  images_cache = os.listdir("images/Stash")
20
 
21
+ db = lmdb.open("db", subdir=True, map_size=1048576)
22
  valid = [f for f in os.listdir() if f.endswith(".txt")]
23
 
24
  for idx, file in enumerate(valid):