q6 committed on
Commit
6566bf2
·
1 Parent(s): 1523025

Hyper Filter

Browse files
Files changed (3) hide show
  1. API/app.py +33 -17
  2. Client/hunt.py +1 -1
  3. Client/t2.py +1 -1
API/app.py CHANGED
@@ -189,33 +189,49 @@ def parse_png_metadata(data):
189
  index += chunk_len + 4
190
  return None
191
 
192
- async def process_post(post_id, session, semaphore):
193
  async with semaphore:
194
- try:
195
- data = await fetch_page(session, f"https://www.pixiv.net/ajax/illust/{post_id}/pages")
196
- image_urls = [page['urls']['original'] for page in data['body'] if 'png' in page['urls']['original']][:20]
197
- for image_url in image_urls:
198
- metadata = await get_exif(image_url, session)
199
- exif_type = determine_exif_type(metadata)
200
- if exif_type not in ['photoshop', 'celsys', None]:
201
- return post_id, image_url
202
- return post_id, None
203
- except Exception as e:
204
- return post_id, None
205
 
206
  @app.post("/pixif")
207
  async def pixif(
208
  items: pixifModel
209
  ):
210
  post_ids = items.post_ids
211
- semaphore = asyncio.Semaphore(100)
212
 
213
  async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
214
- tasks = [process_post(post_id, session, semaphore) for post_id in post_ids]
215
- results = await asyncio.gather(*tasks)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
- image_exifs = {post_id: image_url.replace('https://i.pximg.net/img-original/', '', 1) for post_id, image_url in results if image_url}
218
- return image_exifs
219
 
220
  async def generate_zip(posts, session):
221
  zip_buffer = io.BytesIO()
 
189
  index += chunk_len + 4
190
  return None
191
 
192
+ async def process_image(image_dict, session, semaphore):
193
  async with semaphore:
194
+ image_url = image_dict['image_url']
195
+ post_id = image_dict['post_id']
196
+ metadata = await get_exif(image_url, session)
197
+ exif_type = determine_exif_type(metadata)
198
+ if exif_type not in ['photoshop', 'celsys', None]:
199
+ return {'post_id': post_id, 'image_url': image_url}
200
+ return None
201
+
 
 
 
202
 
203
  @app.post("/pixif")
204
  async def pixif(
205
  items: pixifModel
206
  ):
207
  post_ids = items.post_ids
208
+ semaphore = asyncio.Semaphore(200)
209
 
210
  async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
211
+ pages_tasks = [fetch_page(session, f"https://www.pixiv.net/ajax/illust/{post_id}/pages") for post_id in post_ids]
212
+ pages_results = await asyncio.gather(*pages_tasks)
213
+
214
+ image_list = []
215
+ for post_id, data in zip(post_ids, pages_results):
216
+ image_urls = [page['urls']['original'] for page in data['body'] if 'png' in page['urls']['original']][:20]
217
+ for image_url in image_urls:
218
+ image_list.append({'post_id': post_id, 'image_url': image_url})
219
+
220
+ image_list.sort(key=lambda x: int(x['post_id']))
221
+
222
+ batch_size = 20
223
+ for i in range(0, len(image_list), batch_size):
224
+ batch = image_list[i:i+batch_size]
225
+ tasks = [process_image(image_dict, session, semaphore) for image_dict in batch]
226
+ results = await asyncio.gather(*tasks)
227
+ for result in results:
228
+ if result:
229
+ post_id = result['post_id']
230
+ image_url = result['image_url']
231
+ image_exifs = {post_id: image_url.replace('https://i.pximg.net/img-original/', '', 1)}
232
+ return image_exifs
233
+ return {}
234
 
 
 
235
 
236
  async def generate_zip(posts, session):
237
  zip_buffer = io.BytesIO()
Client/hunt.py CHANGED
@@ -9,7 +9,7 @@ img_base = 'https://i.pximg.net/img-original/img/'
9
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
10
  os.makedirs("images/Stash", exist_ok=True)
11
 
12
- db = lmdb.open("db", subdir=True, map_size=1048576)
13
  valid = [f for f in os.listdir() if f.endswith(".txt")]
14
 
15
  for idx, file in enumerate(valid):
 
9
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
10
  os.makedirs("images/Stash", exist_ok=True)
11
 
12
+ db = lmdb.open("db", subdir=True, map_size=524288)
13
  valid = [f for f in os.listdir() if f.endswith(".txt")]
14
 
15
  for idx, file in enumerate(valid):
Client/t2.py CHANGED
@@ -4,7 +4,7 @@ import lmdb
4
 
5
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
6
 
7
- db = lmdb.open("db", subdir=True, map_size=1048576)
8
 
9
  # view all in db
10
  with db.begin() as txn:
 
4
 
5
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
6
 
7
+ db = lmdb.open("db", subdir=True, map_size=524288)
8
 
9
  # view all in db
10
  with db.begin() as txn: