q6 committed on
Commit
134abb3
·
1 Parent(s): c2e82c1
API/app.py CHANGED
@@ -10,6 +10,8 @@ from pydantic import BaseModel
10
  from typing import List, Dict
11
  import tarfile
12
  from dotenv import load_dotenv
 
 
13
 
14
  img_base = 'https://i.pximg.net/img-original/img/'
15
 
@@ -195,20 +197,17 @@ async def process_post(post_id, session, semaphore):
195
  data = await fetch_page(session, f"https://www.pixiv.net/ajax/illust/{post_id}/pages")
196
  image_urls = [page['urls']['original'] for page in data['body'] if 'png' in page['urls']['original']][:20]
197
 
198
- # Create tasks to fetch EXIF data for all image URLs
199
  tasks = [get_exif(image_url, session) for image_url in image_urls]
200
  exif_data_list = await asyncio.gather(*tasks)
201
 
202
- # Process EXIF data to find the earliest matching image per post ID
203
  for image_url, metadata in zip(image_urls, exif_data_list):
204
  exif_type = determine_exif_type(metadata)
205
  if exif_type not in ['photoshop', 'celsys', None]:
206
  return post_id, image_url
207
  return post_id, None
208
- except Exception as e:
209
  return post_id, None
210
 
211
-
212
  @app.post("/pixif")
213
  async def pixif(
214
  items: pixifModel
@@ -223,11 +222,7 @@ async def pixif(
223
  image_exifs = {post_id: image_url.replace(img_base, '', 1) for post_id, image_url in results if image_url}
224
  return image_exifs
225
 
226
-
227
- import tempfile
228
- from fastapi import BackgroundTasks
229
-
230
- async def generate_tar_gz(posts, session):
231
  semaphore = asyncio.Semaphore(1000)
232
  images = {}
233
 
@@ -240,41 +235,42 @@ async def generate_tar_gz(posts, session):
240
 
241
  tasks = [fetch_image(post_id, image_url) for post_id, image_url in posts.items()]
242
  results = await asyncio.gather(*tasks)
243
-
244
  images = {post_id: image_data for post_id, image_data in results}
245
 
246
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".tar.gz")
247
- with tarfile.open(fileobj=temp_file, mode="w:gz") as tar:
 
248
  for post_id, image_data in images.items():
249
  image_name = f"{post_id}.png"
250
  file_info = tarfile.TarInfo(name=image_name)
251
  file_info.size = len(image_data)
252
  tar.addfile(tarinfo=file_info, fileobj=io.BytesIO(image_data))
 
 
 
 
 
253
 
 
 
 
254
  temp_file.seek(0)
255
  return temp_file
256
 
257
  @app.post("/download")
258
  async def download(
259
  items: PixifDownloadModel,
260
- background_tasks: BackgroundTasks
261
  ):
262
  posts = items.posts
263
  async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
264
- temp_file = await generate_tar_gz(posts, session)
265
-
266
- filename = f"{base26_time()}.tar.gz"
267
-
268
- def cleanup_temp_file(file_path):
269
- os.unlink(file_path)
270
 
271
- background_tasks.add_task(cleanup_temp_file, temp_file.name)
272
 
273
  return FileResponse(
274
  path=temp_file.name,
275
- media_type="application/gzip",
276
- filename=filename,
277
- background=background_tasks
278
  )
279
 
280
  @app.get("/")
 
10
  from typing import List, Dict
11
  import tarfile
12
  from dotenv import load_dotenv
13
+ import tempfile
14
+ import zstandard as zstd
15
 
16
  img_base = 'https://i.pximg.net/img-original/img/'
17
 
 
197
  data = await fetch_page(session, f"https://www.pixiv.net/ajax/illust/{post_id}/pages")
198
  image_urls = [page['urls']['original'] for page in data['body'] if 'png' in page['urls']['original']][:20]
199
 
 
200
  tasks = [get_exif(image_url, session) for image_url in image_urls]
201
  exif_data_list = await asyncio.gather(*tasks)
202
 
 
203
  for image_url, metadata in zip(image_urls, exif_data_list):
204
  exif_type = determine_exif_type(metadata)
205
  if exif_type not in ['photoshop', 'celsys', None]:
206
  return post_id, image_url
207
  return post_id, None
208
+ except:
209
  return post_id, None
210
 
 
211
  @app.post("/pixif")
212
  async def pixif(
213
  items: pixifModel
 
222
  image_exifs = {post_id: image_url.replace(img_base, '', 1) for post_id, image_url in results if image_url}
223
  return image_exifs
224
 
225
+ async def generate_zstd_archive(posts, session):
 
 
 
 
226
  semaphore = asyncio.Semaphore(1000)
227
  images = {}
228
 
 
235
 
236
  tasks = [fetch_image(post_id, image_url) for post_id, image_url in posts.items()]
237
  results = await asyncio.gather(*tasks)
 
238
  images = {post_id: image_data for post_id, image_data in results}
239
 
240
+ # Create a tar in memory
241
+ tar_buffer = io.BytesIO()
242
+ with tarfile.open(fileobj=tar_buffer, mode="w") as tar:
243
  for post_id, image_data in images.items():
244
  image_name = f"{post_id}.png"
245
  file_info = tarfile.TarInfo(name=image_name)
246
  file_info.size = len(image_data)
247
  tar.addfile(tarinfo=file_info, fileobj=io.BytesIO(image_data))
248
+ tar_buffer.seek(0)
249
+
250
+ # Compress with zstd at level 3
251
+ cctx = zstd.ZstdCompressor(level=3)
252
+ compressed = cctx.compress(tar_buffer.read())
253
 
254
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".zstd")
255
+ temp_file.write(compressed)
256
+ temp_file.flush()
257
  temp_file.seek(0)
258
  return temp_file
259
 
260
  @app.post("/download")
261
  async def download(
262
  items: PixifDownloadModel,
 
263
  ):
264
  posts = items.posts
265
  async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
266
+ temp_file = await generate_zstd_archive(posts, session)
 
 
 
 
 
267
 
268
+ filename = f"{base26_time()}.zstd"
269
 
270
  return FileResponse(
271
  path=temp_file.name,
272
+ media_type="application/zstd",
273
+ filename=filename
 
274
  )
275
 
276
  @app.get("/")
Client/Extract Pixiv/ai_search.py CHANGED
@@ -11,7 +11,7 @@ os.chdir(os.path.dirname(os.path.abspath(__file__)))
11
 
12
  input_url = input("Enter the URL: ")
13
 
14
- pages = 600 // 60
15
 
16
  params = {
17
  'raw': input_url,
 
11
 
12
  input_url = input("Enter the URL: ")
13
 
14
+ pages = 1200 // 60
15
 
16
  params = {
17
  'raw': input_url,
Client/hunt.py CHANGED
@@ -3,6 +3,7 @@ import lmdb
3
  import requests
4
  import io
5
  import tarfile
 
6
 
7
  local = 0
8
  if local:
@@ -62,14 +63,18 @@ for index in indexs:
62
  to_download = {post_id: url for post_id, url in post_ids.items() if url not in blacklist and f"{post_id}.png" not in images_cache}
63
  print(f"Downloading {len(to_download)} images...")
64
  if to_download:
65
- images_tar_gz = requests.post(f'{endpoint}/download', json={'posts': to_download}, stream=True)
66
 
67
- print("Extracting images...")
68
- with io.BytesIO(images_tar_gz.content) as f:
69
- with tarfile.open(fileobj=f, mode='r:gz') as tarf:
 
 
 
 
 
70
  tarf.extractall("images/Stash")
71
 
72
-
73
  images_cache = os.listdir("images/Stash")
74
  print("Linking images...")
75
  for i, post_id in enumerate(post_ids.keys()):
 
3
  import requests
4
  import io
5
  import tarfile
6
+ import zstandard
7
 
8
  local = 0
9
  if local:
 
63
  to_download = {post_id: url for post_id, url in post_ids.items() if url not in blacklist and f"{post_id}.png" not in images_cache}
64
  print(f"Downloading {len(to_download)} images...")
65
  if to_download:
66
+ images_zstd = requests.post(f'{endpoint}/download', json={'posts': to_download}, stream=True)
67
 
68
+ # Decompress zstd
69
+ print("Decompressing zstd...")
70
+ dctx = zstandard.ZstdDecompressor()
71
+ decompressed = dctx.decompress(images_zstd.content)
72
+
73
+ print("Extracting images from tar...")
74
+ with io.BytesIO(decompressed) as f:
75
+ with tarfile.open(fileobj=f, mode='r:') as tarf:
76
  tarf.extractall("images/Stash")
77
 
 
78
  images_cache = os.listdir("images/Stash")
79
  print("Linking images...")
80
  for i, post_id in enumerate(post_ids.keys()):
Client/show_all.py CHANGED
@@ -1,13 +1,36 @@
1
  import os
2
  import lmdb
3
 
4
-
5
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
6
 
7
  db = lmdb.open("db", subdir=True, map_size=524288)
8
 
9
- # view all in db
 
10
  with db.begin() as txn:
11
- for key, value in txn.cursor():
 
 
12
  if value != b'\x00':
13
- print(key.decode(), value.decode().split('p')[1].split('.')[0], end=' ')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import lmdb
3
 
 
4
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
5
 
6
  db = lmdb.open("db", subdir=True, map_size=524288)
7
 
8
+ items = []
9
+
10
  with db.begin() as txn:
11
+ cursor = txn.cursor()
12
+
13
+ for key, value in cursor:
14
  if value != b'\x00':
15
+ try:
16
+ key_str = key.decode('utf-8')
17
+
18
+ value_str = value.decode('utf-8')
19
+
20
+ int_part_str = value_str.split('p')[1].split('.')[0]
21
+
22
+ int_part = int(int_part_str)
23
+
24
+ items.append((key_str, int_part))
25
+
26
+ except (IndexError, ValueError) as e:
27
+ print(f"Skipping key {key} due to parsing error: {e}")
28
+ continue
29
+
30
+ sorted_items = sorted(items, key=lambda item: item[1])
31
+
32
+
33
+ for key, int_value in sorted_items:
34
+ print(f"{key} {int_value}", end=' ')
35
+
36
+ print()
Dockerfile CHANGED
@@ -8,7 +8,7 @@ ENV PYTHONDONTWRITEBYTECODE=1
8
  ENV PYTHONUNBUFFERED=1
9
  WORKDIR /app
10
 
11
- RUN pip install --no-cache-dir fastapi aiohttp uvicorn python-dotenv pydantic requests aiofiles
12
 
13
  COPY --chown=user ./API /app
14
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
8
  ENV PYTHONUNBUFFERED=1
9
  WORKDIR /app
10
 
11
+ RUN pip install --no-cache-dir fastapi aiohttp uvicorn python-dotenv pydantic requests zstandard
12
 
13
  COPY --chown=user ./API /app
14
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]