q6 committed on
Commit
7e1d786
·
1 Parent(s): 4ac6760

tar.gz Working

Browse files
API/app.py CHANGED
@@ -1,5 +1,5 @@
1
- from fastapi import FastAPI, Query
2
- from fastapi.responses import StreamingResponse
3
  import aiohttp
4
  import asyncio
5
  import requests.utils
@@ -8,7 +8,7 @@ import os
8
  import io
9
  from pydantic import BaseModel
10
  from typing import List, Dict
11
- import zipfile
12
  from dotenv import load_dotenv
13
 
14
  img_base = 'https://i.pximg.net/img-original/img/'
@@ -224,39 +224,57 @@ async def pixif(
224
  return image_exifs
225
 
226
 
227
- async def generate_zip(posts, session):
228
- zip_buffer = io.BytesIO()
229
- zipf = zipfile.ZipFile(zip_buffer, mode="w", compression=zipfile.ZIP_DEFLATED)
230
 
 
231
  semaphore = asyncio.Semaphore(100)
 
232
 
233
- async def add_to_zip(post_id, image_url):
234
  async with semaphore:
235
  url = f"{img_base}{image_url}"
236
  async with session.get(url) as response:
237
  image_data = await response.read()
238
- image_name = f"{post_id}.png"
239
- zipf.writestr(image_name, image_data)
240
 
241
- tasks = [add_to_zip(post_id, image_url) for post_id, image_url in posts.items()]
242
- await asyncio.gather(*tasks)
243
 
244
- zipf.close()
245
- zip_buffer.seek(0)
246
- return zip_buffer
 
 
 
 
 
 
 
 
 
247
 
248
  @app.post("/download")
249
  async def download(
250
- items: PixifDownloadModel
 
251
  ):
252
  posts = items.posts
253
  async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
254
- zip_buffer = await generate_zip(posts, session)
 
 
 
 
 
 
 
255
 
256
- return StreamingResponse(
257
- zip_buffer,
258
- media_type="application/zip",
259
- headers={"Content-Disposition": f"attachment; filename={base26_time()}.zip"}
 
260
  )
261
 
262
  @app.get("/")
 
1
+ from fastapi import FastAPI, Query, BackgroundTasks
2
+ from fastapi.responses import FileResponse
3
  import aiohttp
4
  import asyncio
5
  import requests.utils
 
8
  import io
9
  from pydantic import BaseModel
10
  from typing import List, Dict
11
+ import tarfile
12
  from dotenv import load_dotenv
13
 
14
  img_base = 'https://i.pximg.net/img-original/img/'
 
224
  return image_exifs
225
 
226
 
227
+ import tempfile
228
+ from fastapi import BackgroundTasks
 
229
 
230
+ async def generate_tar_gz(posts, session):
231
  semaphore = asyncio.Semaphore(100)
232
+ images = {}
233
 
234
+ async def fetch_image(post_id, image_url):
235
  async with semaphore:
236
  url = f"{img_base}{image_url}"
237
  async with session.get(url) as response:
238
  image_data = await response.read()
239
+ return post_id, image_data
 
240
 
241
+ tasks = [fetch_image(post_id, image_url) for post_id, image_url in posts.items()]
242
+ results = await asyncio.gather(*tasks)
243
 
244
+ images = {post_id: image_data for post_id, image_data in results}
245
+
246
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".tar.gz")
247
+ with tarfile.open(fileobj=temp_file, mode="w:gz") as tar:
248
+ for post_id, image_data in images.items():
249
+ image_name = f"{post_id}.png"
250
+ file_info = tarfile.TarInfo(name=image_name)
251
+ file_info.size = len(image_data)
252
+ tar.addfile(tarinfo=file_info, fileobj=io.BytesIO(image_data))
253
+
254
+ temp_file.seek(0)
255
+ return temp_file
256
 
257
  @app.post("/download")
258
  async def download(
259
+ items: PixifDownloadModel,
260
+ background_tasks: BackgroundTasks
261
  ):
262
  posts = items.posts
263
  async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
264
+ temp_file = await generate_tar_gz(posts, session)
265
+
266
+ filename = f"{base26_time()}.tar.gz"
267
+
268
+ def cleanup_temp_file(file_path):
269
+ os.unlink(file_path)
270
+
271
+ background_tasks.add_task(cleanup_temp_file, temp_file.name)
272
 
273
+ return FileResponse(
274
+ path=temp_file.name,
275
+ media_type="application/gzip",
276
+ filename=filename,
277
+ background=background_tasks
278
  )
279
 
280
  @app.get("/")
Client/Extract Pixiv/ai_search.py CHANGED
@@ -1,7 +1,7 @@
1
  import requests
2
  import os
3
 
4
- local = False
5
  if local:
6
  endpoint = "http://127.0.0.1:7860"
7
  else:
 
1
  import requests
2
  import os
3
 
4
+ local = 0
5
  if local:
6
  endpoint = "http://127.0.0.1:7860"
7
  else:
Client/Extract Pixiv/user.py CHANGED
@@ -2,7 +2,7 @@ import requests
2
  import re
3
  import os
4
 
5
- local = False
6
  if local:
7
  endpoint = "http://127.0.0.1:7860"
8
  else:
 
2
  import re
3
  import os
4
 
5
+ local = 0
6
  if local:
7
  endpoint = "http://127.0.0.1:7860"
8
  else:
Client/hunt.py CHANGED
@@ -2,9 +2,9 @@ import os
2
  import lmdb
3
  import requests
4
  import io
5
- import zipfile
6
 
7
- local = False
8
  if local:
9
  endpoint = "http://127.0.0.1:7860"
10
  else:
@@ -62,12 +62,12 @@ for index in indexs:
62
  to_download = {post_id: url for post_id, url in post_ids.items() if url not in blacklist and f"{post_id}.png" not in images_cache}
63
  print(f"Downloading {len(to_download)} images...")
64
  if to_download:
65
- images_zip = requests.post(f'{endpoint}/download', json={'posts': to_download}, stream=True)
66
 
67
  print("Extracting images...")
68
- with io.BytesIO(images_zip.content) as f:
69
- with zipfile.ZipFile(f, mode='r') as zipf:
70
- zipf.extractall("images/Stash")
71
 
72
 
73
  images_cache = os.listdir("images/Stash")
 
2
  import lmdb
3
  import requests
4
  import io
5
+ import tarfile
6
 
7
+ local = 0
8
  if local:
9
  endpoint = "http://127.0.0.1:7860"
10
  else:
 
62
  to_download = {post_id: url for post_id, url in post_ids.items() if url not in blacklist and f"{post_id}.png" not in images_cache}
63
  print(f"Downloading {len(to_download)} images...")
64
  if to_download:
65
+ images_tar_gz = requests.post(f'{endpoint}/download', json={'posts': to_download}, stream=True)
66
 
67
  print("Extracting images...")
68
+ with io.BytesIO(images_tar_gz.content) as f:
69
+ with tarfile.open(fileobj=f, mode='r:gz') as tarf:
70
+ tarf.extractall("images/Stash")
71
 
72
 
73
  images_cache = os.listdir("images/Stash")