q6 committed on
Commit
ea8db81
·
1 Parent(s): d625ef0

Server zip

Browse files
Files changed (2) hide show
  1. API/app.py +46 -1
  2. Client/hunt.py +25 -13
API/app.py CHANGED
@@ -1,7 +1,11 @@
1
- from fastapi import FastAPI, Query
 
2
  import aiohttp
3
  import asyncio
4
  import time
 
 
 
5
  from pydantic import BaseModel
6
  from typing import List, Dict
7
 
@@ -15,6 +19,9 @@ class pixifModel(BaseModel):
15
  class PixifDownloadModel(BaseModel):
16
  posts: Dict[str, str]
17
 
 
 
 
18
  headers = {
19
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0",
20
  'referer': 'https://www.pixiv.net/',
@@ -123,6 +130,11 @@ async def process_post(post_id, session, semaphore):
123
  except Exception:
124
  return post_id, None
125
 
 
 
 
 
 
126
  @app.get("/allimages")
127
  async def all_images(
128
  only_first = Query("0", description="Only fetch the first image of each post."),
@@ -179,6 +191,39 @@ async def pixif(
179
 
180
  return image_exifs
181
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  @app.get("/")
183
  async def read_root():
184
  return {"message": "Hello, World!"}
 
1
+ from fastapi import FastAPI, Query, BackgroundTasks
2
+ from fastapi.responses import FileResponse
3
  import aiohttp
4
  import asyncio
5
  import time
6
+ import tempfile
7
+ import zipfile
8
+ import os
9
  from pydantic import BaseModel
10
  from typing import List, Dict
11
 
 
19
  class PixifDownloadModel(BaseModel):
20
  posts: Dict[str, str]
21
 
22
class PixifZipModel(BaseModel):
    """Request body accepted by the POST /pixif_zip endpoint."""

    # Maps a post id to the image location to download for it. The endpoint
    # uses values starting with "http" as-is and prefixes any other value
    # with `img_base`; the post id becomes the archive entry name.
    d: Dict[str, str]
24
+
25
  headers = {
26
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0",
27
  'referer': 'https://www.pixiv.net/',
 
130
  except Exception:
131
  return post_id, None
132
 
133
async def fetch_image_bytes(session, url, semaphore):
    """Download ``url`` and return the raw response body as bytes.

    Args:
        session: An open HTTP client session exposing ``get(url)`` as an
            async context manager (an ``aiohttp.ClientSession`` in this file).
        url: Absolute URL of the image to fetch.
        semaphore: ``asyncio.Semaphore`` bounding how many downloads run
            concurrently.

    Returns:
        The response body.

    Raises:
        Whatever ``response.raise_for_status()`` raises for a 4xx/5xx reply,
        plus any transport error from the session.
    """
    async with semaphore:
        async with session.get(url) as response:
            # Without this check an error page (403/404 HTML or JSON) would be
            # returned as if it were image data and stored as a ".png".
            response.raise_for_status()
            return await response.read()
137
+
138
  @app.get("/allimages")
139
  async def all_images(
140
  only_first = Query("0", description="Only fetch the first image of each post."),
 
191
 
192
  return image_exifs
193
 
194
@app.post("/pixif_zip")
async def pixif_zip(items: PixifZipModel, background_tasks: BackgroundTasks):
    """Download every requested image and return them bundled in one zip.

    Args:
        items: Request body; ``items.d`` maps a post id to an image URL.
            Values that do not start with "http" are prefixed with the
            module-level ``img_base``.
        background_tasks: FastAPI hook used to delete the temporary archive
            once the response has been sent.

    Returns:
        A ``FileResponse`` streaming the zip, or a small JSON ``detail``
        message when nothing was requested. Images that fail to download are
        skipped, so the archive may contain fewer entries than requested.
    """
    downloads = items.d
    if not downloads:
        return {"detail": "No downloads requested."}

    # Reserve a path on disk only; ZipFile reopens it by name below.
    tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".zip")
    tmp_path = tmp_file.name
    tmp_file.close()

    try:
        connector = aiohttp.TCPConnector(limit=20)
        semaphore = asyncio.Semaphore(20)
        async with aiohttp.ClientSession(headers=headers, connector=connector) as session:

            async def fetch_labelled(post_id, url):
                # Carry the post id with the result. Plain iteration over
                # asyncio.as_completed() yields fresh awaitables rather than
                # the original Task objects, so results cannot be mapped back
                # to their post id through a task-keyed dict.
                full_url = url if url.startswith("http") else img_base + url
                try:
                    return post_id, await fetch_image_bytes(session, full_url, semaphore)
                except Exception:
                    # Best effort: one failed image must not abort the archive.
                    return post_id, None

            jobs = [
                asyncio.create_task(fetch_labelled(post_id, url))
                for post_id, url in downloads.items()
            ]

            with zipfile.ZipFile(tmp_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
                for finished in asyncio.as_completed(jobs):
                    post_id, data = await finished
                    if data:
                        zf.writestr(f"{post_id}.png", data)
    except BaseException:
        # The cleanup task below is only registered on success, so remove the
        # temp file here if building the archive fails or is cancelled.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    background_tasks.add_task(os.remove, tmp_path)
    filename = f"pixif_{base26_time()}.zip"
    return FileResponse(tmp_path, media_type="application/zip", filename=filename)
226
+
227
  @app.get("/")
228
  async def read_root():
229
  return {"message": "Hello, World!"}
Client/hunt.py CHANGED
@@ -1,12 +1,12 @@
1
  import os
2
  import lmdb
3
  import requests
4
- from concurrent.futures import ThreadPoolExecutor
 
5
  from tqdm import tqdm
6
 
7
  local = 0
8
  endpoint = "http://127.0.0.1:7860" if local else "https://q6-p.hf.space"
9
- img_base = 'https://i.pximg.net/img-original/img/'
10
 
11
  def read_dotenv_value(path, key):
12
  try:
@@ -54,15 +54,28 @@ for inp in inputs:
54
  elif inp.isdigit():
55
  indexs.append(int(inp) - 1)
56
 
57
- def download_image(args):
58
- post_id, url = args
59
- response = requests.get(img_base + url, headers={
60
- "Referer": "https://www.pixiv.net/",
61
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0"
62
- })
63
- with open(f"images/Stash/{post_id}.png", "wb") as img_file:
64
- img_file.write(response.content)
65
- return post_id, True
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  def decode_if_binary(val):
68
  if type(val) is bytes:
@@ -94,8 +107,7 @@ for index in indexs:
94
 
95
  if to_download:
96
  print(f"Total images to download: {len(to_download)}")
97
- with ThreadPoolExecutor(max_workers=8) as executor:
98
- list(tqdm(executor.map(download_image, to_download.items()), total=len(to_download), desc="Downloading"))
99
 
100
  images_cache.update(os.listdir("images/Stash"))
101
 
 
1
  import os
2
  import lmdb
3
  import requests
4
+ import tempfile
5
+ import zipfile
6
  from tqdm import tqdm
7
 
8
  local = 0
9
  endpoint = "http://127.0.0.1:7860" if local else "https://q6-p.hf.space"
 
10
 
11
  def read_dotenv_value(path, key):
12
  try:
 
54
  elif inp.isdigit():
55
  indexs.append(int(inp) - 1)
56
 
57
def download_zip(to_download, dest_dir):
    """Fetch the requested images as one zip from the API and unpack them.

    Args:
        to_download: Mapping sent to the ``/pixif_zip`` endpoint as the ``d``
            field of the JSON body.
        dest_dir: Directory the archive is extracted into.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        zipfile.BadZipFile: If the downloaded payload is not a valid zip.
    """
    tmp_path = None
    response = requests.post(f"{endpoint}/pixif_zip", json={"d": to_download}, stream=True)
    try:
        response.raise_for_status()
        total = int(response.headers.get("Content-Length", 0))

        with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp_file:
            tmp_path = tmp_file.name
            # With no Content-Length the bar is disabled, so nothing is drawn
            # and update() is a no-op; one loop serves both cases.
            with tqdm(
                total=total,
                unit="B",
                unit_scale=True,
                desc="Downloading zip",
                disable=total <= 0,
            ) as pbar:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        tmp_file.write(chunk)
                        pbar.update(len(chunk))

        with zipfile.ZipFile(tmp_path, "r") as zf:
            zf.extractall(dest_dir)
    finally:
        # Release the streamed connection and never leave the temp archive
        # behind, even when the download or extraction fails midway.
        response.close()
        if tmp_path is not None and os.path.exists(tmp_path):
            os.remove(tmp_path)
79
 
80
  def decode_if_binary(val):
81
  if type(val) is bytes:
 
107
 
108
  if to_download:
109
  print(f"Total images to download: {len(to_download)}")
110
+ download_zip(to_download, "images/Stash")
 
111
 
112
  images_cache.update(os.listdir("images/Stash"))
113