q6 committed on
Commit
2ec1e40
·
1 Parent(s): 5ab9102
Files changed (1) hide show
  1. Client/hunt.py +56 -21
Client/hunt.py CHANGED
@@ -1,14 +1,20 @@
1
  import json
2
  import os
3
  import sqlite3
 
 
4
  import requests
5
- import tempfile
6
- import zipfile
7
  from tqdm import tqdm
8
 
9
  local = 0
10
  dry_run = 0
11
  endpoint = "http://127.0.0.1:7860" if local else "https://q6-p.hf.space"
 
 
 
 
 
 
12
 
13
  def read_dotenv_value(path, key):
14
  try:
@@ -129,28 +135,57 @@ for inp in inputs:
129
  elif inp.isdigit():
130
  indexs.append(int(inp) - 1)
131
 
132
- def download_zip(to_download, dest_dir):
133
- response = requests.post(f"{endpoint}/pixif_zip", json={"d": to_download}, stream=True)
134
- response.raise_for_status()
135
- total = int(response.headers.get("Content-Length", 0))
 
 
 
 
 
136
 
137
- with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp_file:
138
- tmp_path = tmp_file.name
139
- if total > 0:
140
- with tqdm(total=total, unit="B", unit_scale=True, desc="Downloading zip") as pbar:
 
 
 
 
 
 
 
 
141
  for chunk in response.iter_content(chunk_size=1024 * 1024):
142
  if chunk:
143
- tmp_file.write(chunk)
144
- pbar.update(len(chunk))
145
- else:
146
- for chunk in response.iter_content(chunk_size=1024 * 1024):
147
- if chunk:
148
- tmp_file.write(chunk)
149
-
150
- with zipfile.ZipFile(tmp_path, "r") as zf:
151
- zf.extractall(dest_dir)
 
 
 
 
 
152
 
153
- os.remove(tmp_path)
 
 
 
 
 
 
 
 
 
 
 
154
 
155
  def decode_if_binary(val):
156
  if type(val) is bytes:
@@ -185,7 +220,7 @@ for index in indexs:
185
  if to_download:
186
  print(f"Total images to download: {len(to_download)}")
187
  if not dry_run:
188
- download_zip(to_download, "images/Stash")
189
 
190
  images_cache.update(os.listdir("images/Stash"))
191
 
 
1
  import json
2
  import os
3
  import sqlite3
4
+ from concurrent.futures import ThreadPoolExecutor, as_completed
5
+
6
  import requests
 
 
7
  from tqdm import tqdm
8
 
9
  local = 0
10
  dry_run = 0
11
  endpoint = "http://127.0.0.1:7860" if local else "https://q6-p.hf.space"
12
+ IMG_BASE = "https://i.pximg.net/img-original/img/"
13
+ REQUEST_HEADERS = {
14
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0",
15
+ "Referer": "https://www.pixiv.net/",
16
+ }
17
+ MAX_WORKERS = min(16, os.cpu_count() or 8)
18
 
19
  def read_dotenv_value(path, key):
20
  try:
 
135
  elif inp.isdigit():
136
  indexs.append(int(inp) - 1)
137
 
138
def build_image_url(url):
    """Return the URL to request for *url*.

    A value that already starts with ``http`` is returned unchanged; any
    other value is treated as a path and appended to ``IMG_BASE``.
    """
    already_absolute = url.startswith("http")
    return url if already_absolute else IMG_BASE + url
142
+
143
def download_one_image(post_id, url, dest_dir, phpsessid):
    """Download a single image into *dest_dir* as ``<post_id>.png``.

    The response is streamed into a ``.part`` file next to the destination
    and moved into place with ``os.replace`` once fully written, so a
    partially downloaded file never appears under the final name.

    Args:
        post_id: Identifier used for the output file name.
        url: Full URL, or a path resolved through ``build_image_url``.
        dest_dir: Directory that receives the image.
        phpsessid: Value sent as the ``PHPSESSID`` cookie.

    Returns:
        A ``(post_id, status, detail)`` tuple. ``status`` is ``"exists"``
        when the destination file is already present, ``"ok"`` after a
        successful download, or ``"error"`` on failure, in which case
        ``detail`` is the exception text (otherwise ``None``).
    """
    final_path = os.path.join(dest_dir, f"{post_id}.png")
    if os.path.exists(final_path):
        return post_id, "exists", None

    partial_path = final_path + ".part"
    # Resolved outside the try block so a malformed url still propagates.
    source_url = build_image_url(url)
    try:
        with requests.get(
            source_url,
            headers=REQUEST_HEADERS,
            cookies={"PHPSESSID": phpsessid},
            stream=True,
            timeout=60,
        ) as response:
            response.raise_for_status()
            with open(partial_path, "wb") as out_file:
                for block in response.iter_content(chunk_size=1024 * 1024):
                    if block:
                        out_file.write(block)
        os.replace(partial_path, final_path)
    except Exception as exc:
        # Best effort: drop the partial file and report instead of raising,
        # so one failed image does not abort the rest of the batch.
        if os.path.exists(partial_path):
            try:
                os.remove(partial_path)
            except OSError:
                pass
        return post_id, "error", str(exc)
    return post_id, "ok", None
172
+
173
def download_images(to_download, dest_dir, phpsessid, max_workers=MAX_WORKERS):
    """Download every entry of *to_download* into *dest_dir* using threads.

    Args:
        to_download: Mapping of post id to image URL (or path); iterated
            with ``.items()``.
        dest_dir: Target directory, created if it does not exist.
        phpsessid: Passed through to ``download_one_image``.
        max_workers: Size of the thread pool.

    Failures are reported through ``tqdm.write`` and do not stop the
    remaining downloads. Returns ``None``.
    """
    if not to_download:
        return

    os.makedirs(dest_dir, exist_ok=True)
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = [
            pool.submit(download_one_image, post_id, url, dest_dir, phpsessid)
            for post_id, url in to_download.items()
        ]

        with tqdm(total=len(pending), unit="image", desc="Downloading images") as progress:
            for finished in as_completed(pending):
                post_id, status, detail = finished.result()
                if status == "error":
                    # tqdm.write keeps the message from corrupting the bar.
                    tqdm.write(f"Failed {post_id}: {detail}")
                progress.update(1)
189
 
190
  def decode_if_binary(val):
191
  if type(val) is bytes:
 
220
  if to_download:
221
  print(f"Total images to download: {len(to_download)}")
222
  if not dry_run:
223
+ download_images(to_download, "images/Stash", phpsessid)
224
 
225
  images_cache.update(os.listdir("images/Stash"))
226