q6 committed on
Commit
76ebf3f
·
1 Parent(s): 433e037
Files changed (1) hide show
  1. Client/dry-hunt.py +222 -0
Client/dry-hunt.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import concurrent.futures
2
+ import gzip
3
+ import io
4
+ import json
5
+ import os
6
+ import threading
7
+ from typing import Union
8
+
9
+ import numpy as np
10
+ import requests
11
+ from PIL import Image
12
+ from requests.adapters import HTTPAdapter
13
+ from tqdm import tqdm
14
+
15
+
16
# Base URL on i.pximg.net for original images (not referenced by the code
# visible in this file).
IMG_BASE = "https://i.pximg.net/img-original/img/"

# Browser-like User-Agent installed on every session by build_session().
USER_AGENT = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) "
    "Gecko/20100101 Firefox/126.0"
)

# Default thread-pool size; also used to size the HTTP connection pool.
DEFAULT_WORKERS = 8

# Timeout passed to each session.get() call.
REQUEST_TIMEOUT = 45

# Per-thread storage; get_thread_session() caches one session per thread here.
thread_local = threading.local()
22
+
23
+
24
def read_dotenv_value(path, key):
    """Look up ``key`` in a dotenv-style file and return its value.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped.  Whitespace around the key and the value is ignored, and one
    pair of matching surrounding quotes (``"`` or ``'``) is removed from the
    value, so ``KEY = "value"`` yields ``value``.

    Args:
        path: Path of the file to read.
        key: Exact variable name to look for.

    Returns:
        The value of the first matching entry, or ``None`` when the file does
        not exist or contains no such key.
    """
    try:
        # utf-8-sig transparently drops a leading BOM, which would otherwise
        # become part of the first key in the file.
        with open(path, "r", encoding="utf-8-sig") as env_file:
            for raw_line in env_file:
                line = raw_line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                name, value = line.split("=", 1)
                if name.strip() != key:
                    continue
                value = value.strip()
                # Strip one pair of matching quotes wrapping the whole value.
                if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                    value = value[1:-1]
                return value
    except FileNotFoundError:
        return None
    return None
37
+
38
+
39
def get_phpsessid():
    """Return the PHPSESSID value used to authenticate requests.

    The ``PHPSESSID`` environment variable takes precedence.  When it is
    unset or empty, the ``.env`` file in the parent directory of this script
    is consulted through ``read_dotenv_value``.

    Raises:
        RuntimeError: If neither source yields a non-empty value.
    """
    from_env = os.getenv("PHPSESSID")
    if from_env:
        return from_env

    parent_dir = os.path.join(os.path.dirname(__file__), "..")
    dotenv_path = os.path.abspath(os.path.join(parent_dir, ".env"))
    from_file = read_dotenv_value(dotenv_path, "PHPSESSID")
    if not from_file:
        raise RuntimeError("PHPSESSID is not set in the environment or .env")
    return from_file
48
+
49
+
50
def byteize(alpha):
    """Pack the least-significant bits of ``alpha`` into bytes.

    The array is transposed and flattened, the lowest bit of every element is
    kept, trailing bits that do not fill a whole byte are dropped, and each
    group of eight bits is packed into one byte.

    Returns:
        An array of shape ``(n, 1)`` holding one packed byte per row.
    """
    low_bits = np.bitwise_and(alpha.T.reshape((-1,)), 1)
    whole_bytes = low_bits.shape[0] // 8
    grouped = low_bits[:whole_bytes * 8].reshape((-1, 8))
    return np.packbits(grouped, axis=1)
57
+
58
+
59
class LSBExtractor:
    """Sequential reader over the bytes produced by ``byteize``.

    ``data`` holds the packed bytes and ``pos`` is the index of the next
    unread byte; every read advances ``pos``.
    """

    def __init__(self, alpha):
        self.pos = 0
        self.data = byteize(alpha)

    def get_one_byte(self):
        """Return the entry at the cursor and advance the cursor by one."""
        index = self.pos
        value = self.data[index]
        self.pos = index + 1
        return value

    def get_next_n_bytes(self, n):
        """Return up to ``n`` bytes from the cursor as a ``bytearray``.

        The cursor always advances by ``n``, even when fewer bytes remain.
        """
        begin = self.pos
        end = begin + n
        chunk = self.data[begin:end]
        self.pos = end
        return bytearray(chunk)

    def read_32bit_integer(self):
        """Read four bytes as a big-endian integer.

        Returns ``None`` when fewer than four bytes are available.
        """
        raw = self.get_next_n_bytes(4)
        if len(raw) != 4:
            return None
        return int.from_bytes(raw, byteorder="big")
80
+
81
+
82
def extract_image_metadata(image: Union[Image.Image, np.ndarray]) -> dict:
    """Decode the gzip-compressed JSON hidden in an image's alpha-channel LSBs.

    The alpha channel is read through ``LSBExtractor``.  The stream must
    start with the magic ``stealth_pngcomp``, followed by a 32-bit big-endian
    payload length in bits and the gzip-compressed UTF-8 JSON payload.  When
    the decoded object has a string ``"Comment"`` entry, that entry is itself
    parsed as JSON.

    Args:
        image: A PIL image with an ``A`` band, or an array that is either the
            2-D alpha plane or a 3-D array whose last channel is alpha.

    Returns:
        The decoded metadata object.

    Raises:
        AssertionError: If the image has no usable alpha plane, the magic does
            not match, or the length header is truncated.  Raised explicitly
            so the checks still run when Python is started with ``-O``.
    """
    if isinstance(image, Image.Image):
        if "A" not in image.getbands():
            raise AssertionError("image format")
        alpha = np.array(image.getchannel("A"))
    else:
        alpha = image[..., -1] if image.ndim == 3 else image
    if alpha.ndim != 2:
        raise AssertionError("image format")

    reader = LSBExtractor(alpha)

    # Compare raw bytes: ordinary images carry arbitrary bits here, and
    # decoding them first could fail with UnicodeDecodeError instead of the
    # intended "magic number" error.
    magic = b"stealth_pngcomp"
    if bytes(reader.get_next_n_bytes(len(magic))) != magic:
        raise AssertionError("magic number")

    bit_length = reader.read_32bit_integer()
    if bit_length is None:
        # Fewer than four bytes were left for the length header.
        raise AssertionError("payload length")

    payload = reader.get_next_n_bytes(bit_length // 8)
    json_data = json.loads(gzip.decompress(payload).decode("utf-8"))
    if "Comment" in json_data and isinstance(json_data["Comment"], str):
        json_data["Comment"] = json.loads(json_data["Comment"])
    return json_data
103
+
104
+
105
def iter_text_files():
    """Let the user pick ``.txt`` files from the current directory.

    The files are listed in sorted order with 1-based indexes, then one line
    is read from standard input.  The line is split on whitespace; each token
    is either a single index (``3``) or an inclusive range (``2-5``).  Tokens
    that are not well formed and indexes outside the listing are ignored
    instead of aborting the prompt.

    Returns:
        The selected file names in listing order without duplicates, or an
        empty list when there are no ``.txt`` files.
    """
    # os.listdir() order is arbitrary; sort so the indexes are stable.
    valid = sorted(f for f in os.listdir() if f.endswith(".txt"))
    if not valid:
        print("No .txt files found.")
        return []
    for number, file in enumerate(valid, start=1):
        print(f"{number}: {file}")

    chosen = set()
    for token in input("Enter the index of the file: ").split():
        # isdecimal() only accepts characters that int() can parse.
        if token.isdecimal():
            chosen.add(int(token) - 1)
            continue
        first, dash, last = token.partition("-")
        if dash and first.isdecimal() and last.isdecimal():
            # Clamp to the listing so a huge range cannot build a huge set.
            low = max(int(first) - 1, 0)
            high = min(int(last), len(valid))
            chosen.update(range(low, high))
    return [valid[idx] for idx in sorted(chosen) if 0 <= idx < len(valid)]
122
+
123
+
124
def fetch_post_pages(session, post_id):
    """Fetch the page list of a post from the ``/ajax/illust/<id>/pages`` endpoint.

    Args:
        session: Object providing ``get(url, timeout=...)``.
        post_id: Identifier interpolated into the endpoint URL.

    Returns:
        The ``"body"`` value of the JSON reply, or an empty list when it is
        missing or falsy.

    Raises:
        Whatever ``raise_for_status()`` raises for an unsuccessful reply.
    """
    endpoint = f"https://www.pixiv.net/ajax/illust/{post_id}/pages"
    reply = session.get(endpoint, timeout=REQUEST_TIMEOUT)
    reply.raise_for_status()
    pages = reply.json().get("body")
    return pages if pages else []
130
+
131
+
132
def has_stealth_png(session, image_url, post_id):
    """Download an image and check that it carries stealth metadata.

    The request sends the post's artwork page as ``Referer``.  The downloaded
    bytes are opened as an image and passed to ``extract_image_metadata``.

    Returns:
        ``True`` when extraction succeeds.  Any failure (HTTP error, image
        that cannot be opened, missing metadata) propagates as an exception;
        the function never returns ``False``.
    """
    referer = f"https://www.pixiv.net/artworks/{post_id}"
    reply = session.get(
        image_url,
        headers={"Referer": referer},
        timeout=REQUEST_TIMEOUT,
    )
    reply.raise_for_status()
    extract_image_metadata(Image.open(io.BytesIO(reply.content)))
    return True
139
+
140
+
141
def find_stealth_page(post_id, phpsessid):
    """Return the 1-based number of the first page of a post with stealth metadata.

    Only pages whose ``urls.original`` ends in ``.png`` (case-insensitive) are
    downloaded.  This is a best-effort scan: a failure while fetching the page
    list yields ``None``, and a failure on a single page just moves on to the
    next one.

    Returns:
        The page number, or ``None`` when no page qualifies.
    """
    session = get_thread_session(phpsessid)
    try:
        pages = fetch_post_pages(session, post_id)
    except Exception:
        return None

    for page_number, page in enumerate(pages, start=1):
        original_url = page.get("urls", {}).get("original")
        is_png = bool(original_url) and original_url.lower().endswith(".png")
        if not is_png:
            continue
        try:
            found = has_stealth_png(session, original_url, post_id)
        except Exception:
            continue
        if found:
            return page_number
    return None
158
+
159
+
160
def build_session(phpsessid):
    """Create a ``requests`` session configured for pixiv.

    The session sends the module's ``USER_AGENT`` and a pixiv ``Referer``,
    carries ``phpsessid`` as the ``PHPSESSID`` cookie, and uses one
    ``HTTPAdapter`` with a pool of ``DEFAULT_WORKERS * 2`` for both schemes.
    """
    pool_size = DEFAULT_WORKERS * 2
    adapter = HTTPAdapter(pool_connections=pool_size, pool_maxsize=pool_size)

    session = requests.Session()
    session.headers.update({"User-Agent": USER_AGENT, "Referer": "https://www.pixiv.net/"})
    session.cookies.update({"PHPSESSID": phpsessid})
    for scheme in ("https://", "http://"):
        session.mount(scheme, adapter)
    return session
168
+
169
+
170
def get_thread_session(phpsessid):
    """Return the calling thread's session, building it on first use.

    The session is stored on the module-level ``thread_local`` object, so
    ``phpsessid`` is only used the first time a given thread calls this.
    """
    cached = getattr(thread_local, "session", None)
    if cached is not None:
        return cached
    thread_local.session = build_session(phpsessid)
    return thread_local.session
176
+
177
+
178
def _resolve_worker_count():
    """Return the thread-pool size from ``PIXIF_WORKERS``, else the default.

    A value that is not an integer, or is smaller than 1, is reported and
    replaced by ``DEFAULT_WORKERS`` rather than crashing the run.
    """
    raw = os.getenv("PIXIF_WORKERS")
    if raw is None:
        return DEFAULT_WORKERS
    try:
        workers = int(raw)
    except ValueError:
        workers = 0
    if workers < 1:
        # ThreadPoolExecutor rejects max_workers <= 0 with ValueError.
        print(f"Ignoring invalid PIXIF_WORKERS={raw!r}; using {DEFAULT_WORKERS}")
        return DEFAULT_WORKERS
    return workers


def main() -> int:
    """Scan the posts listed in user-selected ``.txt`` files for stealth PNGs.

    The working directory is switched to this script's directory, the
    PHPSESSID is loaded, and the user picks which ``.txt`` files to scan.
    Each file is read as whitespace-separated post ids, which are checked
    concurrently with ``find_stealth_page``.  Every hit is printed as
    ``"<post_id> page <n>"``.

    Returns:
        ``1`` when the PHPSESSID cannot be loaded, otherwise ``0``.
    """
    os.chdir(os.path.dirname(os.path.abspath(__file__)))
    try:
        phpsessid = get_phpsessid()
    except Exception as exc:
        print(f"Failed to load PHPSESSID: {exc}")
        return 1

    files = iter_text_files()
    if not files:
        return 0

    workers = _resolve_worker_count()

    for filename in files:
        with open(filename, "r") as handle:
            post_ids = handle.read().split()
        if not post_ids:
            continue

        with concurrent.futures.ThreadPoolExecutor(max_workers=workers) as executor:
            futures = {
                executor.submit(find_stealth_page, post_id, phpsessid): post_id
                for post_id in post_ids
            }
            # The context manager closes the bar even if the loop is
            # interrupted, so the next file's bar starts on a clean line.
            with tqdm(
                concurrent.futures.as_completed(futures),
                total=len(futures),
                desc=f"Scanning {filename}",
                unit="post",
            ) as bar:
                for future in bar:
                    post_id = futures[future]
                    try:
                        page = future.result()
                    except Exception:
                        # Best effort: a failed post counts as "no hit".
                        page = None
                    if page is not None:
                        tqdm.write(f"{post_id} page {page}")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())