# p / Client / hunt.py
# q6's picture
# Server zip
# ea8db81
import os
import lmdb
import requests
import tempfile
import zipfile
from tqdm import tqdm
# Development switch: set to a truthy value to talk to a server on localhost
# instead of the hosted Space.
local = 0

if local:
    endpoint = "http://127.0.0.1:7860"
else:
    endpoint = "https://q6-p.hf.space"
def read_dotenv_value(path, key):
    """Return the value assigned to *key* in a dotenv-style file.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Whitespace around the key and the value is ignored, an optional
    leading ``export `` is accepted, and one pair of matching surrounding
    quotes (single or double) is removed from the value.

    Args:
        path: Path of the dotenv file to read.
        key: Name of the variable to look up.

    Returns:
        The value of the first matching assignment, or ``None`` when the
        file does not exist or contains no assignment for *key*.
    """
    export_prefix = "export "
    try:
        # utf-8-sig also accepts plain UTF-8 and drops a leading BOM, which
        # would otherwise become part of the first key.
        with open(path, "r", encoding="utf-8-sig") as env_file:
            for raw_line in env_file:
                line = raw_line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue

                # Split on the first "=" only, so values may contain "=".
                name, _, value = line.partition("=")
                name = name.strip()
                if name.startswith(export_prefix):
                    name = name[len(export_prefix):].strip()
                if name != key:
                    continue

                value = value.strip()
                is_quoted = (
                    len(value) >= 2
                    and value[0] == value[-1]
                    and value[0] in "\"'"
                )
                return value[1:-1] if is_quoted else value
    except FileNotFoundError:
        return None
    return None
def get_phpsessid():
    """Look up the PHPSESSID value used to authenticate requests.

    The ``PHPSESSID`` environment variable wins when it is set and non-empty.
    Otherwise the ``.env`` file in the parent directory of this script's
    directory is consulted.

    Returns:
        The first non-empty PHPSESSID value found.

    Raises:
        RuntimeError: If neither source provides a non-empty value.
    """
    from_environment = os.getenv("PHPSESSID")
    if from_environment:
        return from_environment

    script_dir = os.path.dirname(__file__)
    dotenv_file = os.path.abspath(os.path.join(script_dir, "..", ".env"))
    from_dotenv = read_dotenv_value(dotenv_file, "PHPSESSID")
    if not from_dotenv:
        raise RuntimeError("PHPSESSID is not set in the environment or .env")
    return from_dotenv
# Resolve the session id first so a missing value aborts before any
# directories or database files are created.
phpsessid = get_phpsessid()

# Every relative path below ("images/...", "db", the *.txt lists) is resolved
# against the directory that contains this script.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
os.makedirs("images/Stash", exist_ok=True)

# File names already present in the stash; used to skip repeat downloads.
images_cache = set(os.listdir("images/Stash"))

# LMDB store mapping post id -> URL (an empty value is also stored; see the
# processing loop). The map is capped at 2 MiB.
# NOTE(review): LMDB raises MapFullError once the map is exhausted — confirm
# this cap is large enough for the expected number of posts.
db = lmdb.open("db", subdir=True, map_size=1048576 * 2)

# Sorted so the numbered menu is stable between runs (os.listdir() order is
# arbitrary).
valid = sorted(f for f in os.listdir() if f.endswith(".txt"))
for idx, file in enumerate(valid):
    print(f"{idx + 1}: {file}")


def _parse_selection(tokens, count):
    """Convert 1-based menu tokens ("3", "2-5") into valid 0-based indices.

    Malformed tokens and positions outside ``1..count`` are reported and
    skipped instead of crashing or wrapping around to the end of the list
    (entering ``0`` would otherwise become index ``-1``, i.e. the last file).
    """
    selected = []
    for token in tokens:
        try:
            if "-" in token:
                first, last = map(int, token.split("-"))
            else:
                first = last = int(token)
        except ValueError:
            print(f"Ignoring invalid selection: {token!r}")
            continue

        # Clamp to the menu bounds before building the range so that a huge
        # or non-positive bound cannot produce bogus indices.
        low = max(first - 1, 0)
        high = min(last, count)
        if low != first - 1 or high != last:
            print(f"Selection {token!r} is partly or fully outside 1-{count}; out-of-range entries skipped")
        selected.extend(range(low, high))
    return selected


inputs = input("Enter the index of the file: ").split()
indexs = _parse_selection(inputs, len(valid))
def download_zip(to_download, dest_dir):
    """Download a zip archive for *to_download* and extract it into *dest_dir*.

    The mapping is POSTed as ``{"d": to_download}`` to ``{endpoint}/pixif_zip``.
    The response body is streamed into a temporary ``.zip`` file, which is
    extracted into *dest_dir* and then removed — also when the download or the
    extraction fails.

    Args:
        to_download: JSON-serialisable payload sent under the ``"d"`` key
            (the caller passes a ``{post_id: url}`` dict).
        dest_dir: Directory the archive contents are extracted into.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        zipfile.BadZipFile: If the downloaded payload is not a valid archive.
    """
    tmp_path = None
    try:
        # Using the response as a context manager releases the streamed
        # connection even if writing the file fails half-way.
        with requests.post(f"{endpoint}/pixif_zip", json={"d": to_download}, stream=True) as response:
            response.raise_for_status()
            total = int(response.headers.get("Content-Length", 0))
            with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp_file:
                tmp_path = tmp_file.name
                # Without a usable Content-Length the bar is disabled, so
                # nothing is displayed and update() is a no-op.
                with tqdm(
                    total=total,
                    unit="B",
                    unit_scale=True,
                    desc="Downloading zip",
                    disable=total <= 0,
                ) as pbar:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        if chunk:
                            tmp_file.write(chunk)
                            pbar.update(len(chunk))

        # The temp file is closed here, so it can be reopened on any platform.
        with zipfile.ZipFile(tmp_path, "r") as zf:
            zf.extractall(dest_dir)
    finally:
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except FileNotFoundError:
                pass
def decode_if_binary(val):
    """Return *val* decoded to ``str`` when it is binary, else unchanged.

    Args:
        val: Any value. ``bytes`` and ``bytearray`` instances (including
            subclasses) are decoded with the default UTF-8 codec.

    Returns:
        The decoded string for binary input; *val* itself otherwise.

    Raises:
        UnicodeDecodeError: If binary input is not valid UTF-8.
    """
    # isinstance (rather than an exact type comparison) also accepts
    # bytearray and bytes subclasses.
    if isinstance(val, (bytes, bytearray)):
        return val.decode()
    return val
# Process every selected list file: resolve URLs for unknown post ids, download
# the missing images into the shared stash, then hard-link them into a
# per-group directory. The database is closed even if a group fails.
try:
    for index in indexs:
        list_file = valid[index]
        group_name = list_file.rsplit(".", 1)[0]
        group_dir = f"images/{group_name}"
        os.makedirs(group_dir, exist_ok=True)

        # The list file holds whitespace-separated post ids.
        with open(list_file, "r") as f:
            post_ids = f.read().split()

        # Read-only lookup of what is already cached. Values are bytes
        # (possibly empty) for known ids and None for ids never seen before.
        with db.begin() as txn:
            post_ids_dict = {post_id: txn.get(post_id.encode()) for post_id in post_ids}

        # Only ask the server about ids that are neither cached nor stashed.
        filtered = [
            post_id
            for post_id in post_ids
            if post_ids_dict[post_id] is None and f"{post_id}.png" not in images_cache
        ]
        print(f"Group: {group_name}\nFiltered: {len(filtered)}/{len(post_ids)}")

        if filtered:
            response = requests.post(f'{endpoint}/pixif', json={"post_ids": filtered, "phpsessid": phpsessid})
            # Fail loudly on an HTTP error instead of trying to parse (and
            # cache) an error page.
            response.raise_for_status()
            data = response.json()

            # The write transaction is opened only after the network call, so
            # the database is not write-locked while waiting on the server.
            with db.begin(write=True) as txn:
                for post_id, url in data.items():
                    txn.put(post_id.encode(), url.encode())
                    post_ids_dict[post_id] = url
                # Ids the server did not return are stored with an empty
                # value so they are not requested again on the next run.
                no_exif = set(filtered) - set(data)
                for post_id in no_exif:
                    txn.put(post_id.encode(), b'')

        # Cached values are bytes, fresh ones are str; normalise to str and
        # skip empty values and images that are already in the stash.
        to_download = {
            post_id: decode_if_binary(url)
            for post_id, url in post_ids_dict.items()
            if url and f"{post_id}.png" not in images_cache
        }
        if to_download:
            print(f"Total images to download: {len(to_download)}")
            download_zip(to_download, "images/Stash")
            images_cache.update(os.listdir("images/Stash"))

        print("Linking images to the group directory...")
        for i, post_id in enumerate(post_ids):
            stash_path = f"images/Stash/{post_id}.png"
            # The position prefix keeps the order of the list file.
            dest_path = f"{group_dir}/{i}_{post_id}.png"
            if os.path.exists(stash_path) and not os.path.exists(dest_path):
                os.link(stash_path, dest_path)

        # Do not leave an empty directory behind when nothing was linked.
        if not os.listdir(group_dir):
            os.rmdir(group_dir)
finally:
    db.close()