File size: 5,442 Bytes
4e1b77d a1a9773 4e1b77d ea8db81 ae9610b 4e1b77d 7e8faf0 82901f0 4e1b77d bd75875 4e1b77d 45d40ce 4e1b77d 82901f0 300c794 a1a9773 b165f5f 82901f0 4e1b77d 82901f0 ea81c1a 4e1b77d 82901f0 4e1b77d ea81c1a 82901f0 4e1b77d ea8db81 92ca3c6 82901f0 37f5e8d 82901f0 ea81c1a e948e33 0820d3a e948e33 82901f0 950bead a1a9773 950bead 82901f0 ea8db81 92ca3c6 82901f0 92ca3c6 82901f0 947856c 11da7cf 82901f0 950bead c32dbf5 a1a9773 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
import os
import sqlite3
import requests
import tempfile
import zipfile
from tqdm import tqdm
# Toggle for local development: 1 = talk to a locally running server,
# 0 = use the hosted HF Space endpoint.
local = 0
endpoint = "http://127.0.0.1:7860" if local else "https://q6-p.hf.space"
def read_dotenv_value(path, key):
    """Return the value for *key* from a dotenv-style file at *path*.

    Lines are `KEY=VALUE`; blank lines, comments (#) and lines without
    '=' are skipped. Whitespace around the key and value is stripped, so
    `KEY = value` also matches. Returns None when the key is absent or
    the file does not exist.
    """
    try:
        with open(path, "r", encoding="utf-8") as env_file:
            for line in env_file:
                line = line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                k, v = line.split("=", 1)
                # Strip padding so "KEY = value" matches and the returned
                # value carries no stray leading/trailing spaces.
                if k.strip() == key:
                    return v.strip()
    except FileNotFoundError:
        return None
    return None
def get_phpsessid():
    """Resolve the PHPSESSID credential.

    Checks the process environment first, then a `.env` file one
    directory above this script. Raises RuntimeError when neither
    source provides a value.
    """
    from_env = os.getenv("PHPSESSID")
    if from_env:
        return from_env
    dotenv_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", ".env")
    )
    from_file = read_dotenv_value(dotenv_path, "PHPSESSID")
    if from_file:
        return from_file
    raise RuntimeError("PHPSESSID is not set in the environment or .env")
# --- Script setup (side effects run at import time) ---
# Resolve the session cookie first so the script fails fast when missing.
phpsessid = get_phpsessid()
# Work relative to this file's directory so the .txt lists and image dirs resolve.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
# "images/Stash" is the flat pool of downloaded images; group dirs link into it.
os.makedirs("images/Stash", exist_ok=True)
# Snapshot of already-downloaded filenames, used to skip re-downloads.
images_cache = set(os.listdir("images/Stash"))
# SQLite file caching post_id -> image URL lookups.
DB_PATH = "db.sqlite"
def open_db(path):
    """Open the SQLite cache database at *path*, creating the schema if needed.

    Returns an open sqlite3.Connection; closing it is the caller's job.
    """
    connection = sqlite3.connect(path)
    ddl = (
        "CREATE TABLE IF NOT EXISTS pixif_cache ("
        " post_id TEXT PRIMARY KEY,"
        " url TEXT"
        " )"
    )
    connection.execute(ddl)
    connection.commit()
    return connection
def chunked(seq, size):
    """Yield consecutive slices of *seq*, each at most *size* elements long."""
    start = 0
    while start < len(seq):
        yield seq[start:start + size]
        start += size
def fetch_cached_urls(conn, post_ids):
    """Look up cached URLs for *post_ids* in the pixif_cache table.

    Returns a dict mapping every requested post id to its cached URL
    (a NULL url comes back as '' meaning "known, no url") or to None
    when the id has never been cached.
    """
    results = {pid: None for pid in post_ids}
    if not post_ids:
        return results
    # SQLite caps the number of bound parameters, so query in batches.
    batch_size = 900
    for offset in range(0, len(post_ids), batch_size):
        batch = post_ids[offset:offset + batch_size]
        placeholders = ",".join("?" * len(batch))
        query = f"SELECT post_id, COALESCE(url, '') FROM pixif_cache WHERE post_id IN ({placeholders})"
        for pid, cached_url in conn.execute(query, batch):
            results[pid] = cached_url
    return results
def upsert_urls(conn, rows):
    """Write (post_id, url) pairs into pixif_cache, overwriting on conflict.

    No-op for an empty *rows*. Committing is left to the caller
    (the call site wraps this in `with conn:`).
    """
    if not rows:
        return
    upsert_sql = """
        INSERT INTO pixif_cache (post_id, url)
        VALUES (?, ?)
        ON CONFLICT(post_id) DO UPDATE SET url = excluded.url
        """
    conn.executemany(upsert_sql, rows)
conn = open_db(DB_PATH)
# Present every .txt file (one post-id list per group) as a selectable item.
valid = [f for f in os.listdir() if f.endswith(".txt")]
for idx, file in enumerate(valid):
    print(f"{idx + 1}: {file}")
# Accept space-separated 1-based indices and inclusive ranges, e.g. "1 3-5".
inputs = input("Enter the index of the file: ").split()
indexs = []
for inp in inputs:
    if "-" in inp:
        # NOTE(review): a malformed range like "3-" or "1-2-3" raises
        # ValueError here; only well-formed "a-b" tokens are expected.
        start, end = map(int, inp.split("-"))
        # Convert 1-based inclusive "start-end" to 0-based indices.
        indexs.extend(range(start - 1, end))
    elif inp.isdigit():
        indexs.append(int(inp) - 1)
def download_zip(to_download, dest_dir):
    """Download a zip of the requested images from the service and extract it.

    to_download: dict mapping post_id -> source URL, sent as the JSON body.
    dest_dir: directory the archive is extracted into.
    Raises requests.HTTPError on a non-2xx response.
    """
    # Stream so the archive never has to fit in memory; the context manager
    # guarantees the response connection is released. The timeout prevents
    # an unresponsive server from hanging the script forever (in streaming
    # mode the read timeout applies per chunk, not to the whole download).
    with requests.post(
        f"{endpoint}/pixif_zip",
        json={"d": to_download},
        stream=True,
        timeout=(10, 60),
    ) as response:
        response.raise_for_status()
        total = int(response.headers.get("Content-Length", 0))
        with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp_file:
            tmp_path = tmp_file.name
            # One write loop for both cases: the progress bar is simply
            # disabled when the server did not report a Content-Length.
            with tqdm(
                total=total,
                unit="B",
                unit_scale=True,
                desc="Downloading zip",
                disable=total <= 0,
            ) as pbar:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        tmp_file.write(chunk)
                        pbar.update(len(chunk))
    try:
        with zipfile.ZipFile(tmp_path, "r") as zf:
            zf.extractall(dest_dir)
    finally:
        # Remove the temp archive even when extraction raises (e.g. a
        # corrupt zip) — previously the file leaked in that case.
        os.remove(tmp_path)
def decode_if_binary(val):
    """Return *val* decoded to str (UTF-8) when it is bytes-like, else unchanged.

    URLs read back from the SQLite cache may arrive as bytes; normalise
    them to str. Uses isinstance (idiomatic, subclass-aware) instead of
    an exact type check, and also covers bytearray.
    """
    if isinstance(val, (bytes, bytearray)):
        return val.decode()
    return val
# Process each selected group: resolve URLs (cache first, then the
# service), download any missing images into the shared Stash, then
# hard-link them into a per-group directory preserving list order.
for index in indexs:
    group_name = valid[index].rsplit(".", 1)[0]
    os.makedirs(f"images/{group_name}", exist_ok=True)
    # Each .txt file holds whitespace-separated post ids.
    with open(valid[index], "r") as f:
        post_ids = f.read().split()
    # post_ids_dict: id -> cached URL, '' for known-empty, None for uncached.
    post_ids_dict = fetch_cached_urls(conn, post_ids)
    # Only ids with no cache entry and no already-downloaded file need a lookup.
    filtered = [
        post_id
        for post_id in post_ids
        if post_ids_dict[post_id] is None and f"{post_id}.png" not in images_cache
    ]
    print(f"Group: {group_name}\nFiltered: {len(filtered)}/{len(post_ids)}")
    if filtered:
        # NOTE(review): no raise_for_status()/timeout here — a non-2xx or
        # hung response surfaces as a JSON decode error or a stall; confirm
        # whether explicit handling is wanted.
        data = requests.post(f'{endpoint}/pixif', json={"post_ids": filtered, "phpsessid": phpsessid})
        data = data.json()
        rows = [(post_id, url) for post_id, url in data.items()]
        # Ids the service returned nothing for are cached as '' so they
        # are not re-queried on the next run.
        no_exif = set(filtered) - set(data.keys())
        rows.extend((post_id, "") for post_id in no_exif)
        # `with conn:` commits the upsert (or rolls back on error).
        with conn:
            upsert_urls(conn, rows)
        # Fold the fresh results back into the in-memory map.
        for post_id, url in data.items():
            post_ids_dict[post_id] = url
        for post_id in no_exif:
            post_ids_dict[post_id] = ""
    # Download every id that has a usable URL and is not already in Stash.
    to_download = {post_id: decode_if_binary(url) for post_id, url in post_ids_dict.items() if url and f"{post_id}.png" not in images_cache}
    if to_download:
        print(f"Total images to download: {len(to_download)}")
        download_zip(to_download, "images/Stash")
        # Refresh the snapshot so later groups see the new files.
        images_cache.update(os.listdir("images/Stash"))
    print("Linking images to the group directory...")
    # Hard-link (not copy) Stash files into the group dir, prefixed with the
    # position in the list so the group keeps its original ordering.
    # NOTE(review): os.link fails across filesystems/devices — assumes
    # Stash and the group dir live on the same volume.
    for i, post_id in enumerate(post_ids):
        stash_path = f"images/Stash/{post_id}.png"
        dest_path = f"images/{group_name}/{i}_{post_id}.png"
        if os.path.exists(stash_path) and not os.path.exists(dest_path):
            os.link(stash_path, dest_path)
    # Drop the group dir again when nothing could be linked into it.
    if not os.listdir(f'images/{group_name}'):
        os.rmdir(f"images/{group_name}")
conn.close()
|