File size: 5,442 Bytes
4e1b77d a1a9773 4e1b77d ea8db81 ae9610b 4e1b77d 7e8faf0 82901f0 4e1b77d bd75875 4e1b77d 45d40ce 4e1b77d 82901f0 300c794 a1a9773 b165f5f 82901f0 4e1b77d 82901f0 ea81c1a 4e1b77d 82901f0 4e1b77d ea81c1a 82901f0 4e1b77d ea8db81 92ca3c6 82901f0 37f5e8d 82901f0 ea81c1a e948e33 0820d3a e948e33 82901f0 950bead a1a9773 950bead 82901f0 ea8db81 92ca3c6 82901f0 92ca3c6 82901f0 947856c 11da7cf 82901f0 950bead c32dbf5 a1a9773 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 |
import os
import sqlite3
import requests
import tempfile
import zipfile
from tqdm import tqdm
# Toggle for local development: 1 = talk to a locally running server,
# 0 = use the hosted HF Space endpoint.
local = 0
endpoint = "http://127.0.0.1:7860" if local else "https://q6-p.hf.space"
def read_dotenv_value(path, key):
    """Return the value for *key* from a dotenv-style file at *path*.

    Lines are `KEY=VALUE`; blank lines, comments (#) and lines without
    '=' are skipped. Whitespace around the key and value is stripped, so
    `KEY = value` also matches. Returns None when the key is absent or
    the file does not exist.
    """
    try:
        with open(path, "r", encoding="utf-8") as env_file:
            for line in env_file:
                line = line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                k, v = line.split("=", 1)
                # Strip padding so "KEY = value" matches and the returned
                # value carries no stray leading/trailing spaces.
                if k.strip() == key:
                    return v.strip()
    except FileNotFoundError:
        return None
    return None
def get_phpsessid():
    """Resolve the PHPSESSID credential.

    Checks the process environment first, then a `.env` file one
    directory above this script. Raises RuntimeError when neither
    source provides a value.
    """
    from_env = os.getenv("PHPSESSID")
    if from_env:
        return from_env
    dotenv_path = os.path.abspath(
        os.path.join(os.path.dirname(__file__), "..", ".env")
    )
    from_file = read_dotenv_value(dotenv_path, "PHPSESSID")
    if from_file:
        return from_file
    raise RuntimeError("PHPSESSID is not set in the environment or .env")
# --- Script setup (side effects run at import time) ---
# Resolve the session cookie first so the script fails fast when missing.
phpsessid = get_phpsessid()
# Work relative to this file's directory so the .txt lists and image dirs resolve.
os.chdir(os.path.dirname(os.path.abspath(__file__)))
# "images/Stash" is the flat pool of downloaded images; group dirs link into it.
os.makedirs("images/Stash", exist_ok=True)
# Snapshot of already-downloaded filenames, used to skip re-downloads.
images_cache = set(os.listdir("images/Stash"))
# SQLite file caching post_id -> image URL lookups.
DB_PATH = "db.sqlite"
def open_db(path):
    """Open the SQLite cache database at *path*, creating the schema if needed.

    Returns an open sqlite3.Connection; closing it is the caller's job.
    """
    connection = sqlite3.connect(path)
    ddl = (
        "CREATE TABLE IF NOT EXISTS pixif_cache ("
        " post_id TEXT PRIMARY KEY,"
        " url TEXT"
        " )"
    )
    connection.execute(ddl)
    connection.commit()
    return connection
def chunked(seq, size):
    """Yield consecutive slices of *seq*, each at most *size* elements long."""
    start = 0
    while start < len(seq):
        yield seq[start:start + size]
        start += size
def fetch_cached_urls(conn, post_ids):
    """Look up cached URLs for *post_ids* in the pixif_cache table.

    Returns a dict mapping every requested post id to its cached URL
    (a NULL url comes back as '' meaning "known, no url") or to None
    when the id has never been cached.
    """
    results = {pid: None for pid in post_ids}
    if not post_ids:
        return results
    # SQLite caps the number of bound parameters, so query in batches.
    batch_size = 900
    for offset in range(0, len(post_ids), batch_size):
        batch = post_ids[offset:offset + batch_size]
        placeholders = ",".join("?" * len(batch))
        query = f"SELECT post_id, COALESCE(url, '') FROM pixif_cache WHERE post_id IN ({placeholders})"
        for pid, cached_url in conn.execute(query, batch):
            results[pid] = cached_url
    return results
def upsert_urls(conn, rows):
    """Write (post_id, url) pairs into pixif_cache, overwriting on conflict.

    No-op for an empty *rows*. Committing is left to the caller
    (the call site wraps this in `with conn:`).
    """
    if not rows:
        return
    upsert_sql = """
        INSERT INTO pixif_cache (post_id, url)
        VALUES (?, ?)
        ON CONFLICT(post_id) DO UPDATE SET url = excluded.url
        """
    conn.executemany(upsert_sql, rows)
conn = open_db(DB_PATH)
# Present every .txt file (one post-id list per group) as a selectable item.
valid = [f for f in os.listdir() if f.endswith(".txt")]
for idx, file in enumerate(valid):
    print(f"{idx + 1}: {file}")
# Accept space-separated 1-based indices and inclusive ranges, e.g. "1 3-5".
inputs = input("Enter the index of the file: ").split()
indexs = []
for inp in inputs:
    if "-" in inp:
        # NOTE(review): a malformed range like "3-" or "1-2-3" raises
        # ValueError here; only well-formed "a-b" tokens are expected.
        start, end = map(int, inp.split("-"))
        # Convert 1-based inclusive "start-end" to 0-based indices.
        indexs.extend(range(start - 1, end))
    elif inp.isdigit():
        indexs.append(int(inp) - 1)
def download_zip(to_download, dest_dir):
    """Download a zip of the requested images from the service and extract it.

    to_download: dict mapping post_id -> source URL, sent as the JSON body.
    dest_dir: directory the archive is extracted into.
    Raises requests.HTTPError on a non-2xx response.
    """
    # Stream so the archive never has to fit in memory; the context manager
    # guarantees the response connection is released. The timeout prevents
    # an unresponsive server from hanging the script forever (in streaming
    # mode the read timeout applies per chunk, not to the whole download).
    with requests.post(
        f"{endpoint}/pixif_zip",
        json={"d": to_download},
        stream=True,
        timeout=(10, 60),
    ) as response:
        response.raise_for_status()
        total = int(response.headers.get("Content-Length", 0))
        with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp_file:
            tmp_path = tmp_file.name
            # One write loop for both cases: the progress bar is simply
            # disabled when the server did not report a Content-Length.
            with tqdm(
                total=total,
                unit="B",
                unit_scale=True,
                desc="Downloading zip",
                disable=total <= 0,
            ) as pbar:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        tmp_file.write(chunk)
                        pbar.update(len(chunk))
    try:
        with zipfile.ZipFile(tmp_path, "r") as zf:
            zf.extractall(dest_dir)
    finally:
        # Remove the temp archive even when extraction raises (e.g. a
        # corrupt zip) — previously the file leaked in that case.
        os.remove(tmp_path)
def decode_if_binary(val):
    """Return *val* decoded to str (UTF-8) when it is bytes-like, else unchanged.

    URLs read back from the SQLite cache may arrive as bytes; normalise
    them to str. Uses isinstance (idiomatic, subclass-aware) instead of
    an exact type check, and also covers bytearray.
    """
    if isinstance(val, (bytes, bytearray)):
        return val.decode()
    return val
# Process each selected group: resolve URLs (cache first, then the
# service), download any missing images into the shared Stash, then
# hard-link them into a per-group directory preserving list order.
for index in indexs:
    group_name = valid[index].rsplit(".", 1)[0]
    os.makedirs(f"images/{group_name}", exist_ok=True)
    # Each .txt file holds whitespace-separated post ids.
    with open(valid[index], "r") as f:
        post_ids = f.read().split()
    # post_ids_dict: id -> cached URL, '' for known-empty, None for uncached.
    post_ids_dict = fetch_cached_urls(conn, post_ids)
    # Only ids with no cache entry and no already-downloaded file need a lookup.
    filtered = [
        post_id
        for post_id in post_ids
        if post_ids_dict[post_id] is None and f"{post_id}.png" not in images_cache
    ]
    print(f"Group: {group_name}\nFiltered: {len(filtered)}/{len(post_ids)}")
    if filtered:
        # NOTE(review): no raise_for_status()/timeout here — a non-2xx or
        # hung response surfaces as a JSON decode error or a stall; confirm
        # whether explicit handling is wanted.
        data = requests.post(f'{endpoint}/pixif', json={"post_ids": filtered, "phpsessid": phpsessid})
        data = data.json()
        rows = [(post_id, url) for post_id, url in data.items()]
        # Ids the service returned nothing for are cached as '' so they
        # are not re-queried on the next run.
        no_exif = set(filtered) - set(data.keys())
        rows.extend((post_id, "") for post_id in no_exif)
        # `with conn:` commits the upsert (or rolls back on error).
        with conn:
            upsert_urls(conn, rows)
        # Fold the fresh results back into the in-memory map.
        for post_id, url in data.items():
            post_ids_dict[post_id] = url
        for post_id in no_exif:
            post_ids_dict[post_id] = ""
    # Download every id that has a usable URL and is not already in Stash.
    to_download = {post_id: decode_if_binary(url) for post_id, url in post_ids_dict.items() if url and f"{post_id}.png" not in images_cache}
    if to_download:
        print(f"Total images to download: {len(to_download)}")
        download_zip(to_download, "images/Stash")
        # Refresh the snapshot so later groups see the new files.
        images_cache.update(os.listdir("images/Stash"))
    print("Linking images to the group directory...")
    # Hard-link (not copy) Stash files into the group dir, prefixed with the
    # position in the list so the group keeps its original ordering.
    # NOTE(review): os.link fails across filesystems/devices — assumes
    # Stash and the group dir live on the same volume.
    for i, post_id in enumerate(post_ids):
        stash_path = f"images/Stash/{post_id}.png"
        dest_path = f"images/{group_name}/{i}_{post_id}.png"
        if os.path.exists(stash_path) and not os.path.exists(dest_path):
            os.link(stash_path, dest_path)
    # Drop the group dir again when nothing could be linked into it.
    if not os.listdir(f'images/{group_name}'):
        os.rmdir(f"images/{group_name}")
conn.close()
|