Spaces:

q6
/

p

Paused

App Files Community

q6 committed on Dec 5, 2024

Commit

0820d3a

1 Parent(s): e948e33

X

Browse files

Files changed (4) hide show

Client/Extract Pixiv/ai_search.py +1 -1
Client/clear_db.py +3 -3
Client/hunt.py +40 -33
Client/{test2.py → t2.py} +4 -7

Client/Extract Pixiv/ai_search.py CHANGED Viewed

@@ -11,7 +11,7 @@ os.chdir(os.path.dirname(os.path.abspath(__file__)))
 input_url = input("Enter the URL: ")
-pages = 120 // 60
 params = {
     'raw': input_url,

 input_url = input("Enter the URL: ")
+pages = 60 // 60
 params = {
     'raw': input_url,

Client/clear_db.py CHANGED Viewed

@@ -5,6 +5,6 @@ os.chdir(os.path.dirname(os.path.abspath(__file__)))
 shutil.rmtree("images", ignore_errors=True)
 shutil.rmtree("db", ignore_errors=True)
-for file in os.listdir():
-    if file.endswith(".txt"):
-        os.rename(file, f"txt logs/{file}")

 shutil.rmtree("images", ignore_errors=True)
 shutil.rmtree("db", ignore_errors=True)
+# for file in os.listdir():
+#     if file.endswith(".txt"):
+#         os.rename(file, f"txt logs/{file}")

Client/hunt.py CHANGED Viewed

@@ -27,42 +27,49 @@ for inp in inputs:
         start, end = map(int, inp.split("-"))
         indexs.extend(range(start - 1, end))
 for index in indexs:
     group_name = valid[index].rsplit(".", 1)[0]
-    group_cache = os.listdir(f"images/{group_name}")
     with open(valid[index], "r") as f:
         post_ids = [x for x in f.read().split("\n") if x]
     with db.begin(write=True) as txn:
-        filtered_post_ids = [post_id for post_id in post_ids if txn.get(post_id.encode()) == None]
-        data = requests.post('https://q6-p.hf.space/pixif', json={"post_ids": filtered_post_ids}).json()
-        for post_id, url in data.items():
-            txn.put(post_id.encode(), url.encode())
-        missing_post_ids = set(filtered_post_ids) - set(data.keys())
-        for post_id in missing_post_ids:
-            txn.put(post_id.encode(), b'\x00')
-        to_download = {}
-        for post_id in set(post_ids):
-            if f"{post_id}.png" in images_cache:
-                continue
-            url = txn.get(post_id.encode())
-            if url != b'\x00':
-                to_download[post_id] = url.decode()
-    images_zip = requests.post('https://q6-p.hf.space/download', json={'posts': data})
-    with io.BytesIO(images_zip.content) as f:
-        with ZipFile(f) as z:
-            z.extractall("images/Stash")
-    for i, post_id in enumerate(post_ids):
-        if f"{post_id}.png" in images_cache and f"{i}_{post_id}.png" not in group_cache:
-            os.makedirs(f"images/{group_name}", exist_ok=True)
-            os.link(f"images/Stash/{post_id}.png", f"images/{group_name}/{i}_{post_id}.png")
-            continue

         start, end = map(int, inp.split("-"))
         indexs.extend(range(start - 1, end))
+blacklist = ['\x00', None]
+extracted_files = os.listdir("images/Stash")
 for index in indexs:
     group_name = valid[index].rsplit(".", 1)[0]
+    os.makedirs(f"images/{group_name}", exist_ok=True)
     with open(valid[index], "r") as f:
         post_ids = [x for x in f.read().split("\n") if x]
     with db.begin(write=True) as txn:
+        post_ids = {post_id: txn.get(post_id.encode()) for post_id in post_ids}
+        post_ids = {post_id: url.decode() if url else None for post_id, url in post_ids.items()}
+        filtered = [post_id for post_id, url in post_ids.items() if url == None and f"{post_id}.png" not in images_cache]
+        print(f"Group: {group_name}\nFiltered: {len(filtered)}/{len(post_ids)}")
+        if filtered:
+            data = requests.post('https://q6-p.hf.space/pixif', json={"post_ids": filtered}).json()
+            for post_id, url in data.items():
+                txn.put(post_id.encode(), url.encode())
+                post_ids[post_id] = url
+            no_exif = set(filtered) - set(data.keys())
+            for post_id in no_exif:
+                txn.put(post_id.encode(), b'\x00')
+    to_download = {post_id: url for post_id, url in post_ids.items() if url not in blacklist and f"{post_id}.png" not in images_cache}
+    print(f"Downloading {len(to_download)} images...")
+    if to_download:
+        images_zip = requests.post('https://q6-p.hf.space/download', json={'posts': to_download})
+        with io.BytesIO(images_zip.content) as f:
+            with ZipFile(f) as z:
+                z.extractall("images/Stash")
+                extracted_files.extend(z.namelist())
+    print("Moving images...")
+    for i, post_id in enumerate(post_ids.keys()):
+        print(f"Moving1 {post_id}.png")
+        if f"{post_id}.png" in images_cache:
+            print(f"Moving2 {post_id}.png")
+            if not os.path.exists(f"images/{group_name}/{i}_{post_id}.png"):
+                os.link(f"images/Stash/{post_id}.png", f"images/{group_name}/{i}_{post_id}.png")
+    if len(os.listdir(f'images/{group_name}')) == 0:
+        os.rmdir(f"images/{group_name}")

Client/{test2.py → t2.py} RENAMED Viewed

@@ -1,15 +1,12 @@
 import os
 import lmdb
-import requests
-import io
-from zipfile import ZipFile
-img_base = 'https://i.pximg.net/img-original/'
 os.chdir(os.path.dirname(os.path.abspath(__file__)))
-os.makedirs("images/Stash", exist_ok=True)
 db = lmdb.open("db", subdir=True, map_size=1048576)
-with db.begin(write=True) as txn:
-    print(txn.get(b"test") == None)

 import os
 import lmdb
 os.chdir(os.path.dirname(os.path.abspath(__file__)))
 db = lmdb.open("db", subdir=True, map_size=1048576)
+# view all in db
+with db.begin() as txn:
+    for key, value in txn.cursor():
+        print(key.decode(), value.decode())