q6 committed on
Commit
950bead
·
1 Parent(s): af5f76e
Files changed (2) hide show
  1. Client/hunt.py +31 -39
  2. build_docker.bat +1 -0
Client/hunt.py CHANGED
@@ -34,56 +34,48 @@ for inp in inputs:
34
 
35
  blacklist = ['\0', None]
36
 
37
- group_to_post_ids = {}
38
- post_id_to_groups = {}
39
- all_post_ids = set()
40
-
41
  for index in indexs:
 
42
  group_name = valid[index].rsplit(".", 1)[0]
43
  os.makedirs(f"images/{group_name}", exist_ok=True)
44
  with open(valid[index], "r") as f:
45
  post_ids = [x for x in f.read().split("\n") if x]
46
- group_to_post_ids[group_name] = post_ids
47
- for post_id in post_ids:
48
- all_post_ids.add(post_id)
49
- if post_id not in post_id_to_groups:
50
- post_id_to_groups[post_id] = []
51
- post_id_to_groups[post_id].append(group_name)
52
-
53
- images_cache = os.listdir("images/Stash")
54
-
55
- with db.begin(write=True) as txn:
56
- post_ids_db = {post_id: txn.get(post_id.encode()) for post_id in all_post_ids}
57
- post_ids_db = {post_id: url.decode() if url else None for post_id, url in post_ids_db.items()}
58
- filtered = [post_id for post_id, url in post_ids_db.items() if url == None and f"{post_id}.png" not in images_cache]
59
- print(f"Filtered: {len(filtered)}/{len(post_ids_db)}")
60
- if filtered:
61
- data = requests.post(f'{endpoint}/pixif', json={"post_ids": filtered}).json()
62
- for post_id, url in data.items():
63
- txn.put(post_id.encode(), url.encode())
64
- post_ids_db[post_id] = url
65
- no_exif = set(filtered) - set(data.keys())
66
- for post_id in no_exif:
67
- txn.put(post_id.encode(), b'\0')
68
-
69
- to_download = {post_id: url for post_id, url in post_ids_db.items() if url not in blacklist and f"{post_id}.png" not in images_cache}
70
  print(f"Downloading {len(to_download)} images...")
71
  if to_download:
72
  images_zip = requests.post(f'{endpoint}/download', json={'posts': to_download}, stream=True)
 
73
  print("Extracting images...")
74
  with io.BytesIO(images_zip.content) as f:
75
  with zipfile.ZipFile(f, mode='r') as zipf:
76
  zipf.extractall("images/Stash")
 
77
 
78
- images_cache = os.listdir("images/Stash")
79
- print("Linking images...")
80
-
81
- for group_name, post_ids in group_to_post_ids.items():
82
- group_folder = f"images/{group_name}"
83
- for i, post_id in enumerate(post_ids):
84
  if f"{post_id}.png" in images_cache:
85
- dest_path = f"{group_folder}/{i}_{post_id}.png"
86
- if not os.path.exists(dest_path):
87
- os.link(f"images/Stash/{post_id}.png", dest_path)
88
- if len(os.listdir(group_folder)) == 0:
89
- os.rmdir(group_folder)
 
34
 
35
  blacklist = ['\0', None]
36
 
 
 
 
 
37
  for index in indexs:
38
+ images_cache = os.listdir("images/Stash")
39
  group_name = valid[index].rsplit(".", 1)[0]
40
  os.makedirs(f"images/{group_name}", exist_ok=True)
41
  with open(valid[index], "r") as f:
42
  post_ids = [x for x in f.read().split("\n") if x]
43
+
44
+ with db.begin(write=True) as txn:
45
+ post_ids = {post_id: txn.get(post_id.encode()) for post_id in post_ids}
46
+ post_ids = {post_id: url.decode() if url else None for post_id, url in post_ids.items()}
47
+ filtered = [post_id for post_id, url in post_ids.items() if url == None and f"{post_id}.png" not in images_cache]
48
+
49
+ print(f"Group: {group_name}\nFiltered: {len(filtered)}/{len(post_ids)}")
50
+ if filtered:
51
+ data = requests.post(f'{endpoint}/pixif', json={"post_ids": filtered}).json()
52
+
53
+ for post_id, url in data.items():
54
+ txn.put(post_id.encode(), url.encode())
55
+ post_ids[post_id] = url
56
+
57
+ no_exif = set(filtered) - set(data.keys())
58
+
59
+ for post_id in no_exif:
60
+ txn.put(post_id.encode(), b'\0')
61
+
62
+ to_download = {post_id: url for post_id, url in post_ids.items() if url not in blacklist and f"{post_id}.png" not in images_cache}
 
 
 
 
63
  print(f"Downloading {len(to_download)} images...")
64
  if to_download:
65
  images_zip = requests.post(f'{endpoint}/download', json={'posts': to_download}, stream=True)
66
+
67
  print("Extracting images...")
68
  with io.BytesIO(images_zip.content) as f:
69
  with zipfile.ZipFile(f, mode='r') as zipf:
70
  zipf.extractall("images/Stash")
71
+
72
 
73
+ images_cache = os.listdir("images/Stash")
74
+ print("Linking images...")
75
+ for i, post_id in enumerate(post_ids.keys()):
 
 
 
76
  if f"{post_id}.png" in images_cache:
77
+ if not os.path.exists(f"images/{group_name}/{i}_{post_id}.png"):
78
+ os.link(f"images/Stash/{post_id}.png", f"images/{group_name}/{i}_{post_id}.png")
79
+
80
+ if len(os.listdir(f'images/{group_name}')) == 0:
81
+ os.rmdir(f"images/{group_name}")
build_docker.bat ADDED
@@ -0,0 +1 @@
 
 
1
+ docker build -t pixif .