q6 committed on
Commit
af5f76e
·
1 Parent(s): cede408
Files changed (1) hide show
  1. Client/hunt.py +39 -31
Client/hunt.py CHANGED
@@ -34,48 +34,56 @@ for inp in inputs:
34
 
35
  blacklist = ['\0', None]
36
 
 
 
 
 
37
  for index in indexs:
38
- images_cache = os.listdir("images/Stash")
39
  group_name = valid[index].rsplit(".", 1)[0]
40
  os.makedirs(f"images/{group_name}", exist_ok=True)
41
  with open(valid[index], "r") as f:
42
  post_ids = [x for x in f.read().split("\n") if x]
43
-
44
- with db.begin(write=True) as txn:
45
- post_ids = {post_id: txn.get(post_id.encode()) for post_id in post_ids}
46
- post_ids = {post_id: url.decode() if url else None for post_id, url in post_ids.items()}
47
- filtered = [post_id for post_id, url in post_ids.items() if url == None and f"{post_id}.png" not in images_cache]
48
-
49
- print(f"Group: {group_name}\nFiltered: {len(filtered)}/{len(post_ids)}")
50
- if filtered:
51
- data = requests.post(f'{endpoint}/pixif', json={"post_ids": filtered}).json()
52
-
53
- for post_id, url in data.items():
54
- txn.put(post_id.encode(), url.encode())
55
- post_ids[post_id] = url
56
-
57
- no_exif = set(filtered) - set(data.keys())
58
-
59
- for post_id in no_exif:
60
- txn.put(post_id.encode(), b'\0')
61
-
62
- to_download = {post_id: url for post_id, url in post_ids.items() if url not in blacklist and f"{post_id}.png" not in images_cache}
 
 
 
 
63
  print(f"Downloading {len(to_download)} images...")
64
  if to_download:
65
  images_zip = requests.post(f'{endpoint}/download', json={'posts': to_download}, stream=True)
66
-
67
  print("Extracting images...")
68
  with io.BytesIO(images_zip.content) as f:
69
  with zipfile.ZipFile(f, mode='r') as zipf:
70
  zipf.extractall("images/Stash")
71
-
72
 
73
- images_cache = os.listdir("images/Stash")
74
- print("Linking images...")
75
- for i, post_id in enumerate(post_ids.keys()):
76
- if f"{post_id}.png" in images_cache:
77
- if not os.path.exists(f"images/{group_name}/{i}_{post_id}.png"):
78
- os.link(f"images/Stash/{post_id}.png", f"images/{group_name}/{i}_{post_id}.png")
79
 
80
- if len(os.listdir(f'images/{group_name}')) == 0:
81
- os.rmdir(f"images/{group_name}")
 
 
 
 
 
 
 
 
34
 
35
  blacklist = ['\0', None]
36
 
37
+ group_to_post_ids = {}
38
+ post_id_to_groups = {}
39
+ all_post_ids = set()
40
+
41
  for index in indexs:
 
42
  group_name = valid[index].rsplit(".", 1)[0]
43
  os.makedirs(f"images/{group_name}", exist_ok=True)
44
  with open(valid[index], "r") as f:
45
  post_ids = [x for x in f.read().split("\n") if x]
46
+ group_to_post_ids[group_name] = post_ids
47
+ for post_id in post_ids:
48
+ all_post_ids.add(post_id)
49
+ if post_id not in post_id_to_groups:
50
+ post_id_to_groups[post_id] = []
51
+ post_id_to_groups[post_id].append(group_name)
52
+
53
+ images_cache = os.listdir("images/Stash")
54
+
55
+ with db.begin(write=True) as txn:
56
+ post_ids_db = {post_id: txn.get(post_id.encode()) for post_id in all_post_ids}
57
+ post_ids_db = {post_id: url.decode() if url else None for post_id, url in post_ids_db.items()}
58
+ filtered = [post_id for post_id, url in post_ids_db.items() if url == None and f"{post_id}.png" not in images_cache]
59
+ print(f"Filtered: {len(filtered)}/{len(post_ids_db)}")
60
+ if filtered:
61
+ data = requests.post(f'{endpoint}/pixif', json={"post_ids": filtered}).json()
62
+ for post_id, url in data.items():
63
+ txn.put(post_id.encode(), url.encode())
64
+ post_ids_db[post_id] = url
65
+ no_exif = set(filtered) - set(data.keys())
66
+ for post_id in no_exif:
67
+ txn.put(post_id.encode(), b'\0')
68
+
69
+ to_download = {post_id: url for post_id, url in post_ids_db.items() if url not in blacklist and f"{post_id}.png" not in images_cache}
70
  print(f"Downloading {len(to_download)} images...")
71
  if to_download:
72
  images_zip = requests.post(f'{endpoint}/download', json={'posts': to_download}, stream=True)
 
73
  print("Extracting images...")
74
  with io.BytesIO(images_zip.content) as f:
75
  with zipfile.ZipFile(f, mode='r') as zipf:
76
  zipf.extractall("images/Stash")
 
77
 
78
+ images_cache = os.listdir("images/Stash")
79
+ print("Linking images...")
 
 
 
 
80
 
81
+ for group_name, post_ids in group_to_post_ids.items():
82
+ group_folder = f"images/{group_name}"
83
+ for i, post_id in enumerate(post_ids):
84
+ if f"{post_id}.png" in images_cache:
85
+ dest_path = f"{group_folder}/{i}_{post_id}.png"
86
+ if not os.path.exists(dest_path):
87
+ os.link(f"images/Stash/{post_id}.png", dest_path)
88
+ if len(os.listdir(group_folder)) == 0:
89
+ os.rmdir(group_folder)