q6 committed on
Commit
0d0771b
·
1 Parent(s): 92ca3c6

More efficient filtered

Browse files
Files changed (3) hide show
  1. Client/hunt.py +4 -8
  2. Client/show_all.py +2 -1
  3. Client/t.py +12 -0
Client/hunt.py CHANGED
@@ -1,7 +1,6 @@
1
  import os
2
  import lmdb
3
  import requests
4
- import io
5
  from concurrent.futures import ThreadPoolExecutor
6
 
7
  local = 0
@@ -34,8 +33,6 @@ for inp in inputs:
34
  start, end = map(int, inp.split("-"))
35
  indexs.extend(range(start - 1, end))
36
 
37
- blacklist = ['\0', None]
38
-
39
  def download_image(args):
40
  post_id, url = args
41
  full_url = img_base + url
@@ -55,10 +52,9 @@ for index in indexs:
55
 
56
  with db.begin(write=True) as txn:
57
  post_ids_dict = {post_id: txn.get(post_id.encode()) for post_id in post_ids}
58
- post_ids_dict = {post_id: url.decode() if url else None for post_id, url in post_ids_dict.items()}
59
-
60
- filtered = [post_id for post_id, url in post_ids_dict.items() if url is None and f"{post_id}.png" not in images_cache]
61
 
 
62
  print(f"Group: {group_name}\nFiltered: {len(filtered)}/{len(post_ids_dict)}")
63
  if filtered:
64
  data = requests.post(f'{endpoint}/pixif', json={"post_ids": filtered}).json()
@@ -69,10 +65,10 @@ for index in indexs:
69
 
70
  no_exif = set(filtered) - set(data.keys())
71
  for post_id in no_exif:
72
- txn.put(post_id.encode(), b'\0')
73
 
74
  to_download = {post_id: url for post_id, url in post_ids_dict.items()
75
- if url not in blacklist and f"{post_id}.png" not in images_cache}
76
 
77
  print(f"Total images to download: {len(to_download)}")
78
 
 
1
  import os
2
  import lmdb
3
  import requests
 
4
  from concurrent.futures import ThreadPoolExecutor
5
 
6
  local = 0
 
33
  start, end = map(int, inp.split("-"))
34
  indexs.extend(range(start - 1, end))
35
 
 
 
36
  def download_image(args):
37
  post_id, url = args
38
  full_url = img_base + url
 
52
 
53
  with db.begin(write=True) as txn:
54
  post_ids_dict = {post_id: txn.get(post_id.encode()) for post_id in post_ids}
55
+ post_ids_dict = {post_id: url.decode() if url != None else None for post_id, url in post_ids_dict.items()}
 
 
56
 
57
+ filtered = [post_id for post_id, url in post_ids_dict.items() if post_ids_dict[post_id] != '' and f"{post_id}.png" not in images_cache]
58
  print(f"Group: {group_name}\nFiltered: {len(filtered)}/{len(post_ids_dict)}")
59
  if filtered:
60
  data = requests.post(f'{endpoint}/pixif', json={"post_ids": filtered}).json()
 
65
 
66
  no_exif = set(filtered) - set(data.keys())
67
  for post_id in no_exif:
68
+ txn.put(post_id.encode(), b'')
69
 
70
  to_download = {post_id: url for post_id, url in post_ids_dict.items()
71
+ if url and f"{post_id}.png" not in images_cache}
72
 
73
  print(f"Total images to download: {len(to_download)}")
74
 
Client/show_all.py CHANGED
@@ -3,7 +3,8 @@ import lmdb
3
 
4
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
5
 
6
- db = lmdb.open("db", subdir=True, map_size=524288)
 
7
 
8
  items = []
9
 
 
3
 
4
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
5
 
6
+ db = lmdb.open("db", subdir=True, map_size=1048576)
7
+
8
 
9
  items = []
10
 
Client/t.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import lmdb
2
+ import os
3
+
4
+ os.chdir(os.path.dirname(os.path.abspath(__file__)))
5
+
6
+ db = lmdb.open("db", subdir=True, map_size=1048576)
7
+
8
+ # print all
9
+ with db.begin(write=True) as txn:
10
+ cursor = txn.cursor()
11
+ print(txn.get(b'124922612').decode() == '')
12
+ cursor.close()