q6 committed on
Commit
0820d3a
·
1 Parent(s): e948e33
Client/Extract Pixiv/ai_search.py CHANGED
@@ -11,7 +11,7 @@ os.chdir(os.path.dirname(os.path.abspath(__file__)))
11
 
12
  input_url = input("Enter the URL: ")
13
 
14
- pages = 120 // 60
15
 
16
  params = {
17
  'raw': input_url,
 
11
 
12
  input_url = input("Enter the URL: ")
13
 
14
+ pages = 60 // 60
15
 
16
  params = {
17
  'raw': input_url,
Client/clear_db.py CHANGED
@@ -5,6 +5,6 @@ os.chdir(os.path.dirname(os.path.abspath(__file__)))
5
 
6
  shutil.rmtree("images", ignore_errors=True)
7
  shutil.rmtree("db", ignore_errors=True)
8
- for file in os.listdir():
9
- if file.endswith(".txt"):
10
- os.rename(file, f"txt logs/{file}")
 
5
 
6
  shutil.rmtree("images", ignore_errors=True)
7
  shutil.rmtree("db", ignore_errors=True)
8
+ # for file in os.listdir():
9
+ # if file.endswith(".txt"):
10
+ # os.rename(file, f"txt logs/{file}")
Client/hunt.py CHANGED
@@ -27,42 +27,49 @@ for inp in inputs:
27
  start, end = map(int, inp.split("-"))
28
  indexs.extend(range(start - 1, end))
29
 
 
 
30
  for index in indexs:
31
  group_name = valid[index].rsplit(".", 1)[0]
32
-
33
- group_cache = os.listdir(f"images/{group_name}")
34
-
35
  with open(valid[index], "r") as f:
36
  post_ids = [x for x in f.read().split("\n") if x]
37
 
38
  with db.begin(write=True) as txn:
39
- filtered_post_ids = [post_id for post_id in post_ids if txn.get(post_id.encode()) == None]
40
-
41
- data = requests.post('https://q6-p.hf.space/pixif', json={"post_ids": filtered_post_ids}).json()
42
-
43
- for post_id, url in data.items():
44
- txn.put(post_id.encode(), url.encode())
45
-
46
- missing_post_ids = set(filtered_post_ids) - set(data.keys())
47
- for post_id in missing_post_ids:
48
- txn.put(post_id.encode(), b'\x00')
49
-
50
- to_download = {}
51
- for post_id in set(post_ids):
52
- if f"{post_id}.png" in images_cache:
53
- continue
54
- url = txn.get(post_id.encode())
55
- if url != b'\x00':
56
- to_download[post_id] = url.decode()
57
-
58
- images_zip = requests.post('https://q6-p.hf.space/download', json={'posts': data})
59
-
60
- with io.BytesIO(images_zip.content) as f:
61
- with ZipFile(f) as z:
62
- z.extractall("images/Stash")
63
-
64
- for i, post_id in enumerate(post_ids):
65
- if f"{post_id}.png" in images_cache and f"{i}_{post_id}.png" not in group_cache:
66
- os.makedirs(f"images/{group_name}", exist_ok=True)
67
- os.link(f"images/Stash/{post_id}.png", f"images/{group_name}/{i}_{post_id}.png")
68
- continue
 
 
 
 
 
 
 
 
27
  start, end = map(int, inp.split("-"))
28
  indexs.extend(range(start - 1, end))
29
 
30
+ blacklist = ['\x00', None]
31
+ extracted_files = os.listdir("images/Stash")
32
  for index in indexs:
33
  group_name = valid[index].rsplit(".", 1)[0]
34
+ os.makedirs(f"images/{group_name}", exist_ok=True)
 
 
35
  with open(valid[index], "r") as f:
36
  post_ids = [x for x in f.read().split("\n") if x]
37
 
38
  with db.begin(write=True) as txn:
39
+ post_ids = {post_id: txn.get(post_id.encode()) for post_id in post_ids}
40
+ post_ids = {post_id: url.decode() if url else None for post_id, url in post_ids.items()}
41
+ filtered = [post_id for post_id, url in post_ids.items() if url == None and f"{post_id}.png" not in images_cache]
42
+
43
+ print(f"Group: {group_name}\nFiltered: {len(filtered)}/{len(post_ids)}")
44
+ if filtered:
45
+ data = requests.post('https://q6-p.hf.space/pixif', json={"post_ids": filtered}).json()
46
+
47
+ for post_id, url in data.items():
48
+ txn.put(post_id.encode(), url.encode())
49
+ post_ids[post_id] = url
50
+
51
+ no_exif = set(filtered) - set(data.keys())
52
+
53
+ for post_id in no_exif:
54
+ txn.put(post_id.encode(), b'\x00')
55
+
56
+ to_download = {post_id: url for post_id, url in post_ids.items() if url not in blacklist and f"{post_id}.png" not in images_cache}
57
+ print(f"Downloading {len(to_download)} images...")
58
+ if to_download:
59
+ images_zip = requests.post('https://q6-p.hf.space/download', json={'posts': to_download})
60
+
61
+ with io.BytesIO(images_zip.content) as f:
62
+ with ZipFile(f) as z:
63
+ z.extractall("images/Stash")
64
+ extracted_files.extend(z.namelist())
65
+
66
+ print("Moving images...")
67
+ for i, post_id in enumerate(post_ids.keys()):
68
+ print(f"Moving1 {post_id}.png")
69
+ if f"{post_id}.png" in images_cache:
70
+ print(f"Moving2 {post_id}.png")
71
+ if not os.path.exists(f"images/{group_name}/{i}_{post_id}.png"):
72
+ os.link(f"images/Stash/{post_id}.png", f"images/{group_name}/{i}_{post_id}.png")
73
+
74
+ if len(os.listdir(f'images/{group_name}')) == 0:
75
+ os.rmdir(f"images/{group_name}")
Client/{test2.py → t2.py} RENAMED
@@ -1,15 +1,12 @@
1
  import os
2
  import lmdb
3
- import requests
4
- import io
5
- from zipfile import ZipFile
6
 
7
- img_base = 'https://i.pximg.net/img-original/'
8
 
9
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
10
- os.makedirs("images/Stash", exist_ok=True)
11
 
12
  db = lmdb.open("db", subdir=True, map_size=1048576)
13
 
14
- with db.begin(write=True) as txn:
15
- print(txn.get(b"test") == None)
 
 
 
1
  import os
2
  import lmdb
 
 
 
3
 
 
4
 
5
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
 
6
 
7
  db = lmdb.open("db", subdir=True, map_size=1048576)
8
 
9
+ # view all in db
10
+ with db.begin() as txn:
11
+ for key, value in txn.cursor():
12
+ print(key.decode(), value.decode())