q6 committed on
Commit
b6cf1cb
·
1 Parent(s): fa446dd
Files changed (4) hide show
  1. .gitignore +2 -1
  2. API/app.py +1 -0
  3. Client/clear_db.py +10 -0
  4. Client/hunt.py +24 -4
.gitignore CHANGED
@@ -1,3 +1,4 @@
1
  .env
2
  *.txt
3
- db
 
 
1
  .env
2
  *.txt
3
+ db
4
+ images
API/app.py CHANGED
@@ -244,6 +244,7 @@ async def download(
244
  zipf.write(f"Stash/{post_id}.png", post_id + ".png")
245
 
246
  return FileResponse(path=f"Stash/{timezip}.zip", media_type="application/zip", filename=f"{timezip}.zip")
 
247
  @app.get("/")
248
  async def read_root():
249
  return {"message": "Hello, World!"}
 
244
  zipf.write(f"Stash/{post_id}.png", post_id + ".png")
245
 
246
  return FileResponse(path=f"Stash/{timezip}.zip", media_type="application/zip", filename=f"{timezip}.zip")
247
+
248
  @app.get("/")
249
  async def read_root():
250
  return {"message": "Hello, World!"}
Client/clear_db.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import shutil
3
+
4
+ os.chdir(os.path.dirname(os.path.abspath(__file__)))
5
+
6
+ shutil.rmtree("images", ignore_errors=True)
7
+ shutil.rmtree("db", ignore_errors=True)
8
+ for file in os.listdir():
9
+ if file.endswith(".txt"):
10
+ os.remove(file)
Client/hunt.py CHANGED
@@ -1,6 +1,8 @@
1
  import os
2
  import lmdb
3
  import requests
 
 
4
 
5
  img_base = 'https://i.pximg.net/img-original/'
6
 
@@ -29,6 +31,9 @@ with db.begin(write=True) as txn:
29
  for index in indexs:
30
  group_name = valid[index].rsplit(".", 1)[0]
31
 
 
 
 
32
  with open(valid[index], "r") as f:
33
  post_ids = [x for x in f.read().split("\n") if x]
34
 
@@ -42,7 +47,22 @@ with db.begin(write=True) as txn:
42
  for post_id in missing_post_ids:
43
  txn.put(post_id.encode(), b'\x00')
44
 
45
- data.update({post_id: txn.get(post_id.encode()).decode() for post_id in set(post_ids) - missing_post_ids if txn.get(post_id.encode()) != b'\x00'})
46
-
47
- print(data)
48
- images_zip = requests.post('https://q6-p.hf.space/download', json={'posts': data}).json()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import lmdb
3
  import requests
4
+ import io
5
+ from zipfile import ZipFile
6
 
7
  img_base = 'https://i.pximg.net/img-original/'
8
 
 
31
  for index in indexs:
32
  group_name = valid[index].rsplit(".", 1)[0]
33
 
34
+ os.makedirs(f"images/{group_name}", exist_ok=True)
35
+ group_cache = os.listdir(f"images/{group_name}")
36
+
37
  with open(valid[index], "r") as f:
38
  post_ids = [x for x in f.read().split("\n") if x]
39
 
 
47
  for post_id in missing_post_ids:
48
  txn.put(post_id.encode(), b'\x00')
49
 
50
+ to_download = {}
51
+ for post_id in set(post_ids):
52
+ if f"{post_id}.png" in images_cache:
53
+ continue
54
+ url = txn.get(post_id.encode())
55
+ if url != b'\x00':
56
+ to_download[post_id] = url.decode()
57
+
58
+ images_zip = requests.post('https://q6-p.hf.space/download', json={'posts': data})
59
+
60
+ with io.BytesIO(images_zip.content) as f:
61
+ with ZipFile(f) as z:
62
+ z.extractall("images/Stash")
63
+
64
+ for i, post_id in enumerate(post_ids):
65
+ print(post_id)
66
+ if f"{post_id}.png" in images_cache and f"{i}_{post_id}.png" not in group_cache:
67
+ os.link(f"images/Stash/{post_id}.png", f"images/{group_name}/{i}_{post_id}.png")
68
+ continue