q6 committed on
Commit
b3b97aa
·
1 Parent(s): 45d40ce
Files changed (3) hide show
  1. API/app.py +28 -2
  2. Client/hunt.py +16 -12
  3. Dockerfile +1 -1
API/app.py CHANGED
@@ -6,12 +6,19 @@ import time
6
  import uvicorn
7
  from dotenv import load_dotenv
8
  import os
 
9
  from pydantic import BaseModel
10
- from typing import List
11
 
 
12
  class pixifModel(BaseModel):
13
  post_ids: List[int]
14
 
 
 
 
 
 
15
  env_path = os.path.dirname(os.path.realpath(__file__)) + "/../.env"
16
  # config = dotenv_values(env_path)
17
  # PHPSESSID = config.get("PHPSESSID")
@@ -206,9 +213,28 @@ async def pixif(
206
  tasks = [process_post(post_id, session, semaphore) for post_id in post_ids]
207
  results = await asyncio.gather(*tasks)
208
 
209
- image_exifs = {post_id: image_url for post_id, image_url in results if image_url}
210
  return image_exifs
211
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
212
  @app.get("/")
213
  async def read_root():
214
  return {"message": "Hello, World!"}
 
6
  import uvicorn
7
  from dotenv import load_dotenv
8
  import os
9
+ from aiofiles import open as aio_open
10
  from pydantic import BaseModel
11
+ from typing import List, Dict
12
 
13
+ img_base = 'https://i.pximg.net/img-original/'
14
  class pixifModel(BaseModel):
15
  post_ids: List[int]
16
 
17
class PixifDownloadModel(BaseModel):
    # Request body of the download endpoint: each key is a post id and each
    # value is that post's image path, which is appended to `img_base` to
    # build the full image URL.
    posts: Dict[str, str]
19
+
20
+ os.makedirs('Stash', exist_ok=True)
21
+
22
  env_path = os.path.dirname(os.path.realpath(__file__)) + "/../.env"
23
  # config = dotenv_values(env_path)
24
  # PHPSESSID = config.get("PHPSESSID")
 
213
  tasks = [process_post(post_id, session, semaphore) for post_id in post_ids]
214
  results = await asyncio.gather(*tasks)
215
 
216
+ image_exifs = {post_id: image_url.replace('https://i.pximg.net/img-original/', '', 1) for post_id, image_url in results if image_url}
217
  return image_exifs
218
 
219
async def download_image(session, post_id, post_url):
    """Fetch one image and store it as ``Stash/<post_id>.png``.

    Args:
        session: Open ``aiohttp.ClientSession`` used to issue the request.
        post_id: Identifier used for the file name. Only its final path
            component is kept, so a crafted id cannot point outside ``Stash``.
        post_url: Image path relative to ``img_base``.

    Returns:
        None. Nothing is written when the server does not answer with 200.
    """
    url = f"{img_base}{post_url}"
    async with session.get(url) as response:
        if response.status != 200:
            # Do not store an error page under an image file name.
            print(f"Skipping {post_id}: HTTP {response.status} for {url}")
            return
        content = await response.read()

    # post_id comes from the request body; drop any directory components so
    # the write always lands inside Stash/.
    file_name = os.path.basename(str(post_id))
    async with aio_open(f"Stash/{file_name}.png", "wb") as f:
        await f.write(content)
225
+
226
+
227
# The route path needs its leading slash: the client posts to "/download",
# and a path registered as "download" never matches that request.
@app.post("/download")
async def download(
    items: PixifDownloadModel
):
    """Download every requested image into the local ``Stash`` directory.

    Args:
        items: Request body whose ``posts`` maps a post id to the image path
            that is passed on to ``download_image``.
    """
    posts = items.posts

    async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
        tasks = [
            download_image(session, post_id, image_url)
            for post_id, image_url in posts.items()
        ]
        await asyncio.gather(*tasks)
    print(os.listdir("Stash"))
237
+
238
  @app.get("/")
239
  async def read_root():
240
  return {"message": "Hello, World!"}
Client/hunt.py CHANGED
@@ -2,11 +2,12 @@ import os
2
  import lmdb
3
  import requests
4
 
 
5
 
6
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
7
  os.makedirs("images/Stash", exist_ok=True)
8
 
9
- db = lmdb.open("db", subdir=True, map_size=10485760)
10
  images_cache = os.listdir("images/Stash")
11
  valid = [f for f in os.listdir() if f.endswith(".txt")]
12
 
@@ -24,21 +25,24 @@ for inp in inputs:
24
  start, end = map(int, inp.split("-"))
25
  indexs.extend(range(start - 1, end))
26
 
27
- for index in indexs:
28
- with open(valid[index], "r") as f:
29
- post_ids = [x for x in f.read().split("\n") if x]
30
 
31
- with db.begin(write=False) as txn:
32
- post_ids = [post_id for post_id in post_ids if not txn.get(post_id.encode())]
33
 
34
- if not post_ids:
35
- continue
36
- data = requests.post('https://q6-p.hf.space/pixif', json={"post_ids": post_ids}).json()
37
 
38
- with db.begin(write=True) as txn:
39
  for post_id, url in data.items():
40
  txn.put(post_id.encode(), url.encode())
41
 
42
- missing_post_ids = set(post_ids) - set(data.keys())
43
  for post_id in missing_post_ids:
44
- txn.put(post_id.encode(), b'')
 
 
 
 
 
 
2
  import lmdb
3
  import requests
4
 
5
+ img_base = 'https://i.pximg.net/img-original/'
6
 
7
  os.chdir(os.path.dirname(os.path.abspath(__file__)))
8
  os.makedirs("images/Stash", exist_ok=True)
9
 
10
+ db = lmdb.open("db", subdir=True, map_size=1048576)
11
  images_cache = os.listdir("images/Stash")
12
  valid = [f for f in os.listdir() if f.endswith(".txt")]
13
 
 
25
  start, end = map(int, inp.split("-"))
26
  indexs.extend(range(start - 1, end))
27
 
28
+ with db.begin(write=True) as txn:
29
+ for index in indexs:
30
+ group_name = valid[index].rsplit(".", 1)[0]
31
 
32
+ with open(valid[index], "r") as f:
33
+ post_ids = [x for x in f.read().split("\n") if x]
34
 
35
+ filtered_post_ids = [post_id for post_id in post_ids if not txn.get(post_id.encode())]
36
+ data = requests.post('https://q6-p.hf.space/pixif', json={"post_ids": filtered_post_ids}).json()
 
37
 
 
38
  for post_id, url in data.items():
39
  txn.put(post_id.encode(), url.encode())
40
 
41
+ missing_post_ids = set(filtered_post_ids) - set(data.keys())
42
  for post_id in missing_post_ids:
43
+ txn.put(post_id.encode(), b'\x00')
44
+
45
+ data.update({post_id: txn.get(post_id.encode()).decode() for post_id in set(post_ids) - missing_post_ids if txn.get(post_id.encode()) != b'\x00'})
46
+
47
+ print(data)
48
+ images_zip = requests.post('https://q6-p.hf.space/download', json={'posts': data}).json()
Dockerfile CHANGED
@@ -8,7 +8,7 @@ ENV PYTHONDONTWRITEBYTECODE=1
8
  ENV PYTHONUNBUFFERED=1
9
  WORKDIR /app
10
 
11
- RUN pip install --no-cache-dir fastapi aiohttp uvicorn python-dotenv pydantic requests
12
 
13
  COPY --chown=user ./API /app
14
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
 
8
  ENV PYTHONUNBUFFERED=1
9
  WORKDIR /app
10
 
11
+ RUN pip install --no-cache-dir fastapi aiohttp uvicorn python-dotenv pydantic requests aiofiles
12
 
13
  COPY --chown=user ./API /app
14
  CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]