q6 committed on
Commit
d7d9d98
·
1 Parent(s): d639071
Files changed (2) hide show
  1. API/app.py +5 -6
  2. Client/Extract Pixiv/ai_search.py +6 -4
API/app.py CHANGED
@@ -6,6 +6,8 @@ import time
6
  import os
7
  from pydantic import BaseModel
8
  from typing import List, Dict
 
 
9
 
10
  img_base = 'https://i.pximg.net/img-original/img/'
11
 
@@ -17,6 +19,7 @@ class PixifDownloadModel(BaseModel):
17
 
18
  env_path = os.path.dirname(os.path.realpath(__file__)) + "/../.env"
19
 
 
20
  PHPSESSID = os.getenv("PHPSESSID")
21
 
22
  cookies = {"PHPSESSID": PHPSESSID}
@@ -44,11 +47,10 @@ async def search(raw, pages, ai_only=True, real_only=True, cookies=None, headers
44
 
45
  post_ids = []
46
  tasks = []
47
- prev_first_id = None
48
 
49
  async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
50
  for page in range(1, pages + 1):
51
- page_url = f"{url}&p={page}"
52
  task = fetch_page(session, page_url)
53
  tasks.append(task)
54
 
@@ -67,10 +69,6 @@ async def search(raw, pages, ai_only=True, real_only=True, cookies=None, headers
67
  posts = data['body']['illustManga']['data']
68
  if not posts:
69
  break
70
- current_first_id = posts[0]['id']
71
- if prev_first_id and current_first_id == prev_first_id:
72
- break
73
- prev_first_id = current_first_id
74
  post_ids.extend([post['id'] for post in posts])
75
 
76
  return post_ids, requests.utils.unquote(keywords, encoding='utf-8')
@@ -95,6 +93,7 @@ async def search_endpoint(
95
  pages: int = Query(1, description="Number of pages to fetch."),
96
  ai_only: bool = Query(True, description="Filter for AI-generated content.")
97
  ):
 
98
  try:
99
  post_ids, keywords = await search(raw, pages, ai_only, cookies=cookies, headers=headers)
100
  return {"post_ids": post_ids, "filename": base26_time() + "_" + keywords}
 
6
  import os
7
  from pydantic import BaseModel
8
  from typing import List, Dict
9
+ from dotenv import load_dotenv
10
+
11
 
12
  img_base = 'https://i.pximg.net/img-original/img/'
13
 
 
19
 
20
  env_path = os.path.dirname(os.path.realpath(__file__)) + "/../.env"
21
 
22
+ load_dotenv(env_path)
23
  PHPSESSID = os.getenv("PHPSESSID")
24
 
25
  cookies = {"PHPSESSID": PHPSESSID}
 
47
 
48
  post_ids = []
49
  tasks = []
 
50
 
51
  async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
52
  for page in range(1, pages + 1):
53
+ page_url = f"{url.strip()}&p={page}"
54
  task = fetch_page(session, page_url)
55
  tasks.append(task)
56
 
 
69
  posts = data['body']['illustManga']['data']
70
  if not posts:
71
  break
 
 
 
 
72
  post_ids.extend([post['id'] for post in posts])
73
 
74
  return post_ids, requests.utils.unquote(keywords, encoding='utf-8')
 
93
  pages: int = Query(1, description="Number of pages to fetch."),
94
  ai_only: bool = Query(True, description="Filter for AI-generated content.")
95
  ):
96
+ print(raw, pages, ai_only, cookies, headers)
97
  try:
98
  post_ids, keywords = await search(raw, pages, ai_only, cookies=cookies, headers=headers)
99
  return {"post_ids": post_ids, "filename": base26_time() + "_" + keywords}
Client/Extract Pixiv/ai_search.py CHANGED
@@ -1,7 +1,7 @@
1
  import requests
2
  import os
3
 
4
- local = 0
5
  if local:
6
  endpoint = "http://127.0.0.1:7860"
7
  else:
@@ -11,7 +11,7 @@ os.chdir(os.path.dirname(os.path.abspath(__file__)))
11
 
12
  input_url = input("Enter the URL: ")
13
 
14
- pages = 300 // 60
15
 
16
  params = {
17
  'raw': input_url,
@@ -23,5 +23,7 @@ params = {
23
  response = requests.get(f'{endpoint}/search', params=params)
24
 
25
  data = response.json()
26
- with open(f"../{data['filename']}.txt", "w") as f:
27
- f.write("\n".join(data['post_ids']))
 
 
 
1
  import requests
2
  import os
3
 
4
+ local = 1
5
  if local:
6
  endpoint = "http://127.0.0.1:7860"
7
  else:
 
11
 
12
  input_url = input("Enter the URL: ")
13
 
14
+ pages = 3
15
 
16
  params = {
17
  'raw': input_url,
 
23
  response = requests.get(f'{endpoint}/search', params=params)
24
 
25
  data = response.json()
26
+ # with open(f"../{data['filename']}.txt", "w") as f:
27
+ # f.write("\n".join(data['post_ids']))
28
+
29
+ print(data)