- API/app.py +5 -6
- Client/Extract Pixiv/ai_search.py +6 -4
API/app.py
CHANGED
|
@@ -6,6 +6,8 @@ import time
|
|
| 6 |
import os
|
| 7 |
from pydantic import BaseModel
|
| 8 |
from typing import List, Dict
|
|
|
|
|
|
|
| 9 |
|
| 10 |
img_base = 'https://i.pximg.net/img-original/img/'
|
| 11 |
|
|
@@ -17,6 +19,7 @@ class PixifDownloadModel(BaseModel):
|
|
| 17 |
|
| 18 |
env_path = os.path.dirname(os.path.realpath(__file__)) + "/../.env"
|
| 19 |
|
|
|
|
| 20 |
PHPSESSID = os.getenv("PHPSESSID")
|
| 21 |
|
| 22 |
cookies = {"PHPSESSID": PHPSESSID}
|
|
@@ -44,11 +47,10 @@ async def search(raw, pages, ai_only=True, real_only=True, cookies=None, headers
|
|
| 44 |
|
| 45 |
post_ids = []
|
| 46 |
tasks = []
|
| 47 |
-
prev_first_id = None
|
| 48 |
|
| 49 |
async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
|
| 50 |
for page in range(1, pages + 1):
|
| 51 |
-
page_url = f"{url}&p={page}"
|
| 52 |
task = fetch_page(session, page_url)
|
| 53 |
tasks.append(task)
|
| 54 |
|
|
@@ -67,10 +69,6 @@ async def search(raw, pages, ai_only=True, real_only=True, cookies=None, headers
|
|
| 67 |
posts = data['body']['illustManga']['data']
|
| 68 |
if not posts:
|
| 69 |
break
|
| 70 |
-
current_first_id = posts[0]['id']
|
| 71 |
-
if prev_first_id and current_first_id == prev_first_id:
|
| 72 |
-
break
|
| 73 |
-
prev_first_id = current_first_id
|
| 74 |
post_ids.extend([post['id'] for post in posts])
|
| 75 |
|
| 76 |
return post_ids, requests.utils.unquote(keywords, encoding='utf-8')
|
|
@@ -95,6 +93,7 @@ async def search_endpoint(
|
|
| 95 |
pages: int = Query(1, description="Number of pages to fetch."),
|
| 96 |
ai_only: bool = Query(True, description="Filter for AI-generated content.")
|
| 97 |
):
|
|
|
|
| 98 |
try:
|
| 99 |
post_ids, keywords = await search(raw, pages, ai_only, cookies=cookies, headers=headers)
|
| 100 |
return {"post_ids": post_ids, "filename": base26_time() + "_" + keywords}
|
|
|
|
| 6 |
import os
|
| 7 |
from pydantic import BaseModel
|
| 8 |
from typing import List, Dict
|
| 9 |
+
from dotenv import load_dotenv
|
| 10 |
+
|
| 11 |
|
| 12 |
img_base = 'https://i.pximg.net/img-original/img/'
|
| 13 |
|
|
|
|
| 19 |
|
| 20 |
env_path = os.path.dirname(os.path.realpath(__file__)) + "/../.env"
|
| 21 |
|
| 22 |
+
load_dotenv(env_path)
|
| 23 |
PHPSESSID = os.getenv("PHPSESSID")
|
| 24 |
|
| 25 |
cookies = {"PHPSESSID": PHPSESSID}
|
|
|
|
| 47 |
|
| 48 |
post_ids = []
|
| 49 |
tasks = []
|
|
|
|
| 50 |
|
| 51 |
async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
|
| 52 |
for page in range(1, pages + 1):
|
| 53 |
+
page_url = f"{url.strip()}&p={page}"
|
| 54 |
task = fetch_page(session, page_url)
|
| 55 |
tasks.append(task)
|
| 56 |
|
|
|
|
| 69 |
posts = data['body']['illustManga']['data']
|
| 70 |
if not posts:
|
| 71 |
break
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
post_ids.extend([post['id'] for post in posts])
|
| 73 |
|
| 74 |
return post_ids, requests.utils.unquote(keywords, encoding='utf-8')
|
|
|
|
| 93 |
pages: int = Query(1, description="Number of pages to fetch."),
|
| 94 |
ai_only: bool = Query(True, description="Filter for AI-generated content.")
|
| 95 |
):
|
| 96 |
+
print(raw, pages, ai_only, cookies, headers)
|
| 97 |
try:
|
| 98 |
post_ids, keywords = await search(raw, pages, ai_only, cookies=cookies, headers=headers)
|
| 99 |
return {"post_ids": post_ids, "filename": base26_time() + "_" + keywords}
|
Client/Extract Pixiv/ai_search.py
CHANGED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
import requests
|
| 2 |
import os
|
| 3 |
|
| 4 |
-
local =
|
| 5 |
if local:
|
| 6 |
endpoint = "http://127.0.0.1:7860"
|
| 7 |
else:
|
|
@@ -11,7 +11,7 @@ os.chdir(os.path.dirname(os.path.abspath(__file__)))
|
|
| 11 |
|
| 12 |
input_url = input("Enter the URL: ")
|
| 13 |
|
| 14 |
-
pages =
|
| 15 |
|
| 16 |
params = {
|
| 17 |
'raw': input_url,
|
|
@@ -23,5 +23,7 @@ params = {
|
|
| 23 |
response = requests.get(f'{endpoint}/search', params=params)
|
| 24 |
|
| 25 |
data = response.json()
|
| 26 |
-
with open(f"../{data['filename']}.txt", "w") as f:
|
| 27 |
-
|
|
|
|
|
|
|
|
|
| 1 |
import requests
|
| 2 |
import os
|
| 3 |
|
| 4 |
+
local = 1
|
| 5 |
if local:
|
| 6 |
endpoint = "http://127.0.0.1:7860"
|
| 7 |
else:
|
|
|
|
| 11 |
|
| 12 |
input_url = input("Enter the URL: ")
|
| 13 |
|
| 14 |
+
pages = 3
|
| 15 |
|
| 16 |
params = {
|
| 17 |
'raw': input_url,
|
|
|
|
| 23 |
response = requests.get(f'{endpoint}/search', params=params)
|
| 24 |
|
| 25 |
data = response.json()
|
| 26 |
+
# with open(f"../{data['filename']}.txt", "w") as f:
|
| 27 |
+
# f.write("\n".join(data['post_ids']))
|
| 28 |
+
|
| 29 |
+
print(data)
|