AI
Browse files- API/app.py +5 -7
- Client/Extract Pixiv/ai_search.py +1 -1
- Client/Extract Pixiv/pixiv_api.py +24 -13
API/app.py
CHANGED
|
@@ -130,10 +130,10 @@ async def process_post(post_id, session, semaphore):
|
|
| 130 |
except Exception:
|
| 131 |
return post_id, None
|
| 132 |
|
| 133 |
-
async def fetch_image_bytes(session, url, semaphore):
|
| 134 |
async with semaphore:
|
| 135 |
async with session.get(url) as response:
|
| 136 |
-
return await response.read()
|
| 137 |
|
| 138 |
@app.get("/allimages")
|
| 139 |
async def all_images(
|
|
@@ -204,17 +204,15 @@ async def pixif_zip(items: PixifZipModel, background_tasks: BackgroundTasks):
|
|
| 204 |
connector = aiohttp.TCPConnector(limit=20)
|
| 205 |
semaphore = asyncio.Semaphore(20)
|
| 206 |
async with aiohttp.ClientSession(headers=headers, connector=connector) as session:
|
| 207 |
-
tasks =
|
| 208 |
for post_id, url in downloads.items():
|
| 209 |
full_url = url if url.startswith("http") else img_base + url
|
| 210 |
-
|
| 211 |
-
tasks[task] = post_id
|
| 212 |
|
| 213 |
with zipfile.ZipFile(tmp_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
|
| 214 |
for task in asyncio.as_completed(tasks):
|
| 215 |
-
post_id = tasks[task]
|
| 216 |
try:
|
| 217 |
-
data = await task
|
| 218 |
except Exception:
|
| 219 |
continue
|
| 220 |
if data:
|
|
|
|
| 130 |
except Exception:
|
| 131 |
return post_id, None
|
| 132 |
|
| 133 |
+
async def fetch_image_bytes(session, url, post_id, semaphore):
|
| 134 |
async with semaphore:
|
| 135 |
async with session.get(url) as response:
|
| 136 |
+
return post_id, await response.read()
|
| 137 |
|
| 138 |
@app.get("/allimages")
|
| 139 |
async def all_images(
|
|
|
|
| 204 |
connector = aiohttp.TCPConnector(limit=20)
|
| 205 |
semaphore = asyncio.Semaphore(20)
|
| 206 |
async with aiohttp.ClientSession(headers=headers, connector=connector) as session:
|
| 207 |
+
tasks = []
|
| 208 |
for post_id, url in downloads.items():
|
| 209 |
full_url = url if url.startswith("http") else img_base + url
|
| 210 |
+
tasks.append(asyncio.create_task(fetch_image_bytes(session, full_url, post_id, semaphore)))
|
|
|
|
| 211 |
|
| 212 |
with zipfile.ZipFile(tmp_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
|
| 213 |
for task in asyncio.as_completed(tasks):
|
|
|
|
| 214 |
try:
|
| 215 |
+
post_id, data = await task
|
| 216 |
except Exception:
|
| 217 |
continue
|
| 218 |
if data:
|
Client/Extract Pixiv/ai_search.py
CHANGED
|
@@ -9,7 +9,7 @@ os.chdir(os.path.dirname(os.path.abspath(__file__)))
|
|
| 9 |
|
| 10 |
input_url = input("Enter the URL: ")
|
| 11 |
|
| 12 |
-
pages =
|
| 13 |
|
| 14 |
x=''
|
| 15 |
n=int(time.time()*100)
|
|
|
|
| 9 |
|
| 10 |
input_url = input("Enter the URL: ")
|
| 11 |
|
| 12 |
+
pages = 15
|
| 13 |
|
| 14 |
x=''
|
| 15 |
n=int(time.time()*100)
|
Client/Extract Pixiv/pixiv_api.py
CHANGED
|
@@ -18,6 +18,25 @@ headers = {
|
|
| 18 |
'referer': 'https://www.pixiv.net/',
|
| 19 |
}
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
async def fetch_page(session, url):
|
| 22 |
async with session.get(url) as response:
|
| 23 |
data = await response.json()
|
|
@@ -35,8 +54,6 @@ async def search(raw, pages, ai_only=True, real_only=True, cookies=None, headers
|
|
| 35 |
post_ids = []
|
| 36 |
tasks = []
|
| 37 |
|
| 38 |
-
# AIイラスト OR AI生成 OR StableDiffusion OR AI-generated OR NovelAI OR NovelAIDiffusionAI OR AIart
|
| 39 |
-
AI_TAGS = 'AIイラスト AI生成 StableDiffusion AI-generated NovelAI NovelAIDiffusionAI AIart AI'.lower().split(' ')
|
| 40 |
async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
|
| 41 |
for page in range(1, pages + 1):
|
| 42 |
page_url = f"{url.strip()}&p={page}"
|
|
@@ -44,18 +61,12 @@ async def search(raw, pages, ai_only=True, real_only=True, cookies=None, headers
|
|
| 44 |
tasks.append(task)
|
| 45 |
|
| 46 |
responses = await asyncio.gather(*tasks)
|
| 47 |
-
posts = []
|
| 48 |
for data in responses:
|
|
|
|
| 49 |
if ai_only:
|
| 50 |
-
for post in
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
if real_only:
|
| 54 |
-
for post in data['body']['illustManga']['data']:
|
| 55 |
-
if post['aiType'] != 2:
|
| 56 |
-
posts.append(post)
|
| 57 |
-
if not ai_only and not real_only:
|
| 58 |
-
posts = data['body']['illustManga']['data']
|
| 59 |
if not posts:
|
| 60 |
break
|
| 61 |
post_ids.extend([post['id'] for post in posts])
|
|
@@ -185,4 +196,4 @@ async def get_pixif_data(post_ids):
|
|
| 185 |
results = await asyncio.gather(*tasks)
|
| 186 |
|
| 187 |
image_exifs = {post_id: image_url.replace(img_base, '', 1) for post_id, image_url in results if image_url}
|
| 188 |
-
return image_exifs
|
|
|
|
| 18 |
'referer': 'https://www.pixiv.net/',
|
| 19 |
}
|
| 20 |
|
| 21 |
+
AI_TAGS = {
|
| 22 |
+
"aiイラスト",
|
| 23 |
+
"ai生成",
|
| 24 |
+
"stablediffusion",
|
| 25 |
+
"ai-generated",
|
| 26 |
+
"novelai",
|
| 27 |
+
"novelaidiffusionai",
|
| 28 |
+
"aiart",
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
+
def is_ai_post(post):
|
| 32 |
+
if post.get("aiType") == 2:
|
| 33 |
+
return True
|
| 34 |
+
tags = post.get("tags") or []
|
| 35 |
+
for tag in tags:
|
| 36 |
+
if isinstance(tag, str) and tag.casefold() in AI_TAGS:
|
| 37 |
+
return True
|
| 38 |
+
return False
|
| 39 |
+
|
| 40 |
async def fetch_page(session, url):
|
| 41 |
async with session.get(url) as response:
|
| 42 |
data = await response.json()
|
|
|
|
| 54 |
post_ids = []
|
| 55 |
tasks = []
|
| 56 |
|
|
|
|
|
|
|
| 57 |
async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
|
| 58 |
for page in range(1, pages + 1):
|
| 59 |
page_url = f"{url.strip()}&p={page}"
|
|
|
|
| 61 |
tasks.append(task)
|
| 62 |
|
| 63 |
responses = await asyncio.gather(*tasks)
|
|
|
|
| 64 |
for data in responses:
|
| 65 |
+
posts = data['body']['illustManga']['data']
|
| 66 |
if ai_only:
|
| 67 |
+
posts = [post for post in posts if is_ai_post(post)]
|
| 68 |
+
elif real_only:
|
| 69 |
+
posts = [post for post in posts if not is_ai_post(post)]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
if not posts:
|
| 71 |
break
|
| 72 |
post_ids.extend([post['id'] for post in posts])
|
|
|
|
| 196 |
results = await asyncio.gather(*tasks)
|
| 197 |
|
| 198 |
image_exifs = {post_id: image_url.replace(img_base, '', 1) for post_id, image_url in results if image_url}
|
| 199 |
+
return image_exifs
|