q6 committed on
Commit
3b91e61
1 Parent(s): ad65a09
API/app.py CHANGED
@@ -130,10 +130,10 @@ async def process_post(post_id, session, semaphore):
130
  except Exception:
131
  return post_id, None
132
 
133
- async def fetch_image_bytes(session, url, semaphore):
134
  async with semaphore:
135
  async with session.get(url) as response:
136
- return await response.read()
137
 
138
  @app.get("/allimages")
139
  async def all_images(
@@ -204,17 +204,15 @@ async def pixif_zip(items: PixifZipModel, background_tasks: BackgroundTasks):
204
  connector = aiohttp.TCPConnector(limit=20)
205
  semaphore = asyncio.Semaphore(20)
206
  async with aiohttp.ClientSession(headers=headers, connector=connector) as session:
207
- tasks = {}
208
  for post_id, url in downloads.items():
209
  full_url = url if url.startswith("http") else img_base + url
210
- task = asyncio.create_task(fetch_image_bytes(session, full_url, semaphore))
211
- tasks[task] = post_id
212
 
213
  with zipfile.ZipFile(tmp_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
214
  for task in asyncio.as_completed(tasks):
215
- post_id = tasks[task]
216
  try:
217
- data = await task
218
  except Exception:
219
  continue
220
  if data:
 
130
  except Exception:
131
  return post_id, None
132
 
133
async def fetch_image_bytes(session, url, post_id, semaphore):
    """Download one image and return it together with its post id.

    Args:
        session: an open aiohttp.ClientSession used for the request.
        url: absolute URL of the image to fetch.
        post_id: opaque identifier passed through unchanged so the caller
            can match the downloaded bytes back to the originating post.
        semaphore: asyncio.Semaphore bounding concurrent downloads.

    Returns:
        A ``(post_id, body_bytes)`` tuple with the raw response body.

    Raises:
        aiohttp.ClientResponseError: if the server answers with an HTTP
            error status. The caller's per-task ``except Exception`` path
            treats this as "skip this image".
    """
    async with semaphore:
        async with session.get(url) as response:
            # Fail fast on HTTP errors so an HTML error page is never
            # mistaken for image data and written into the zip archive.
            response.raise_for_status()
            return post_id, await response.read()
137
 
138
  @app.get("/allimages")
139
  async def all_images(
 
204
  connector = aiohttp.TCPConnector(limit=20)
205
  semaphore = asyncio.Semaphore(20)
206
  async with aiohttp.ClientSession(headers=headers, connector=connector) as session:
207
+ tasks = []
208
  for post_id, url in downloads.items():
209
  full_url = url if url.startswith("http") else img_base + url
210
+ tasks.append(asyncio.create_task(fetch_image_bytes(session, full_url, post_id, semaphore)))
 
211
 
212
  with zipfile.ZipFile(tmp_path, "w", compression=zipfile.ZIP_DEFLATED) as zf:
213
  for task in asyncio.as_completed(tasks):
 
214
  try:
215
+ post_id, data = await task
216
  except Exception:
217
  continue
218
  if data:
Client/Extract Pixiv/ai_search.py CHANGED
@@ -9,7 +9,7 @@ os.chdir(os.path.dirname(os.path.abspath(__file__)))
9
 
10
  input_url = input("Enter the URL: ")
11
 
12
- pages = 1
13
 
14
  x=''
15
  n=int(time.time()*100)
 
9
 
10
  input_url = input("Enter the URL: ")
11
 
12
+ pages = 15
13
 
14
  x=''
15
  n=int(time.time()*100)
Client/Extract Pixiv/pixiv_api.py CHANGED
@@ -18,6 +18,25 @@ headers = {
18
  'referer': 'https://www.pixiv.net/',
19
  }
20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  async def fetch_page(session, url):
22
  async with session.get(url) as response:
23
  data = await response.json()
@@ -35,8 +54,6 @@ async def search(raw, pages, ai_only=True, real_only=True, cookies=None, headers
35
  post_ids = []
36
  tasks = []
37
 
38
- # AI銈ゃ儵銈广儓 OR AI鐢熸垚 OR StableDiffusion OR AI-generated OR NovelAI OR NovelAIDiffusionAI OR AIart
39
- AI_TAGS = 'AI銈ゃ儵銈广儓 AI鐢熸垚 StableDiffusion AI-generated NovelAI NovelAIDiffusionAI AIart AI'.lower().split(' ')
40
  async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
41
  for page in range(1, pages + 1):
42
  page_url = f"{url.strip()}&p={page}"
@@ -44,18 +61,12 @@ async def search(raw, pages, ai_only=True, real_only=True, cookies=None, headers
44
  tasks.append(task)
45
 
46
  responses = await asyncio.gather(*tasks)
47
- posts = []
48
  for data in responses:
 
49
  if ai_only:
50
- for post in data['body']['illustManga']['data']:
51
- if post['aiType'] == 2 or any(tag.lower() in AI_TAGS for tag in post['tags']):
52
- posts.append(post)
53
- if real_only:
54
- for post in data['body']['illustManga']['data']:
55
- if post['aiType'] != 2:
56
- posts.append(post)
57
- if not ai_only and not real_only:
58
- posts = data['body']['illustManga']['data']
59
  if not posts:
60
  break
61
  post_ids.extend([post['id'] for post in posts])
@@ -185,4 +196,4 @@ async def get_pixif_data(post_ids):
185
  results = await asyncio.gather(*tasks)
186
 
187
  image_exifs = {post_id: image_url.replace(img_base, '', 1) for post_id, image_url in results if image_url}
188
- return image_exifs
 
18
  'referer': 'https://www.pixiv.net/',
19
  }
20
 
21
# Canonical (casefolded) tag names that mark a post as AI-generated.
# NOTE(review): the legacy inline tag list also contained the bare tag
# "ai"; it is absent here -- confirm that removal was deliberate.
AI_TAGS = {
    "ai銈ゃ儵銈广儓",
    "ai鐢熸垚",
    "stablediffusion",
    "ai-generated",
    "novelai",
    "novelaidiffusionai",
    "aiart",
}

def is_ai_post(post):
    """Return True when *post* looks AI-generated.

    A post counts as AI when Pixiv itself flags it (``aiType == 2``) or
    when any of its tags matches a known AI tag, case-insensitively.
    A missing/None ``tags`` value and non-string tag entries are
    tolerated and simply ignored.
    """
    if post.get("aiType") == 2:
        return True
    return any(
        isinstance(tag, str) and tag.casefold() in AI_TAGS
        for tag in (post.get("tags") or [])
    )
39
+
40
  async def fetch_page(session, url):
41
  async with session.get(url) as response:
42
  data = await response.json()
 
54
  post_ids = []
55
  tasks = []
56
 
 
 
57
  async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
58
  for page in range(1, pages + 1):
59
  page_url = f"{url.strip()}&p={page}"
 
61
  tasks.append(task)
62
 
63
  responses = await asyncio.gather(*tasks)
 
64
  for data in responses:
65
+ posts = data['body']['illustManga']['data']
66
  if ai_only:
67
+ posts = [post for post in posts if is_ai_post(post)]
68
+ elif real_only:
69
+ posts = [post for post in posts if not is_ai_post(post)]
 
 
 
 
 
 
70
  if not posts:
71
  break
72
  post_ids.extend([post['id'] for post in posts])
 
196
  results = await asyncio.gather(*tasks)
197
 
198
  image_exifs = {post_id: image_url.replace(img_base, '', 1) for post_id, image_url in results if image_url}
199
+ return image_exifs