Spaces:

q6
/

p

Running

App Files Files Community

q6 committed on Dec 5, 2024

Commit

b165f5f

1 Parent(s): 4e1b77d

X

Browse files

Files changed (4) hide show

API/app.py +88 -1
Client/Extract Pixiv/ai_search.py +1 -1
Client/Extract Pixiv/user.py +1 -1
Client/hunt.py +2 -2

API/app.py CHANGED Viewed

@@ -1,5 +1,6 @@
-from fastapi import FastAPI
 import aiohttp
 import asyncio
 import time
 import uvicorn
@@ -26,11 +27,47 @@ headers = {
 app = FastAPI()
 async def fetch_page(session, url):
     async with session.get(url) as response:
         data = await response.json()
         return data
 def base26(n):
     if n == 0:
         return "A"
@@ -45,6 +82,56 @@ def base26(n):
 def base26_time():
     return base26(int(time.time()))
 def determine_exif_type(metadata):
     if metadata is None:
         return None

+from fastapi import FastAPI, Query
 import aiohttp
+import requests
 import asyncio
 import time
 import uvicorn
 app = FastAPI()
 async def fetch_page(session, url):
     async with session.get(url) as response:
         data = await response.json()
         return data
+async def search(raw, pages, ai_only=True, cookies=None, headers=None):
+    keywords = raw.split('tags/')[-1].split('/')[0]
+    url = f"https://www.pixiv.net/ajax/search/artworks/{keywords}?word={keywords}"
+    if "?" in raw:
+        params = raw.split('?')[1]
+        url += f"&{params}"
+    if "s_mode" not in url:
+        url += "&s_mode=s_tag_full"
+    post_ids = []
+    tasks = []
+    prev_first_id = None
+    async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
+        for page in range(1, pages + 1):
+            page_url = f"{url}&p={page}"
+            task = fetch_page(session, page_url)
+            tasks.append(task)
+        responses = await asyncio.gather(*tasks)
+        for data in responses:
+            if ai_only:
+                posts = [post for post in data['body']['illustManga']['data'] if post['aiType'] == 2]
+            else:
+                posts = data['body']['illustManga']['data']
+            if not posts:
+                break
+            current_first_id = posts[0]['id']
+            if prev_first_id and current_first_id == prev_first_id:
+                break
+            prev_first_id = current_first_id
+            post_ids.extend([post['id'] for post in posts])
+    return post_ids, requests.utils.unquote(keywords, encoding='utf-8')
 def base26(n):
     if n == 0:
         return "A"
 def base26_time():
     return base26(int(time.time()))
+@app.get("/search")
+async def search_endpoint(
+    raw: str = Query(..., description="The raw URL to search."),
+    pages: int = Query(1, description="Number of pages to fetch."),
+    ai_only: bool = Query(True, description="Filter for AI-generated content.")
+):
+    try:
+        post_ids, keywords = await search(raw, pages, ai_only, cookies=cookies, headers=headers)
+        return {"post_ids": post_ids, "filename": base26_time() + "_" + keywords}
+    except Exception as e:
+        return {"error": str(e)}
+@app.get("/user")
+async def user(
+    user_id: int = Query(..., description="The user ID to fetch.")
+):
+    async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
+        data = await fetch_page(session, f'https://www.pixiv.net/ajax/user/{user_id}/profile/all')
+        posts = data["body"]["illusts"].keys()
+        try:
+            username = data['body']['pickup'][0]['userName']
+        except (KeyError, IndexError):
+            username = await fetch_page(session, f"https://www.pixiv.net/ajax/user/{user_id}")['body']['name']
+    return {"post_ids": list(posts), "filename": base26_time() + "_" + username.replace("|", "")}
+@app.get("/users")
+async def users(
+    user_ids: List[int] = Query(..., description="List of user IDs to fetch.", alias="user_ids")
+):
+    async def fetch_user_data(session, uid):
+        try:
+            data = await fetch_page(session, f'https://www.pixiv.net/ajax/user/{uid}/profile/all')
+            posts = list(data["body"]["illusts"].keys())
+            try:
+                username = data['body']['pickup'][0]['userName']
+            except (KeyError, IndexError):
+                user_data = await fetch_page(session, f"https://www.pixiv.net/ajax/user/{uid}")
+                username = user_data['body']['name']
+            filename = base26_time() + "_" + username.replace("|", "")
+            return {"post_ids": posts, "filename": filename}
+        except Exception as e:
+            return {"user_id": uid, "error": str(e)}
+    async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
+        tasks = [fetch_user_data(session, uid) for uid in user_ids]
+        results = await asyncio.gather(*tasks)
+    return results
 def determine_exif_type(metadata):
     if metadata is None:
         return None

Client/Extract Pixiv/ai_search.py CHANGED Viewed

@@ -13,7 +13,7 @@ params = {
     'ai_only': True,
 }
-response = requests.get('http://127.0.0.1:8000/search', params=params)
 data = response.json()

     'ai_only': True,
 }
+response = requests.get('https://q6-p.hf.space/search', params=params)
 data = response.json()

Client/Extract Pixiv/user.py CHANGED Viewed

@@ -10,7 +10,7 @@ def main():
     user_ids = [int(uid) for uid in user_ids]
-    response = requests.get('http://127.0.0.1:8000/users', params={'user_ids': user_ids})
     response.raise_for_status()
     data = response.json()

     user_ids = [int(uid) for uid in user_ids]
+    response = requests.get('https://q6-p.hf.space/users', params={'user_ids': user_ids})
     response.raise_for_status()
     data = response.json()

Client/hunt.py CHANGED Viewed

@@ -8,7 +8,7 @@ os.chdir(os.path.dirname(os.path.abspath(__file__)))
 db = lmdb.open("db", subdir=True, map_size=10485760)
 valid = [f for f in os.listdir() if f.endswith(".txt")]
-print(valid)
 for idx, file in enumerate(valid):
     print(f"{idx + 1}: {file}")
@@ -30,6 +30,6 @@ for index in indexs:
     with db.begin(write=False) as txn:
         post_ids = [post_id for post_id in post_ids if not txn.get(post_id.encode())]
-    data = requests.post('http://127.0.0.1:8000/pixif', json={"post_ids": post_ids}).json()
     print(data)

 db = lmdb.open("db", subdir=True, map_size=10485760)
 valid = [f for f in os.listdir() if f.endswith(".txt")]
 for idx, file in enumerate(valid):
     print(f"{idx + 1}: {file}")
     with db.begin(write=False) as txn:
         post_ids = [post_id for post_id in post_ids if not txn.get(post_id.encode())]
+    data = requests.post('https://q6-p.hf.space/pixif', json={"post_ids": post_ids}).json()
     print(data)