q6 committed on
Commit
da5502e
·
1 Parent(s): a1119e6
Files changed (2) hide show
  1. API/app.py +126 -3
  2. Client/comments.py +29 -55
API/app.py CHANGED
@@ -7,10 +7,13 @@ import tempfile
7
  import zipfile
8
  import os
9
  from pydantic import BaseModel
10
- from typing import List, Dict
11
 
12
 
13
  img_base = 'https://i.pximg.net/img-original/img/'
 
 
 
14
 
15
  class pixifModel(BaseModel):
16
  post_ids: List[int]
@@ -22,6 +25,11 @@ class PixifDownloadModel(BaseModel):
22
  class PixifZipModel(BaseModel):
23
  d: Dict[str, str]
24
 
 
 
 
 
 
25
  headers = {
26
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0",
27
  'referer': 'https://www.pixiv.net/',
@@ -29,8 +37,13 @@ headers = {
29
 
30
  app = FastAPI()
31
 
32
- async def fetch_page(session, url):
33
- async with session.get(url) as response:
 
 
 
 
 
34
  data = await response.json()
35
  return data
36
 
@@ -95,6 +108,91 @@ def parse_png_metadata(data):
95
  index += chunk_len + 4
96
  return None
97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  async def process_post(post_id, session, semaphore):
99
  async with semaphore:
100
  try:
@@ -222,6 +320,31 @@ async def pixif_zip(items: PixifZipModel, background_tasks: BackgroundTasks):
222
  filename = f"pixif_{base26_time()}.zip"
223
  return FileResponse(tmp_path, media_type="application/zip", filename=filename)
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  @app.get("/")
226
  async def read_root():
227
  return {"message": "Hello, World!"}
 
7
  import zipfile
8
  import os
9
  from pydantic import BaseModel
10
+ from typing import List, Dict, Optional
11
 
12
 
13
  img_base = 'https://i.pximg.net/img-original/img/'
14
+ COMMENTS_ROOTS_URL = "https://www.pixiv.net/ajax/illusts/comments/roots"
15
+ COMMENTS_REPLIES_URL = "https://www.pixiv.net/ajax/illusts/comments/replies"
16
+ COMMENTS_LIMIT = 30
17
 
18
  class pixifModel(BaseModel):
19
  post_ids: List[int]
 
25
  class PixifZipModel(BaseModel):
26
  d: Dict[str, str]
27
 
28
class PixifCommentsModel(BaseModel):
    """Request body accepted by the ``POST /comments`` endpoint."""

    # Post IDs to fetch comments for; the endpoint returns early with an
    # empty mapping when this list is empty.
    post_ids: List[int]
    # Session value the endpoint hands to build_cookies() for the
    # upstream requests.
    phpsessid: str
    # Page size forwarded as the "limit" query parameter of the
    # comment-roots request; also the step by which the offset advances.
    limit: int = COMMENTS_LIMIT
32
+
33
  headers = {
34
  "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0",
35
  'referer': 'https://www.pixiv.net/',
 
37
 
38
  app = FastAPI()
39
 
40
async def fetch_page(session, url, params=None, extra_headers=None, semaphore: Optional[asyncio.Semaphore] = None):
    """GET ``url`` and return the response body decoded as JSON.

    Args:
        session: Client session whose ``get(url, params=..., headers=...)``
            returns an async context manager yielding a response with an
            awaitable ``json()`` method.
        url: Address to request.
        params: Optional query parameters forwarded to ``session.get``.
        extra_headers: Optional per-request headers forwarded to
            ``session.get``.
        semaphore: Optional semaphore held for the duration of the request
            to bound concurrency. When omitted the request runs unthrottled.

    Returns:
        Whatever ``response.json()`` yields for the response.
    """

    async def _request():
        # Single request/decode path shared by the throttled and
        # unthrottled cases, so the two can never drift apart.
        async with session.get(url, params=params, headers=extra_headers) as response:
            return await response.json()

    if not semaphore:
        return await _request()
    async with semaphore:
        return await _request()
49
 
 
108
  index += chunk_len + 4
109
  return None
110
 
111
def format_comment_text(comment):
    """Return the display text for a comment mapping.

    The ``"comment"`` value is used when it is non-empty. Otherwise a
    truthy ``"stampId"`` is rendered as ``[stamp:<id>]``. When neither is
    present the result is an empty string.
    """
    written = comment.get("comment")
    if written:
        return written

    stamp = comment.get("stampId")
    if stamp:
        return f"[stamp:{stamp}]"
    return ""
116
+
117
async def fetch_comment_replies(comment_id, post_id, session, semaphore):
    """Collect every reply to one comment, following pagination.

    Pages are requested from ``COMMENTS_REPLIES_URL`` starting at page 1
    and continue while the response body reports ``hasNext``.

    Args:
        comment_id: ID of the parent comment, sent as ``comment_id``.
        post_id: Post the comment belongs to; used for the Referer header.
        session: Client session passed through to ``fetch_page``.
        semaphore: Concurrency limiter passed through to ``fetch_page``.

    Returns:
        A list of ``{"name": ..., "Comment": ...}`` dicts, in the order the
        replies were received.

    Raises:
        RuntimeError: When a response payload carries a truthy ``error``.
    """
    request_headers = {"Referer": f"https://www.pixiv.net/artworks/{post_id}"}
    collected = []
    page_number = 1
    has_more = True

    while has_more:
        response_json = await fetch_page(
            session,
            COMMENTS_REPLIES_URL,
            params={"comment_id": comment_id, "page": page_number, "lang": "en"},
            extra_headers=request_headers,
            semaphore=semaphore,
        )

        if response_json.get("error"):
            reason = response_json.get("message") or "Unknown error"
            raise RuntimeError(f"Pixiv error for reply {comment_id}: {reason}")

        page_body = response_json.get("body") or {}
        collected.extend(
            {
                "name": entry.get("userName") or "",
                "Comment": format_comment_text(entry),
            }
            for entry in page_body.get("comments") or []
        )

        has_more = bool(page_body.get("hasNext"))
        page_number += 1

    return collected
152
+
153
async def fetch_post_comments(post_id, session, limit, semaphore):
    """Fetch all root comments of a post, each with its replies.

    Root comments are paged from ``COMMENTS_ROOTS_URL`` using an
    ``offset``/``limit`` window. For every comment flagged ``hasReplies``
    the replies are fetched as well; a failure there is logged and the
    comment is kept with an empty reply list (best effort).

    Args:
        post_id: ID of the post, sent as ``illust_id``.
        session: Client session passed through to ``fetch_page``.
        limit: Page size; must be positive because it is also the amount
            by which the offset advances.
        semaphore: Concurrency limiter passed through to ``fetch_page``.

    Returns:
        A ``(str(post_id), comments)`` tuple where ``comments`` is a list of
        ``{"name": ..., "Comment": ..., "Replies": [...]}`` dicts.

    Raises:
        ValueError: When ``limit`` is not positive.
        RuntimeError: When a response payload carries a truthy ``error``.
    """
    import logging

    # A non-positive limit would never advance the offset, so the loop
    # below could request the same window forever.
    if limit <= 0:
        raise ValueError(f"limit must be a positive integer, got {limit!r}")

    logger = logging.getLogger(__name__)
    offset = 0
    results = []
    referer = {"Referer": f"https://www.pixiv.net/artworks/{post_id}"}

    while True:
        params = {"illust_id": post_id, "offset": offset, "limit": limit}
        payload = await fetch_page(
            session,
            COMMENTS_ROOTS_URL,
            params=params,
            extra_headers=referer,
            semaphore=semaphore,
        )

        if payload.get("error"):
            message = payload.get("message") or "Unknown error"
            raise RuntimeError(f"Pixiv error for {post_id}: {message}")

        body = payload.get("body") or {}
        comments = body.get("comments") or []

        for comment in comments:
            replies = []
            if comment.get("hasReplies"):
                try:
                    replies = await fetch_comment_replies(comment.get("id"), post_id, session, semaphore)
                except Exception:
                    # Best effort: keep the root comment, but leave a trace
                    # instead of discarding the failure silently.
                    logger.warning(
                        "Failed to fetch replies for comment %s on post %s",
                        comment.get("id"),
                        post_id,
                        exc_info=True,
                    )
                    replies = []
            results.append(
                {
                    "name": comment.get("userName") or "",
                    "Comment": format_comment_text(comment),
                    "Replies": replies,
                }
            )

        # An empty page means there is nothing more to collect, even if the
        # server still claims hasNext; stop rather than page indefinitely.
        if not comments or not body.get("hasNext"):
            break
        offset += limit

    return str(post_id), results
195
+
196
  async def process_post(post_id, session, semaphore):
197
  async with semaphore:
198
  try:
 
320
  filename = f"pixif_{base26_time()}.zip"
321
  return FileResponse(tmp_path, media_type="application/zip", filename=filename)
322
 
323
@app.post("/comments")
async def comments(items: PixifCommentsModel):
    """Return the comments of every requested post, keyed by post ID.

    Each distinct post ID is fetched concurrently through
    ``fetch_post_comments``, sharing one client session and one semaphore
    of size 10. A post whose fetch fails is reported with an empty list
    instead of failing the whole request.

    Args:
        items: Request body carrying ``post_ids``, ``phpsessid`` and
            ``limit``.

    Returns:
        ``{"comments": {<post id as str>: [<comment dict>, ...]}}``.
    """
    if not items.post_ids:
        return {"comments": {}}

    import logging

    logger = logging.getLogger(__name__)

    # Drop repeated IDs (keeping first-seen order): the result is keyed by
    # ID, so fetching the same post twice is wasted upstream traffic.
    post_ids = list(dict.fromkeys(items.post_ids))

    cookies = build_cookies(items.phpsessid)
    semaphore = asyncio.Semaphore(10)

    async with aiohttp.ClientSession(cookies=cookies, headers=headers) as session:
        tasks = [
            fetch_post_comments(post_id, session, items.limit, semaphore)
            for post_id in post_ids
        ]
        results = await asyncio.gather(*tasks, return_exceptions=True)

    comments_by_post = {}
    for post_id, result in zip(post_ids, results):
        # BaseException rather than Exception: gather() can hand back
        # CancelledError, which is not an Exception subclass and would
        # otherwise reach the tuple unpacking below.
        if isinstance(result, BaseException):
            logger.warning("Failed to fetch comments for post %s: %r", post_id, result)
            comments_by_post[str(post_id)] = []
        else:
            key, post_comments = result
            comments_by_post[str(key)] = post_comments

    return {"comments": comments_by_post}
347
+
348
  @app.get("/")
  async def read_root():
      """Return a fixed greeting payload for GET requests to the root path."""
      return {"message": "Hello, World!"}
Client/comments.py CHANGED
@@ -1,16 +1,10 @@
1
  import os
2
  import json
3
  import requests
4
- from tqdm import tqdm
5
 
6
 
7
- COMMENTS_URL = "https://www.pixiv.net/ajax/illusts/comments/roots"
8
  COMMENTS_LIMIT = 30
9
-
10
- HEADERS = {
11
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0",
12
- "Referer": "https://www.pixiv.net/",
13
- }
14
 
15
 
16
  def read_dotenv_value(path, key):
@@ -39,47 +33,26 @@ def get_phpsessid():
39
  raise RuntimeError("PHPSESSID is not set in the environment or .env")
40
 
41
 
42
- def build_session(phpsessid):
43
- session = requests.Session()
44
- session.cookies.update({"PHPSESSID": phpsessid})
45
- session.headers.update(HEADERS)
46
- return session
47
-
48
-
49
- def fetch_comments(post_id, session, limit=COMMENTS_LIMIT):
50
- offset = 0
51
- results = []
52
-
53
- while True:
54
- params = {"illust_id": post_id, "offset": offset, "limit": limit}
55
- headers = {"Referer": f"https://www.pixiv.net/artworks/{post_id}"}
56
- response = session.get(COMMENTS_URL, params=params, headers=headers, timeout=30)
57
- response.raise_for_status()
58
- payload = response.json()
59
-
60
- if payload.get("error"):
61
- message = payload.get("message") or "Unknown error"
62
- raise RuntimeError(f"Pixiv error for {post_id}: {message}")
63
-
64
- body = payload.get("body") or {}
65
- comments = body.get("comments") or []
66
-
67
- for comment in comments:
68
- text = comment.get("comment") or ""
69
- if not text and comment.get("stampId"):
70
- text = f"[stamp:{comment['stampId']}]"
71
- results.append(
72
- {
73
- "name": comment.get("userName") or "",
74
- "Comment": text,
75
- }
76
- )
77
 
78
- if not body.get("hasNext"):
79
- break
80
- offset += limit
81
 
82
- return results
 
 
 
 
 
 
 
 
83
 
84
 
85
  def parse_indexes(inputs):
@@ -109,25 +82,26 @@ def main():
109
  indexes = parse_indexes(inputs)
110
 
111
  phpsessid = get_phpsessid()
112
- session = build_session(phpsessid)
113
 
114
  for index in indexes:
115
  if index < 0 or index >= len(valid):
116
  continue
117
  group_name = valid[index].rsplit(".", 1)[0]
118
  with open(valid[index], "r") as f:
119
- post_ids = f.read().split()
120
 
 
121
  if not post_ids:
122
  continue
123
 
124
- comments_by_post = {}
125
- for post_id in tqdm(post_ids, desc=f"Fetching {group_name}"):
126
- try:
127
- comments_by_post[str(post_id)] = fetch_comments(post_id, session)
128
- except Exception as exc:
129
- print(f"Failed to fetch comments for {post_id}: {exc}")
130
- comments_by_post[str(post_id)] = []
131
 
132
  output_path = os.path.join("comments", f"{group_name}.json")
133
  with open(output_path, "w", encoding="utf-8") as f:
 
1
  import os
2
  import json
3
  import requests
 
4
 
5
 
 
6
  COMMENTS_LIMIT = 30
7
+ DEFAULT_ENDPOINT = "http://127.0.0.1:7860"
 
 
 
 
8
 
9
 
10
  def read_dotenv_value(path, key):
 
33
  raise RuntimeError("PHPSESSID is not set in the environment or .env")
34
 
35
 
36
def get_endpoint():
    """Resolve the base URL of the comments API.

    ``PIXIF_ENDPOINT`` is looked up first in the process environment, then
    in the ``.env`` file one directory above this module. A value found in
    either place is returned with trailing slashes removed; otherwise
    ``DEFAULT_ENDPOINT`` is returned.
    """
    configured = os.getenv("PIXIF_ENDPOINT")
    if not configured:
        # Only consult the .env file when the environment has no value.
        module_dir = os.path.dirname(__file__)
        dotenv_path = os.path.abspath(os.path.join(module_dir, "..", ".env"))
        configured = read_dotenv_value(dotenv_path, "PIXIF_ENDPOINT")

    return configured.rstrip("/") if configured else DEFAULT_ENDPOINT
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
 
 
 
 
46
 
47
def fetch_comments(post_ids, phpsessid, endpoint, limit=COMMENTS_LIMIT):
    """Request comments for a batch of posts from the comments API.

    Args:
        post_ids: Post IDs to fetch comments for.
        phpsessid: Session value forwarded in the request body.
        endpoint: Base URL of the API, without a trailing slash.
        limit: Page size forwarded in the request body.

    Returns:
        A dict of comments: the ``"comments"`` member of the JSON response
        when present, otherwise the response object itself.

    Raises:
        requests.HTTPError: When the API answers with an error status.
        RuntimeError: When the decoded response is not a dict of comments.
    """
    payload = {"post_ids": post_ids, "phpsessid": phpsessid, "limit": limit}
    response = requests.post(f"{endpoint}/comments", json=payload, timeout=300)
    response.raise_for_status()
    data = response.json()

    # Check the envelope type before calling .get(): a JSON list, string or
    # null would otherwise raise AttributeError and bypass the intended
    # RuntimeError below.
    comments = data.get("comments", data) if isinstance(data, dict) else data
    if not isinstance(comments, dict):
        raise RuntimeError("Unexpected response from comments endpoint.")
    return comments
56
 
57
 
58
  def parse_indexes(inputs):
 
82
  indexes = parse_indexes(inputs)
83
 
84
  phpsessid = get_phpsessid()
85
+ endpoint = get_endpoint()
86
 
87
  for index in indexes:
88
  if index < 0 or index >= len(valid):
89
  continue
90
  group_name = valid[index].rsplit(".", 1)[0]
91
  with open(valid[index], "r") as f:
92
+ raw_ids = f.read().split()
93
 
94
+ post_ids = [int(post_id) for post_id in raw_ids if post_id.isdigit()]
95
  if not post_ids:
96
  continue
97
 
98
+ try:
99
+ fetched = fetch_comments(post_ids, phpsessid, endpoint)
100
+ except Exception as exc:
101
+ print(f"Failed to fetch comments for {group_name}: {exc}")
102
+ fetched = {}
103
+
104
+ comments_by_post = {str(post_id): fetched.get(str(post_id), []) for post_id in post_ids}
105
 
106
  output_path = os.path.join("comments", f"{group_name}.json")
107
  with open(output_path, "w", encoding="utf-8") as f: