q6 committed on
Commit
a1119e6
·
1 Parent(s): c9445a1
Files changed (2) hide show
  1. .gitignore +1 -0
  2. Client/comments.py +140 -0
.gitignore CHANGED
@@ -1,6 +1,7 @@
1
  .env
2
  db
3
  images
 
4
  merge_dev.bat
5
  __pycache__
6
  *.png
 
1
  .env
2
  db
3
  images
4
+ comments
5
  merge_dev.bat
6
  __pycache__
7
  *.png
Client/comments.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import json
3
+ import requests
4
+ from tqdm import tqdm
5
+
6
+
7
# Pixiv AJAX endpoint that fetch_comments queries for an artwork's comments.
COMMENTS_URL = "https://www.pixiv.net/ajax/illusts/comments/roots"
# Default page size: sent as the `limit` query parameter and used as the
# offset step between pages in fetch_comments.
COMMENTS_LIMIT = 30

# Headers installed on every session created by build_session.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:126.0) Gecko/20100101 Firefox/126.0",
    "Referer": "https://www.pixiv.net/",
}
14
+
15
+
16
def read_dotenv_value(path, key):
    """Return the value assigned to *key* in a dotenv-style file.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    ignored. Whitespace around the key and the value is stripped, an
    optional leading ``export `` on the key is accepted, and one pair of
    matching surrounding quotes (single or double) is removed from the
    value. Only the first ``=`` separates key from value, so values may
    themselves contain ``=``.

    Args:
        path: Path of the file to read.
        key: Variable name to look up (exact, case-sensitive match).

    Returns:
        The value of the first matching assignment (possibly an empty
        string), or ``None`` if the file does not exist or has no such key.
    """
    try:
        with open(path, "r") as env_file:
            for raw_line in env_file:
                line = raw_line.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue

                name, _, value = line.partition("=")
                name = name.strip()
                # Tolerate shell-style "export KEY=value" lines.
                if name.startswith("export "):
                    name = name[len("export "):].strip()
                if name != key:
                    continue

                value = value.strip()
                # Drop one pair of matching quotes: KEY="value" -> value.
                if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
                    value = value[1:-1]
                return value
    except FileNotFoundError:
        return None
    return None
29
+
30
+
31
def get_phpsessid():
    """Look up the Pixiv ``PHPSESSID`` session cookie value.

    The process environment is consulted first. If the variable is unset
    or empty there, the ``.env`` file one directory above this module is
    read instead.

    Returns:
        The non-empty session id string.

    Raises:
        RuntimeError: If neither source provides a non-empty value.
    """
    from_environment = os.getenv("PHPSESSID")
    if not from_environment:
        module_dir = os.path.dirname(__file__)
        dotenv_path = os.path.abspath(os.path.join(module_dir, "..", ".env"))
        from_file = read_dotenv_value(dotenv_path, "PHPSESSID")
        if not from_file:
            raise RuntimeError("PHPSESSID is not set in the environment or .env")
        return from_file
    return from_environment
40
+
41
+
42
def build_session(phpsessid):
    """Create a ``requests.Session`` set up for authenticated Pixiv calls.

    Args:
        phpsessid: Value stored in the session's ``PHPSESSID`` cookie.

    Returns:
        A new session carrying that cookie and the module's ``HEADERS``.
    """
    http = requests.Session()
    # The two updates are independent of each other; headers first here.
    http.headers.update(HEADERS)
    auth_cookie = {"PHPSESSID": phpsessid}
    http.cookies.update(auth_cookie)
    return http
47
+
48
+
49
def fetch_comments(post_id, session, limit=COMMENTS_LIMIT):
    """Fetch all comments that ``COMMENTS_URL`` returns for one artwork.

    Requests pages of ``limit`` comments, advancing the offset by ``limit``
    each time, until the response no longer reports ``hasNext`` or a page
    comes back empty.

    Args:
        post_id: Artwork id; sent as ``illust_id`` and used to build the
            per-request Referer header.
        session: Object exposing a ``requests.Session``-style ``get``.
        limit: Page size sent to the endpoint and offset step per page.

    Returns:
        A list of ``{"name": <userName>, "Comment": <text>}`` dicts in the
        order received. A comment with no text but a ``stampId`` is
        rendered as ``"[stamp:<id>]"``; missing fields become ``""``.

    Raises:
        RuntimeError: If the JSON payload has a truthy ``error`` field.
        Exception: Whatever ``session.get``, ``raise_for_status`` or
            ``json`` raise (network, HTTP status or decoding failures).
    """
    offset = 0
    results = []
    # Loop-invariant: the artwork page is passed as Referer on every request.
    headers = {"Referer": f"https://www.pixiv.net/artworks/{post_id}"}

    while True:
        params = {"illust_id": post_id, "offset": offset, "limit": limit}
        response = session.get(COMMENTS_URL, params=params, headers=headers, timeout=30)
        response.raise_for_status()
        payload = response.json()

        if payload.get("error"):
            message = payload.get("message") or "Unknown error"
            raise RuntimeError(f"Pixiv error for {post_id}: {message}")

        body = payload.get("body") or {}
        comments = body.get("comments") or []

        for comment in comments:
            text = comment.get("comment") or ""
            stamp_id = comment.get("stampId")
            if not text and stamp_id:
                # Stamp-only comments carry no text; keep a readable marker.
                text = f"[stamp:{stamp_id}]"
            results.append(
                {
                    "name": comment.get("userName") or "",
                    "Comment": text,
                }
            )

        # Stop on the last page. Also stop on an empty page: if the server
        # kept reporting hasNext without returning comments (or limit is 0),
        # the loop would otherwise never terminate.
        if not body.get("hasNext") or not comments:
            break
        offset += limit

    return results
83
+
84
+
85
def parse_indexes(inputs):
    """Convert 1-based selection tokens into 0-based indexes.

    Each token is either a single number (``"3"``) or an inclusive range
    (``"2-5"``). Anything else, including malformed ranges such as ``"1-"``,
    ``"-3"``, ``"a-b"`` or ``"1-2-3"``, is ignored rather than raising, so
    one mistyped token does not abort the whole selection. A reversed range
    (``"5-3"``) contributes nothing.

    Args:
        inputs: Iterable of string tokens, e.g. the result of ``str.split``.

    Returns:
        A list of 0-based indexes in token order; duplicates are kept and no
        bounds checking is done.
    """
    indexes = []
    for token in inputs:
        start_text, dash, end_text = token.partition("-")
        if dash:
            # isdecimal() accepts exactly the digit characters int() can
            # parse, unlike isdigit(), which also accepts e.g. "²".
            if start_text.isdecimal() and end_text.isdecimal():
                indexes.extend(range(int(start_text) - 1, int(end_text)))
        elif token.isdecimal():
            indexes.append(int(token) - 1)
    return indexes
94
+
95
+
96
def main():
    """Interactively download comments for the post ids in chosen .txt files.

    Changes into this script's directory, lists its ``.txt`` files with
    1-based numbers, and asks which ones to process. Each chosen file is
    read as whitespace-separated post ids. The comments for every id are
    fetched and written to ``comments/<file name without extension>.json``,
    keyed by post id. A post whose fetch fails is reported and stored with
    an empty list.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    os.chdir(script_dir)
    os.makedirs("comments", exist_ok=True)

    valid = [name for name in os.listdir() if name.endswith(".txt")]
    for number, name in enumerate(valid, start=1):
        print(f"{number}: {name}")

    if not valid:
        print("No .txt files found in the Client directory.")
        return

    tokens = input("Enter the index of the file: ").split()
    indexes = parse_indexes(tokens)

    session = build_session(get_phpsessid())

    for index in indexes:
        # Silently skip selections outside the listed range.
        if not 0 <= index < len(valid):
            continue

        source_name = valid[index]
        # Every listed name ends in ".txt", so a "." is always present.
        group_name = source_name.rpartition(".")[0]
        with open(source_name, "r") as source:
            post_ids = source.read().split()

        if not post_ids:
            continue

        comments_by_post = {}
        for post_id in tqdm(post_ids, desc=f"Fetching {group_name}"):
            try:
                fetched = fetch_comments(post_id, session)
            except Exception as exc:
                # Best effort: report the failure and keep going.
                print(f"Failed to fetch comments for {post_id}: {exc}")
                fetched = []
            comments_by_post[str(post_id)] = fetched

        output_path = os.path.join("comments", f"{group_name}.json")
        with open(output_path, "w", encoding="utf-8") as sink:
            json.dump(comments_by_post, sink, indent=2, ensure_ascii=False)

        print(f"Saved comments to {output_path}")
137
+
138
+
139
# Run the interactive downloader only when executed as a script, not on import.
if __name__ == "__main__":
    main()