qgyd2021 commited on
Commit
0a5396d
·
1 Parent(s): c0ab610
data/porter_tasks/porter_task_chenjieshen_douyin_video_to_bilibili.json CHANGED
@@ -6,7 +6,7 @@
6
  "sec_user_id": "MS4wLjABAAAATGoBrO7yiJ3q9go4fxq9JXjrnP1bFpdkgKckC1IpfXA_vrjSmL9ZtjmTju8ApwbT",
7
  "check_interval": 900,
8
  "key_of_credentials": "douyin_wentao_credentials",
9
- "min_date2": "2025-09-23 00:00:00",
10
  "output_video_dir": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森",
11
  "output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森/file_info.json"
12
  },
@@ -17,7 +17,7 @@
17
  "sec_user_id": "MS4wLjABAAAA49QFP6YhorLIIX9M-FiZeKxmqhqXlttluSsZeaxvxzU",
18
  "check_interval": 900,
19
  "key_of_credentials": "douyin_wentao_credentials",
20
- "min_date2": "2025-09-23 00:00:00",
21
  "output_video_dir": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森",
22
  "output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森/file_info.json"
23
  },
@@ -31,6 +31,6 @@
31
  "target_user_id": "442286660",
32
  "key_of_credentials": "bilibili_chenjiesen_credentials",
33
  "remove_after_upload": true,
34
- "min_date2": "2025-09-23 00:00:00"
35
  }
36
  ]
 
6
  "sec_user_id": "MS4wLjABAAAATGoBrO7yiJ3q9go4fxq9JXjrnP1bFpdkgKckC1IpfXA_vrjSmL9ZtjmTju8ApwbT",
7
  "check_interval": 900,
8
  "key_of_credentials": "douyin_wentao_credentials",
9
+ "min_date": "2025-10-02 00:00:00",
10
  "output_video_dir": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森",
11
  "output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森/file_info.json"
12
  },
 
17
  "sec_user_id": "MS4wLjABAAAA49QFP6YhorLIIX9M-FiZeKxmqhqXlttluSsZeaxvxzU",
18
  "check_interval": 900,
19
  "key_of_credentials": "douyin_wentao_credentials",
20
+ "min_date": "2025-10-02 00:00:00",
21
  "output_video_dir": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森",
22
  "output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森/file_info.json"
23
  },
 
31
  "target_user_id": "442286660",
32
  "key_of_credentials": "bilibili_chenjiesen_credentials",
33
  "remove_after_upload": true,
34
+ "min_date": "2025-10-02 00:00:00"
35
  }
36
  ]
data/porter_tasks/porter_task_chenjieshen_douyin_video_to_youtube.json CHANGED
@@ -6,7 +6,7 @@
6
  "sec_user_id": "MS4wLjABAAAATGoBrO7yiJ3q9go4fxq9JXjrnP1bFpdkgKckC1IpfXA_vrjSmL9ZtjmTju8ApwbT",
7
  "check_interval": 900,
8
  "key_of_credentials": "douyin_wentao_credentials",
9
- "min_date2": "2025-09-23 00:00:00",
10
  "output_video_dir": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森",
11
  "output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森/file_info.json"
12
  },
@@ -17,7 +17,7 @@
17
  "sec_user_id": "MS4wLjABAAAA49QFP6YhorLIIX9M-FiZeKxmqhqXlttluSsZeaxvxzU",
18
  "check_interval": 900,
19
  "key_of_credentials": "douyin_wentao_credentials",
20
- "min_date2": "2025-09-23 00:00:00",
21
  "output_video_dir": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森",
22
  "output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森/file_info.json"
23
  },
@@ -34,6 +34,6 @@
34
  "remove_after_upload_delay": 1,
35
  "playlist_title": "Short",
36
  "playlist_id": "PL1KtQ49rVMElugHudIdyKLAmgmMVdqoxQ",
37
- "min_date2": "2025-09-23 00:00:00"
38
  }
39
  ]
 
6
  "sec_user_id": "MS4wLjABAAAATGoBrO7yiJ3q9go4fxq9JXjrnP1bFpdkgKckC1IpfXA_vrjSmL9ZtjmTju8ApwbT",
7
  "check_interval": 900,
8
  "key_of_credentials": "douyin_wentao_credentials",
9
+ "min_date": "2025-10-02 00:00:00",
10
  "output_video_dir": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森",
11
  "output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森/file_info.json"
12
  },
 
17
  "sec_user_id": "MS4wLjABAAAA49QFP6YhorLIIX9M-FiZeKxmqhqXlttluSsZeaxvxzU",
18
  "check_interval": 900,
19
  "key_of_credentials": "douyin_wentao_credentials",
20
+ "min_date": "2025-10-02 00:00:00",
21
  "output_video_dir": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森",
22
  "output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森/file_info.json"
23
  },
 
34
  "remove_after_upload_delay": 1,
35
  "playlist_title": "Short",
36
  "playlist_id": "PL1KtQ49rVMElugHudIdyKLAmgmMVdqoxQ",
37
+ "min_date": "2025-10-02 00:00:00"
38
  }
39
  ]
toolbox/douyin/video/download.py CHANGED
@@ -8,6 +8,7 @@ import logging
8
  from pathlib import Path
9
  from zoneinfo import ZoneInfo
10
 
 
11
  from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed
12
 
13
  from project_settings import project_path, time_zone_info
@@ -145,7 +146,7 @@ class VideoDownload(DouyinClient):
145
 
146
  stop_flag = False
147
  max_cursor = 0
148
- for i in range(100):
149
  if stop_flag:
150
  break
151
  rows = await self.get_video_list_by_user_id(sec_user_id=sec_user_id, max_cursor=max_cursor, count=18)
@@ -184,6 +185,127 @@ class VideoDownload(DouyinClient):
184
  result.append(task)
185
  return result
186
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
 
188
  def get_args():
189
  parser = argparse.ArgumentParser()
@@ -222,5 +344,55 @@ async def main():
222
  return
223
 
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  if __name__ == "__main__":
226
- asyncio.run(main())
 
8
  from pathlib import Path
9
  from zoneinfo import ZoneInfo
10
 
11
+ from gradio.cli.commands.deploy_space import readme_file
12
  from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed
13
 
14
  from project_settings import project_path, time_zone_info
 
146
 
147
  stop_flag = False
148
  max_cursor = 0
149
+ for i in range(1000):
150
  if stop_flag:
151
  break
152
  rows = await self.get_video_list_by_user_id(sec_user_id=sec_user_id, max_cursor=max_cursor, count=18)
 
185
  result.append(task)
186
  return result
187
 
188
async def get_video_list_by_mix_id(self, mix_id: str, cursor: int = 0, count: int = 18):
    """
    Request one page from the Douyin web ``mix/aweme`` endpoint and flatten each entry.

    :param mix_id: sent as the ``mix_id`` query parameter.
    :param cursor: sent as the ``cursor`` query parameter.
    :param count: sent as the ``count`` query parameter.
    :return: list of dicts with the keys ``aweme_id``, ``create_time``,
        ``create_time_str``, ``title``, ``desc``, ``url_list`` and ``tags``.
        An empty list is returned when ``aweme_list`` in the response is null.
    :raises ExpectedError: (status_code=60444) on HTTP 444, or on HTTP 200 with an empty body.
    :raises AssertionError: on any other non-200 status, or when the body is exactly ``"blocked"``.
    """
    url = "https://www.douyin.com/aweme/v1/web/mix/aweme/"

    params = {
        "device_platform": "webapp",
        "aid": "6383",
        "channel": "channel_pc_web",
        "cursor": cursor,
        "count": count,
        "publish_video_strategy_type": "2",
        "version_code": "290100",
        "version_name": "29.1.0",

        "mix_id": mix_id,
    }

    response = await self.async_session.request(
        method="GET",
        url=url,
        headers={
            **self.headers,
            "referer": "https://www.douyin.com/",
        },
        params=params,
    )

    # Every failure path reports the same message; only the exception type differs.
    message = f"request failed, status_code: {response.status_code}, text: {response.text}"
    access_denied = response.status_code == 444
    # A 200 with an empty body is treated like an access denial as well.
    maybe_access_denied = response.status_code == 200 and len(response.text) == 0
    if access_denied or maybe_access_denied:
        raise ExpectedError(status_code=60444, message=message)
    if response.status_code != 200 or response.text == "blocked":
        raise AssertionError(message)

    js = response.json()
    aweme_list = js["aweme_list"]
    if aweme_list is None:
        return list()

    result = list()
    for aweme in aweme_list:
        aweme_id = aweme["aweme_id"]
        desc = aweme["desc"]
        create_time = aweme["create_time"]
        create_time_str = datetime.fromtimestamp(
            create_time,
            tz=ZoneInfo(time_zone_info)
        ).strftime("%Y%m%dT%H%M%S")

        # video
        url_list = aweme["video"]["play_addr"]["url_list"]

        # tags: de-duplicated in first-seen order so the output is the same on
        # every run (a set would shuffle it); a null/missing text_extra means no tags.
        tags = list()
        for extra in aweme.get("text_extra") or list():
            tag = extra.get("hashtag_name")
            if tag is None:
                tag = extra.get("search_text")
            if tag is None:
                continue
            if tag not in tags:
                tags.append(tag)

        # title: desc with every "#tag" removed. Longest tags go first so a tag
        # that is a prefix of another one cannot leave a partial remainder behind.
        title: str = desc
        for tag in sorted(tags, key=len, reverse=True):
            title = title.replace(f"#{tag}", "")
        title = title.strip()

        result.append({
            "aweme_id": aweme_id,
            "create_time": create_time,
            "create_time_str": create_time_str,
            "title": title,
            "desc": desc,
            "url_list": url_list,
            "tags": tags,
        })
    return result
277
+
278
async def get_all_video_by_mix_id(self, mix_id: str):
    """
    Walk the pages of ``get_video_list_by_mix_id`` and collect one task dict per video.

    Pages of 18 are requested with the cursor advanced by 18 after every
    non-empty page; the walk ends on the first empty page or after 1000 requests.

    :param mix_id: forwarded unchanged to ``get_video_list_by_mix_id``.
    :return: list of dicts with the keys ``aweme_id``, ``create_time_str``,
        ``title``, ``desc``, ``url_list`` and ``tags``.
    """
    page_size = 18
    # Fields copied from every fetched row into the task, in output order.
    task_fields = ("aweme_id", "create_time_str", "title", "desc", "url_list", "tags")

    collected = list()
    offset = 0
    for _ in range(1000):
        page = await self.get_video_list_by_mix_id(mix_id=mix_id, cursor=offset, count=page_size)
        if len(page) == 0:
            break
        offset += page_size
        collected.extend({field: entry[field] for field in task_fields} for entry in page)
    return collected
308
+
309
 
310
  def get_args():
311
  parser = argparse.ArgumentParser()
 
344
  return
345
 
346
 
347
async def main2():
    """
    Ad-hoc entry point: download every video of the hard-coded mix
    ``7337883410737661989`` and write a ``README.txt`` listing them.

    Logs in with ``args.credentials_file`` (from ``get_args``), fetches the video
    list through ``VideoDownload.get_all_video_by_mix_id``, downloads one randomly
    chosen play URL per video into ``output_video_dir/`` and appends the title,
    description and tags of each video to the README as it goes.
    """
    import random
    import re
    import log
    from project_settings import log_directory

    log.setup_size_rotating(log_directory=log_directory)

    args = get_args()

    # NOTE(review): this is a literal directory name relative to the working
    # directory, not a value taken from args - confirm that is intended.
    output_video_dir = Path("output_video_dir")
    output_video_dir.mkdir(parents=True, exist_ok=True)
    read_me_file = output_video_dir / "README.txt"

    client = VideoDownload()

    flag = client.check_login()
    print(f"flag: {flag}")
    client.login_with_credentials_file(args.credentials_file)
    # client.login_with_qrcode_url()
    flag = client.check_login()
    print(f"flag: {flag}")

    js = await client.get_all_video_by_mix_id(
        mix_id="7337883410737661989",
    )
    print(f"count: {len(js)}")

    with open(read_me_file.as_posix(), "w", encoding="utf-8") as f:
        for row in js:
            aweme_id = row["aweme_id"]
            create_time_str = row["create_time_str"]
            title = row["title"]
            desc = row["desc"]
            tags = row["tags"]
            video_url = random.choice(row["url_list"])

            # Replace characters that are illegal in file names; line breaks and
            # tabs from the description are included so they cannot end up in the name.
            title_ = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', title)
            title_ = title_[:50]
            # output_video_dir already exists and the sanitized name has no separators.
            filename = output_video_dir / f"{aweme_id}_{create_time_str}_{title_}.mp4"
            await client.download_video_by_url(filename, video_url)

            content = f"{title}\n{desc}\n{tags}"
            f.write(f"{content}\n\n\n")
            # Flush per video so the README reflects progress if a later download fails.
            f.flush()
395
+
396
+
397
  if __name__ == "__main__":
398
+ asyncio.run(main2())
toolbox/douyin_selenium/tasks/bless_bag_collector.py CHANGED
@@ -98,7 +98,7 @@ class BlessBagCollector(object):
98
  return element
99
 
100
  def get_bless_bag_card_button_element(self):
101
- xpath = '//div[@id="lottery_close_cotainer"]//div[contains(text(), "一键发评论参与福袋") or contains(text(), "分享直播间") or contains(text(), "加入粉丝团(1钻)") or contains(text(), "已参与") or contains(text(), "知道了")]'
102
  return self.get_element_by_xpath(xpath)
103
 
104
  def get_bless_bag_card_content_element(self):
@@ -256,6 +256,8 @@ class BlessBagCollector(object):
256
  elif bless_bag_card_content == "加入粉丝团(1钻)":
257
  self.sleep_by_bless_bag_countdown()
258
  # self.click_element(bless_bag_card_button_element)
 
 
259
  elif bless_bag_card_content == "已参与":
260
  self.try_to_close_bless_bag_card()
261
  self.sleep_by_bless_bag_countdown()
 
98
  return element
99
 
100
  def get_bless_bag_card_button_element(self):
101
+ xpath = '//div[@id="lottery_close_cotainer"]//div[contains(text(), "一键发评论参与福袋") or contains(text(), "分享直播间") or contains(text(), "加入粉丝团(1钻)") or contains(text(), "已参与") or contains(text(), "知道了") or contains(text(), "提升亲密度")]'
102
  return self.get_element_by_xpath(xpath)
103
 
104
  def get_bless_bag_card_content_element(self):
 
256
  elif bless_bag_card_content == "加入粉丝团(1钻)":
257
  self.sleep_by_bless_bag_countdown()
258
  # self.click_element(bless_bag_card_button_element)
259
+ elif bless_bag_card_content == "提升亲密度":
260
+ self.sleep_by_bless_bag_countdown()
261
  elif bless_bag_card_content == "已参与":
262
  self.try_to_close_bless_bag_card()
263
  self.sleep_by_bless_bag_countdown()