Spaces:
Paused
Paused
update
Browse files
data/porter_tasks/porter_task_chenjieshen_douyin_video_to_bilibili.json
CHANGED
|
@@ -6,7 +6,7 @@
|
|
| 6 |
"sec_user_id": "MS4wLjABAAAATGoBrO7yiJ3q9go4fxq9JXjrnP1bFpdkgKckC1IpfXA_vrjSmL9ZtjmTju8ApwbT",
|
| 7 |
"check_interval": 900,
|
| 8 |
"key_of_credentials": "douyin_wentao_credentials",
|
| 9 |
-
"
|
| 10 |
"output_video_dir": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森",
|
| 11 |
"output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森/file_info.json"
|
| 12 |
},
|
|
@@ -17,7 +17,7 @@
|
|
| 17 |
"sec_user_id": "MS4wLjABAAAA49QFP6YhorLIIX9M-FiZeKxmqhqXlttluSsZeaxvxzU",
|
| 18 |
"check_interval": 900,
|
| 19 |
"key_of_credentials": "douyin_wentao_credentials",
|
| 20 |
-
"
|
| 21 |
"output_video_dir": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森",
|
| 22 |
"output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森/file_info.json"
|
| 23 |
},
|
|
@@ -31,6 +31,6 @@
|
|
| 31 |
"target_user_id": "442286660",
|
| 32 |
"key_of_credentials": "bilibili_chenjiesen_credentials",
|
| 33 |
"remove_after_upload": true,
|
| 34 |
-
"
|
| 35 |
}
|
| 36 |
]
|
|
|
|
| 6 |
"sec_user_id": "MS4wLjABAAAATGoBrO7yiJ3q9go4fxq9JXjrnP1bFpdkgKckC1IpfXA_vrjSmL9ZtjmTju8ApwbT",
|
| 7 |
"check_interval": 900,
|
| 8 |
"key_of_credentials": "douyin_wentao_credentials",
|
| 9 |
+
"min_date": "2025-10-02 00:00:00",
|
| 10 |
"output_video_dir": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森",
|
| 11 |
"output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森/file_info.json"
|
| 12 |
},
|
|
|
|
| 17 |
"sec_user_id": "MS4wLjABAAAA49QFP6YhorLIIX9M-FiZeKxmqhqXlttluSsZeaxvxzU",
|
| 18 |
"check_interval": 900,
|
| 19 |
"key_of_credentials": "douyin_wentao_credentials",
|
| 20 |
+
"min_date": "2025-10-02 00:00:00",
|
| 21 |
"output_video_dir": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森",
|
| 22 |
"output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_bilibili/video/douyin/陈杰森/file_info.json"
|
| 23 |
},
|
|
|
|
| 31 |
"target_user_id": "442286660",
|
| 32 |
"key_of_credentials": "bilibili_chenjiesen_credentials",
|
| 33 |
"remove_after_upload": true,
|
| 34 |
+
"min_date": "2025-10-02 00:00:00"
|
| 35 |
}
|
| 36 |
]
|
data/porter_tasks/porter_task_chenjieshen_douyin_video_to_youtube.json
CHANGED
|
@@ -6,7 +6,7 @@
|
|
| 6 |
"sec_user_id": "MS4wLjABAAAATGoBrO7yiJ3q9go4fxq9JXjrnP1bFpdkgKckC1IpfXA_vrjSmL9ZtjmTju8ApwbT",
|
| 7 |
"check_interval": 900,
|
| 8 |
"key_of_credentials": "douyin_wentao_credentials",
|
| 9 |
-
"
|
| 10 |
"output_video_dir": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森",
|
| 11 |
"output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森/file_info.json"
|
| 12 |
},
|
|
@@ -17,7 +17,7 @@
|
|
| 17 |
"sec_user_id": "MS4wLjABAAAA49QFP6YhorLIIX9M-FiZeKxmqhqXlttluSsZeaxvxzU",
|
| 18 |
"check_interval": 900,
|
| 19 |
"key_of_credentials": "douyin_wentao_credentials",
|
| 20 |
-
"
|
| 21 |
"output_video_dir": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森",
|
| 22 |
"output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森/file_info.json"
|
| 23 |
},
|
|
@@ -34,6 +34,6 @@
|
|
| 34 |
"remove_after_upload_delay": 1,
|
| 35 |
"playlist_title": "Short",
|
| 36 |
"playlist_id": "PL1KtQ49rVMElugHudIdyKLAmgmMVdqoxQ",
|
| 37 |
-
"
|
| 38 |
}
|
| 39 |
]
|
|
|
|
| 6 |
"sec_user_id": "MS4wLjABAAAATGoBrO7yiJ3q9go4fxq9JXjrnP1bFpdkgKckC1IpfXA_vrjSmL9ZtjmTju8ApwbT",
|
| 7 |
"check_interval": 900,
|
| 8 |
"key_of_credentials": "douyin_wentao_credentials",
|
| 9 |
+
"min_date": "2025-10-02 00:00:00",
|
| 10 |
"output_video_dir": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森",
|
| 11 |
"output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森/file_info.json"
|
| 12 |
},
|
|
|
|
| 17 |
"sec_user_id": "MS4wLjABAAAA49QFP6YhorLIIX9M-FiZeKxmqhqXlttluSsZeaxvxzU",
|
| 18 |
"check_interval": 900,
|
| 19 |
"key_of_credentials": "douyin_wentao_credentials",
|
| 20 |
+
"min_date": "2025-10-02 00:00:00",
|
| 21 |
"output_video_dir": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森",
|
| 22 |
"output_video_info_file": "data/tasks/chenjieshen_douyin_video_to_youtube/video/douyin/陈杰森/file_info.json"
|
| 23 |
},
|
|
|
|
| 34 |
"remove_after_upload_delay": 1,
|
| 35 |
"playlist_title": "Short",
|
| 36 |
"playlist_id": "PL1KtQ49rVMElugHudIdyKLAmgmMVdqoxQ",
|
| 37 |
+
"min_date": "2025-10-02 00:00:00"
|
| 38 |
}
|
| 39 |
]
|
toolbox/douyin/video/download.py
CHANGED
|
@@ -8,6 +8,7 @@ import logging
|
|
| 8 |
from pathlib import Path
|
| 9 |
from zoneinfo import ZoneInfo
|
| 10 |
|
|
|
|
| 11 |
from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed
|
| 12 |
|
| 13 |
from project_settings import project_path, time_zone_info
|
|
@@ -145,7 +146,7 @@ class VideoDownload(DouyinClient):
|
|
| 145 |
|
| 146 |
stop_flag = False
|
| 147 |
max_cursor = 0
|
| 148 |
-
for i in range(
|
| 149 |
if stop_flag:
|
| 150 |
break
|
| 151 |
rows = await self.get_video_list_by_user_id(sec_user_id=sec_user_id, max_cursor=max_cursor, count=18)
|
|
@@ -184,6 +185,127 @@ class VideoDownload(DouyinClient):
|
|
| 184 |
result.append(task)
|
| 185 |
return result
|
| 186 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
|
| 188 |
def get_args():
|
| 189 |
parser = argparse.ArgumentParser()
|
|
@@ -222,5 +344,55 @@ async def main():
|
|
| 222 |
return
|
| 223 |
|
| 224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
if __name__ == "__main__":
|
| 226 |
-
asyncio.run(
|
|
|
|
| 8 |
from pathlib import Path
|
| 9 |
from zoneinfo import ZoneInfo
|
| 10 |
|
| 11 |
+
from gradio.cli.commands.deploy_space import readme_file
|
| 12 |
from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed
|
| 13 |
|
| 14 |
from project_settings import project_path, time_zone_info
|
|
|
|
| 146 |
|
| 147 |
stop_flag = False
|
| 148 |
max_cursor = 0
|
| 149 |
+
for i in range(1000):
|
| 150 |
if stop_flag:
|
| 151 |
break
|
| 152 |
rows = await self.get_video_list_by_user_id(sec_user_id=sec_user_id, max_cursor=max_cursor, count=18)
|
|
|
|
| 185 |
result.append(task)
|
| 186 |
return result
|
| 187 |
|
| 188 |
+
async def get_video_list_by_mix_id(self, mix_id: str, cursor: int = 0, count: int = 18):
    """
    Fetch one page of videos that belong to a Douyin mix (collection).

    :param mix_id: mix identifier, sent as the ``mix_id`` query parameter.
    :param cursor: paging offset, sent as the ``cursor`` query parameter.
    :param count: page size, sent as the ``count`` query parameter.
    :return: list of dicts with the keys ``aweme_id``, ``create_time``,
        ``create_time_str``, ``title``, ``desc``, ``url_list`` and ``tags``.
        Empty when the response's ``aweme_list`` is null or empty.
    :raises ExpectedError: on HTTP 444, or HTTP 200 with an empty body
        (both are treated as "access denied").
    :raises AssertionError: on any other non-200 status, or when the body
        is the literal text ``blocked``.
    """
    url = "https://www.douyin.com/aweme/v1/web/mix/aweme/"

    params = {
        "device_platform": "webapp",
        "aid": "6383",
        "channel": "channel_pc_web",
        "cursor": cursor,
        "count": count,
        "publish_video_strategy_type": "2",
        "version_code": "290100",
        "version_name": "29.1.0",

        "mix_id": mix_id,
    }

    response = await self.async_session.request(
        method="GET",
        url=url,
        headers={
            **self.headers,
            "referer": "https://www.douyin.com/",
        },
        params=params,
    )

    failure = f"request failed, status_code: {response.status_code}, text: {response.text}"
    # 444 is an explicit denial; a 200 with an empty body is handled the same way.
    if response.status_code == 444 or (response.status_code == 200 and len(response.text) == 0):
        raise ExpectedError(status_code=60444, message=failure)
    if response.status_code != 200 or response.text == "blocked":
        raise AssertionError(failure)

    aweme_list = response.json()["aweme_list"]
    if not aweme_list:
        # The endpoint answers null (not []) once there is nothing left.
        return []

    tz = ZoneInfo(time_zone_info)

    result = []
    for aweme in aweme_list:
        desc = aweme["desc"]
        create_time = aweme["create_time"]
        create_time_str = datetime.fromtimestamp(create_time, tz=tz).strftime("%Y%m%dT%H%M%S")

        # Tags: prefer the hashtag name, fall back to the search text.
        # A dict keeps first-seen order while de-duplicating, so the output
        # no longer depends on set/hash iteration order.
        # NOTE(review): text_extra is assumed to be nullable like aweme_list - confirm.
        ordered_tags = {}
        for extra in aweme.get("text_extra") or []:
            tag = extra.get("hashtag_name")
            if tag is None:
                tag = extra.get("search_text")
            if tag is None:
                continue
            ordered_tags[tag] = None
        tags = list(ordered_tags)

        # Title is the description with every "#tag" removed. Longest tags go
        # first so that "#ab" is not half-eaten by an earlier "#a" replacement.
        title: str = desc
        for tag in sorted(tags, key=len, reverse=True):
            title = title.replace(f"#{tag}", "")
        title = title.strip()

        result.append({
            "aweme_id": aweme["aweme_id"],
            "create_time": create_time,
            "create_time_str": create_time_str,
            "title": title,
            "desc": desc,
            "url_list": aweme["video"]["play_addr"]["url_list"],
            "tags": tags,
        })
    return result
|
| 277 |
+
|
| 278 |
+
async def get_all_video_by_mix_id(self, mix_id: str):
    """
    Collect the videos of a mix by paging through ``get_video_list_by_mix_id``.

    Pages of 18 items are requested at offsets 0, 18, 36, ... and paging stops
    at the first empty page or after 1000 pages, whichever comes first.

    :param mix_id: mix identifier forwarded to ``get_video_list_by_mix_id``.
    :return: list of dicts holding ``aweme_id``, ``create_time_str``, ``title``,
        ``desc``, ``url_list`` and ``tags`` for every video seen.
    """
    page_size = 18
    max_pages = 1000
    kept_fields = ("aweme_id", "create_time_str", "title", "desc", "url_list", "tags")

    collected = []
    for offset in range(0, max_pages * page_size, page_size):
        page = await self.get_video_list_by_mix_id(mix_id=mix_id, cursor=offset, count=page_size)
        if not page:
            break
        # Keep only the fields callers need; create_time (raw timestamp) is dropped.
        collected.extend({field: entry[field] for field in kept_fields} for entry in page)
    return collected
|
| 308 |
+
|
| 309 |
|
| 310 |
def get_args():
|
| 311 |
parser = argparse.ArgumentParser()
|
|
|
|
| 344 |
return
|
| 345 |
|
| 346 |
|
| 347 |
+
async def main2():
    """
    Download every video of one hard-coded Douyin mix and write a README index.

    Logs in with the credentials file from the command-line arguments, lists
    the mix via ``get_all_video_by_mix_id``, downloads one randomly chosen
    play URL per video, and appends title / description / tags for each
    downloaded video to ``README.txt`` in the output directory.
    """
    import random
    import re

    import log
    from project_settings import log_directory

    log.setup_size_rotating(log_directory=log_directory)

    args = get_args()

    # NOTE(review): this is the literal relative directory "output_video_dir",
    # not a value read from args - confirm that is intended.
    output_video_dir = Path("output_video_dir")
    output_video_dir.mkdir(parents=True, exist_ok=True)
    read_me_file = output_video_dir / "README.txt"

    client = VideoDownload()

    # Login state is printed before and after loading the credentials.
    flag = client.check_login()
    print(f"flag: {flag}")
    client.login_with_credentials_file(args.credentials_file)
    # Alternative login path: client.login_with_qrcode_url()
    flag = client.check_login()
    print(f"flag: {flag}")

    js = await client.get_all_video_by_mix_id(
        mix_id="7337883410737661989",
    )
    print(f"count: {len(js)}")

    # Characters that are unsafe in file names; control characters (newlines,
    # tabs, NUL) are included because titles are free-form text.
    unsafe_chars = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

    with open(read_me_file.as_posix(), "w", encoding="utf-8") as f:
        for row in js:
            aweme_id = row["aweme_id"]
            title = row["title"]
            url_list = row["url_list"]

            if not url_list:
                # Nothing to download for this entry; keep going with the rest.
                print(f"skip {aweme_id}: empty url_list")
                continue
            video_url = random.choice(url_list)

            title_ = unsafe_chars.sub("_", title)[:50]
            filename = output_video_dir / f"{aweme_id}_{row['create_time_str']}_{title_}.mp4"
            await client.download_video_by_url(filename, video_url)

            content = f"{title}\n{row['desc']}\n{row['tags']}"
            f.write(f"{content}\n\n\n")
            # Flush per video so the index survives an interrupted run.
            f.flush()
|
| 395 |
+
|
| 396 |
+
|
| 397 |
if __name__ == "__main__":
|
| 398 |
+
asyncio.run(main2())
|
toolbox/douyin_selenium/tasks/bless_bag_collector.py
CHANGED
|
@@ -98,7 +98,7 @@ class BlessBagCollector(object):
|
|
| 98 |
return element
|
| 99 |
|
| 100 |
def get_bless_bag_card_button_element(self):
    """
    Look up the bless-bag card button by its visible text.

    Builds an XPath matching a ``div`` under the element with id
    ``lottery_close_cotainer`` whose text contains any of the known button
    labels, and returns whatever ``get_element_by_xpath`` yields for it.
    """
    button_labels = (
        "一键发评论参与福袋",
        "分享直播间",
        "加入粉丝团(1钻)",
        "已参与",
        "知道了",
    )
    any_label = " or ".join(f'contains(text(), "{label}")' for label in button_labels)
    return self.get_element_by_xpath(f'//div[@id="lottery_close_cotainer"]//div[{any_label}]')
|
| 103 |
|
| 104 |
def get_bless_bag_card_content_element(self):
|
|
@@ -256,6 +256,8 @@ class BlessBagCollector(object):
|
|
| 256 |
elif bless_bag_card_content == "加入粉丝团(1钻)":
|
| 257 |
self.sleep_by_bless_bag_countdown()
|
| 258 |
# self.click_element(bless_bag_card_button_element)
|
|
|
|
|
|
|
| 259 |
elif bless_bag_card_content == "已参与":
|
| 260 |
self.try_to_close_bless_bag_card()
|
| 261 |
self.sleep_by_bless_bag_countdown()
|
|
|
|
| 98 |
return element
|
| 99 |
|
| 100 |
def get_bless_bag_card_button_element(self):
    """
    Look up the bless-bag card button by its visible text.

    Builds an XPath matching a ``div`` under the element with id
    ``lottery_close_cotainer`` whose text contains any of the known button
    labels, and returns whatever ``get_element_by_xpath`` yields for it.
    """
    button_labels = (
        "一键发评论参与福袋",
        "分享直播间",
        "加入粉丝团(1钻)",
        "已参与",
        "知道了",
        "提升亲密度",
    )
    any_label = " or ".join(f'contains(text(), "{label}")' for label in button_labels)
    return self.get_element_by_xpath(f'//div[@id="lottery_close_cotainer"]//div[{any_label}]')
|
| 103 |
|
| 104 |
def get_bless_bag_card_content_element(self):
|
|
|
|
| 256 |
elif bless_bag_card_content == "加入粉丝团(1钻)":
|
| 257 |
self.sleep_by_bless_bag_countdown()
|
| 258 |
# self.click_element(bless_bag_card_button_element)
|
| 259 |
+
elif bless_bag_card_content == "提升亲密度":
|
| 260 |
+
self.sleep_by_bless_bag_countdown()
|
| 261 |
elif bless_bag_card_content == "已参与":
|
| 262 |
self.try_to_close_bless_bag_card()
|
| 263 |
self.sleep_by_bless_bag_countdown()
|