Spaces:

qgyd2021
/

video_platform

Paused

App Files Files Community

HoneyTian committed on Dec 25, 2025

Commit

eeaf31b

1 Parent(s): 6071696

update

Browse files

Files changed (16) hide show

data/porter_tasks/porter_task_chenjieshen_douyin_live_record_to_baidu_netdisk.json +188 -0
requirements.txt +1 -0
tabs/video_upload_tasks.py +35 -0
toolbox/baidu_netdisk/__init__.py +6 -0
toolbox/baidu_netdisk/baidu_netdisk_client.py +152 -0
toolbox/baidu_netdisk/upload/__init__.py +6 -0
toolbox/baidu_netdisk/upload/upload.py +79 -0
toolbox/porter/tasks/__init__.py +2 -0
toolbox/porter/tasks/file_to_baidu_netdisk_task.py +91 -0
toolbox/porter/tasks/video_format_convert_task.py +135 -0
toolbox/porter/tasks/youtube_video_download_task.py +6 -0
toolbox/youtube_spider/__init__.py +6 -0
toolbox/youtube_spider/video/__init__.py +6 -0
toolbox/youtube_spider/video/video_download.py +82 -0
toolbox/youtube_spider/video/video_list.py +196 -0
toolbox/youtube_spider/youtube_spider_client.py +47 -0

data/porter_tasks/porter_task_chenjieshen_douyin_live_record_to_baidu_netdisk.json ADDED Viewed

	@@ -0,0 +1,188 @@

+[
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "陈杰森资本NewBoombap",
+    "room_id": "57544215290",
+    "sec_user_id": "MS4wLjABAAAATGoBrO7yiJ3q9go4fxq9JXjrnP1bFpdkgKckC1IpfXA_vrjSmL9ZtjmTju8ApwbT",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "老陈的退路",
+    "room_id": "330025930592",
+    "sec_user_id": "MS4wLjABAAAAzjLy7Dd3Q-UiMGdkwymLhDIL1aFJtruvCUqrwmph9n8Zb55E4y9XH2WEFaLC1ZBl",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "老陈come_back",
+    "room_id": "78835697536",
+    "sec_user_id": "MS4wLjABAAAAj3qTpPkes0t5az2nXd1oaag5TpOJeNh27pemT17WUVXHGUZKZfjqc3fFc8arL6V1",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "清源第一帅",
+    "room_id": "654177813521",
+    "sec_user_id": "MS4wLjABAAAAtwkRNs5Krtm5cBhKzn60poflAFWdnYdwMf0I0M27rOGvNn2l16DfUH5vE3WL-ypz",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "清源第二帅",
+    "room_id": "831568261978",
+    "sec_user_id": "MS4wLjABAAAALbRezHNzlaRYhk9QE-5ajImgcvfJGq9koAlD0O8VQwiC-N8Bqx2WDW07wLee2Bwd",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "清源第三帅",
+    "room_id": "26569028431",
+    "sec_user_id": "MS4wLjABAAAA60pV9JuZNTZhQJeSI_AeaRUnKXs--TGPZEAtsgihwKnir46ncFVtLSO79JDnclkM",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "清源第四帅",
+    "room_id": "91434255754",
+    "sec_user_id": "MS4wLjABAAAAakYu9rpF6vk50XbiKFnxzuXVv6fhVK5O3tr0gxjTJq2Nw5HfMplPv6ce6AhXxReG",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "清源第五帅",
+    "room_id": "469245033355",
+    "sec_user_id": "MS4wLjABAAAA62G-uDXdlIPnk44P9gXhQQo1UmOCgZ08QWOP_-t4PjI",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "清源人工智能研究院",
+    "room_id": "81728900292",
+    "sec_user_id": "MS4wLjABAAAAe_Hf6esmopqIMG7OhsMlNESTiqP1ot7lmGzK6Cu3pJSSGTOiHlI4cA33OPWg56Ql",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "廖勇-清源政策研究院AIGC",
+    "room_id": "gubenqingyuan",
+    "sec_user_id": "MS4wLjABAAAA9ne6zoCLjnzpCRR1GhPVBUq2_7wIJO4GyZXlyW016yw",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "小熊Bella与老爸",
+    "room_id": "139751520143",
+    "sec_user_id": "MS4wLjABAAAA49QFP6YhorLIIX9M-FiZeKxmqhqXlttluSsZeaxvxzU",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "清源之虎",
+    "room_id": "998621457719",
+    "sec_user_id": "MS4wLjABAAAAWYFqu46IMCnfwgSjBK35RnQcKOYhqVX3YcP4rotpHkdxJn5JD59REMz5fwlccp2h",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "老陈真是好人",
+    "room_id": "599130203190",
+    "sec_user_id": "MS4wLjABAAAA3ReegyjpprzeurqHUl8yLBkTlQ7L4TfoyjpDXeiy1RvCu7OmqWljOLmWgibTjs8y",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "老陈小帮手",
+    "room_id": "738682070097",
+    "sec_user_id": "MS4wLjABAAAAtzZsG1mKtZvgE2j58ZB7FDRP1gzsHXuQDIN15bltkLM",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "douyin_live_record",
+    "room_name": "焦虑个锤子",
+    "room_id": "78933304321",
+    "sec_user_id": "MS4wLjABAAAAfr5os1dB5zdm36GK5nHVQkWReaELMueuYqliVDBo64Y",
+    "key_of_credentials": "douyin_wentao_credentials",
+    "check_interval": 10,
+    "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
+    "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
+  },
+  {
+    "enable": false,
+    "type": "video_format_convert",
+    "task_name": "陈杰森-百度云盘-格式转换",
+    "video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json",
+    "target_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/to_baidu_netdisk",
+    "check_interval": 10,
+    "remove_after_upload": true,
+    "format_pairs": [["flv", "mp4"]]
+  },
+  {
+    "enable": true,
+    "type": "file_to_baidu_netdisk",
+    "task_name": "陈杰森",
+    "src_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/to_baidu_netdisk",
+    "tgt_dir": "xianyu/customers/{task_name}/{date_str}",
+    "check_interval": 10,
+    "key_of_credentials": "baidu_netdisk_honeytian_credentials",
+    "remove_after_upload": true,
+    "exclude_files": "file_info.json"
+  }
+]

requirements.txt CHANGED Viewed

@@ -21,3 +21,4 @@ tenacity
 selenium
 webdriver-manager
 chinesecalendar

 selenium
 webdriver-manager
 chinesecalendar
+bypy

tabs/video_upload_tasks.py ADDED Viewed

	@@ -0,0 +1,35 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import gradio as gr
+from project_settings import project_path
+def get_video_upload_tasks_tab():
+    with gr.TabItem("video_upload_tasks"):
+        with gr.Row():
+            tasks_src = gr.Textbox(label="tasks_src", max_lines=10)
+            tasks_platform = gr.Dropdown(choices=["douyin"], value="douyin", label="platform")
+            tasks_target_dir = gr.Dropdown(choices=["douyin"], value="douyin", label="target_dir")
+            tasks_delay = gr.DateTime(value="delay")
+            tasks_add_button = gr.Button("add_task", variant="primary")
+        def when_click_tasks_add_button(src: str, platform: str, target_dir: str, delay: str):
+            print(src)
+            return None
+        tasks_add_button.click(
+            fn=when_click_tasks_add_button,
+            inputs=[
+                tasks_src, tasks_platform, tasks_target_dir, tasks_delay,
+            ],
+            outputs=None,
+        )
+    return locals()
+if __name__ == "__main__":
+    with gr.Blocks() as block:
+        video_upload_tasks_components = get_video_upload_tasks_tab()
+        block.launch()

toolbox/baidu_netdisk/__init__.py ADDED Viewed

	@@ -0,0 +1,6 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+if __name__ == "__main__":
+    # Nothing to run: executing this package file directly is a no-op.
+    pass

toolbox/baidu_netdisk/baidu_netdisk_client.py ADDED Viewed

	@@ -0,0 +1,152 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+"""
+https://pypi.org/project/bypy/
+https://github.com/houtianze/bypy
+"""
+import argparse
+import json
+import logging
+import shutil
+import tempfile
+from pathlib import Path
+import bypy
+from bypy import const
+from project_settings import project_path, environment
+from toolbox.design_patterns.singleton import ParamsSingleton
+logger = logging.getLogger("toolbox")
+class BaiduNetdiskClient(ParamsSingleton):
+    def __init__(self, configdir: str = None):
+        if not self._initialized:
+            self.credentials = None
+            if configdir is None:
+                configdir = Path(tempfile.gettempdir()) / "baidu_netdisk/configdir"
+                if configdir.exists():
+                    shutil.rmtree(configdir.as_posix())
+            self.configdir = Path(configdir)
+            self.token_path = self.configdir / const.TokenFileName
+            self._bypy_client: bypy.ByPy = None
+            self.bypy_login()
+            self._initialized = True
+    @property
+    def bypy_client(self):
+        if self._bypy_client is None:
+            raise AssertionError(f"bypy not login yet!")
+        return self._bypy_client
+    def check_login(self):
+        if self._bypy_client is None:
+            return False
+        status_code = self._bypy_client.info()
+        if status_code == 0:
+            flag = True
+        else:
+            flag = False
+        return flag
+    def make_bypy_login_config(self):
+        if self.configdir.exists():
+            shutil.rmtree(self.configdir.as_posix())
+        if self._bypy_client is None:
+            logger.info(f"login by configdir: {self.configdir.as_posix()}")
+            self._bypy_client = bypy.ByPy(
+                configdir=self.configdir.as_posix(),
+                # debug=1,
+                # verbose=1,
+            )
+        # print tokens
+        with open(self.token_path.as_posix(), "r", encoding="utf-8") as f:
+            js = json.load(f)
+            js = json.dumps(js, ensure_ascii=False)
+            print(js)
+        return None
+    def bypy_login(self):
+        if not self.token_path.exists():
+            return False
+        # print tokens
+        # with open(self.token_path.as_posix(), "r", encoding="utf-8") as f:
+        #     js = json.load(f)
+        #     js = json.dumps(js, ensure_ascii=False)
+        #     print(js)
+        #     exit(0)
+        if self._bypy_client is None:
+            logger.info(f"login by configdir: {self.configdir.as_posix()}")
+            self._bypy_client = bypy.ByPy(
+                configdir=self.configdir.as_posix(),
+                # debug=1,
+                # verbose=1,
+            )
+        self._bypy_client.info()
+        return None
+    def login_with_credentials_info(self, credentials_info: dict):
+        self.credentials = credentials_info
+        self.token_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(self.token_path.as_posix(), "w", encoding="utf-8") as f:
+            json.dump(credentials_info, f, ensure_ascii=False, indent=4)
+        self.bypy_login()
+        return True
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--key_of_credentials",
+        default="baidu_netdisk_honeytian_credentials",
+        type=str,
+    )
+    parser.add_argument(
+        "--config_dir",
+        default=(project_path / "dotenv/baidu_netdisk/config_dir").as_posix(),
+        type=str,
+    )
+    args = parser.parse_args()
+    return args
+def main():
+    args = get_args()
+    import log
+    from project_settings import environment, project_path, log_directory, time_zone_info
+    log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)
+    client = BaiduNetdiskClient(
+        configdir=args.config_dir
+    )
+    client.make_bypy_login_config()
+    flag = client.check_login()
+    print(f"flag: {flag}")
+    # credentials_info = environment.get(key=args.key_of_credentials, dtype=json.loads)
+    # client.login_with_credentials_info(credentials_info=credentials_info)
+    # flag = client.check_login()
+    # print(f"flag: {flag}")
+    return
+if __name__ == "__main__":
+    main()

toolbox/baidu_netdisk/upload/__init__.py ADDED Viewed

	@@ -0,0 +1,6 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+if __name__ == "__main__":
+    # Nothing to run: executing this package file directly is a no-op.
+    pass

toolbox/baidu_netdisk/upload/upload.py ADDED Viewed

	@@ -0,0 +1,79 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import argparse
+import json
+from project_settings import project_path
+from toolbox.baidu_netdisk.baidu_netdisk_client import BaiduNetdiskClient
+class UploadClient(BaiduNetdiskClient):
+    def __init__(self, configdir: str = None):
+        super(UploadClient, self).__init__(configdir=configdir)
+    def upload_by_filename(self, src_file: str, tgt_file: str):
+        # self.bypy_client.upload(
+        #     r"C:\Users\tianx\Desktop\滴滴POC-2\lQLPJwCwSltNquvNBLTNDHKwOlnFBSTMn88JJBTGVuNCAA_3186_1204.png",
+        #     "xianyu/customers/佳小雨/20251224/lQLPJwCwSltNquvNBLTNDHKwOlnFBSTMn88JJBTGVuNCAA_3186_1204.png"
+        # )
+        self.bypy_client.upload(
+            src_file,
+            tgt_file
+        )
+        return tgt_file
+def get_args():
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--key_of_credentials",
+        default="baidu_netdisk_honeytian_credentials",
+        type=str,
+    )
+    parser.add_argument(
+        "--config_dir",
+        default=(project_path / "dotenv/baidu_netdisk/config_dir").as_posix(),
+        type=str,
+    )
+    parser.add_argument(
+        "--src_file",
+        default=(project_path / "README.md").as_posix(),
+        type=str,
+    )
+    parser.add_argument(
+        "--tgt_file",
+        default="video_platform/upload/README.md",
+        type=str,
+    )
+    args = parser.parse_args()
+    return args
+def main():
+    args = get_args()
+    import log
+    from project_settings import environment, project_path, log_directory, time_zone_info
+    log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)
+    client = UploadClient(
+        # configdir=args.config_dir
+    )
+    flag = client.check_login()
+    print(f"flag: {flag}")
+    credentials_info = environment.get(key=args.key_of_credentials, dtype=json.loads)
+    client.login_with_credentials_info(credentials_info=credentials_info)
+    flag = client.check_login()
+    print(f"flag: {flag}")
+    client.upload_by_filename(
+        src_file=args.src_file,
+        tgt_file=args.tgt_file,
+    )
+    return
+if __name__ == "__main__":
+    main()

toolbox/porter/tasks/__init__.py CHANGED Viewed

@@ -5,6 +5,8 @@ from .douyin_live_info_collect_task import DouyinLiveInfoCollectTask
 from .douyin_video_download_task import DouyinVideoDownloadTask
 from .douyin_live_record_task import DouyinLiveRecordTask
 from .douyin_live_to_bilibili_live_task import DouyinLiveToBilibiliLiveTask
 from .video_to_bilibili_task import VideoToBilibiliTask
 from .video_to_youtube_task import VideoToYoutubeTask

 from .douyin_video_download_task import DouyinVideoDownloadTask
 from .douyin_live_record_task import DouyinLiveRecordTask
 from .douyin_live_to_bilibili_live_task import DouyinLiveToBilibiliLiveTask
+from .file_to_baidu_netdisk_task import FileToBaiduNetdiskTask
+from .video_format_convert_task import VideoFormatConvertTask
 from .video_to_bilibili_task import VideoToBilibiliTask
 from .video_to_youtube_task import VideoToYoutubeTask

toolbox/porter/tasks/file_to_baidu_netdisk_task.py ADDED Viewed

	@@ -0,0 +1,91 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import aiofiles
+import asyncio
+import copy
+from datetime import datetime
+from zoneinfo import ZoneInfo  # Python 3.9+ 自带，无需安装
+import logging
+import json
+import os
+from pathlib import Path
+from typing import Coroutine, Dict, List, Tuple, Union, Iterable
+logger = logging.getLogger("toolbox")
+from toolbox.porter.tasks.base_task import BaseTask
+from toolbox.baidu_netdisk.upload.upload import UploadClient
+from project_settings import environment, project_path, time_zone_info
+@BaseTask.register("file_to_baidu_netdisk")
+class FileToBaiduNetdiskTask(BaseTask):
+    def __init__(self,
+                 task_name: str,
+                 src_dir: str,
+                 tgt_dir: str,
+                 check_interval: int,
+                 key_of_credentials: str,
+                 remove_after_upload: bool = False,
+                 exclude_files: List[str] = None,
+                 **kwargs
+                 ):
+        super().__init__(
+            flag=f"[{self.__class__.__name__}_{task_name}]",
+            check_interval=check_interval
+        )
+        self.task_name = task_name
+        self.tgt_dir: str = tgt_dir
+        # tgt_dir
+        # xianyu/customers/{task_name}/{date_str}/{time_str}
+        self.remove_after_upload = remove_after_upload
+        self.exclude_files = exclude_files or list()
+        self.key_of_credentials = key_of_credentials
+        if not os.path.isabs(src_dir):
+            self.src_dir: Path = project_path / src_dir
+        else:
+            self.src_dir: Path = Path(src_dir)
+        self.baidu_upload_client = UploadClient()
+        self.baidu_upload_client.login_with_credentials_info(
+            credentials_info=environment.get(self.key_of_credentials, dtype=json.loads)
+        )
+    async def do_task(self):
+        for filename in self.src_dir.glob("*"):
+            name = filename.name
+            if name in self.exclude_files:
+                continue
+            date_now_str = datetime.now().strftime("%Y%m%d")
+            time_now_str = datetime.now().strftime("%H%M%S")
+            kwargs = {
+                "task_name": self.task_name,
+                "date_str": date_now_str,
+                "time_str": time_now_str,
+            }
+            tgt_dir = self.tgt_dir.format(**kwargs)
+            tgt_file = Path(tgt_dir) / name
+            logger.info(f"{self.flag}上传视频：{filename.as_posix()}")
+            _ = await asyncio.to_thread(
+                self.baidu_upload_client.upload_by_filename,
+                src_file=filename.as_posix(),
+                tgt_file=tgt_file.as_posix()
+            )
+            if self.remove_after_upload:
+                logger.info(f"{self.flag}删除本地文件：{filename.as_posix()}")
+                os.remove(filename.as_posix())
+            logger.info(f"{self.flag}上传视频成功：{filename.as_posix()}")
+def main():
+    """Placeholder entry point; it does nothing and returns None."""
+    return
+if __name__ == "__main__":
+    main()

toolbox/porter/tasks/video_format_convert_task.py ADDED Viewed

	@@ -0,0 +1,135 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import aiofiles
+import asyncio
+import copy
+from datetime import datetime
+from zoneinfo import ZoneInfo  # Python 3.9+ 自带，无需安装
+import logging
+import json
+import os
+from pathlib import Path
+import subprocess
+from typing import Coroutine, Dict, List, Tuple, Union, Iterable
+import time
+import uuid
+logger = logging.getLogger("toolbox")
+from toolbox.porter.tasks.base_task import BaseTask
+from project_settings import environment, project_path, time_zone_info
+@BaseTask.register("video_format_convert")
+class VideoFormatConvertTask(BaseTask):
+    def __init__(self,
+                 task_name: str,
+                 video_info_file: str,
+                 target_dir: str,
+                 check_interval: int,
+                 format_pairs: List[Tuple[str, str]],
+                 remove_after_upload: bool = False,
+                 **kwargs
+                 ):
+        super().__init__(
+            flag=f"[{self.__class__.__name__}_{task_name}]",
+            check_interval=check_interval
+        )
+        self.task_name = task_name
+        self.target_dir: Path = Path(target_dir)
+        self.format_pairs = format_pairs
+        self.remove_after_upload = remove_after_upload
+        if not os.path.isabs(video_info_file):
+            self.video_info_file = project_path / video_info_file
+        else:
+            self.video_info_file = Path(video_info_file)
+    async def save_video_info(self, video_info: dict) -> Dict[str, dict]:
+        self.video_info_file.parent.mkdir(parents=True, exist_ok=True)
+        video_info_ = json.dumps(video_info, ensure_ascii=False, indent=2)
+        async with aiofiles.open(self.video_info_file.as_posix(), "w", encoding="utf-8") as f:
+            await f.write(f"{video_info_}\n")
+        return video_info
+    async def load_video_info(self) -> Dict[str, dict]:
+        video_info = dict()
+        if self.video_info_file.exists():
+            async with aiofiles.open(self.video_info_file.as_posix(), "r", encoding="utf-8") as f:
+                data = await f.read()
+                video_info: dict = json.loads(data)
+        return video_info
+    async def do_task(self):
+        video_info = await self.load_video_info()
+        print(self.video_info_file)
+        new_video_info = copy.deepcopy(video_info)
+        for k, v in video_info.items():
+            create_time = v["create_time"]
+            filename = v["filename"]
+            # url_list = v["url_list"]
+            # video_id = v["video_id"]
+            title = v["title"]
+            desc = v["desc"]
+            tags = v["tags"]
+            format_convert_output_file = v.get("format_convert_output_file")
+            if format_convert_output_file is None:
+                logger.info(f"{self.flag}视频格式转换开始：{filename}")
+                output_file = self.convert_fmt(filename)
+                if self.remove_after_upload:
+                    logger.info(f"{self.flag}删除本地文件：{filename}")
+                    os.remove(filename)
+                logger.info(f"{self.flag}视频格式转换成功：{filename}")
+                v["format_convert_output_file"] = output_file
+                new_video_info[k] = v
+        await self.save_video_info(new_video_info)
+    def convert_fmt(self, filename: str) -> str:
+        input_file = Path(filename)
+        tgt_suffix_ = None
+        for src_suffix, tgt_suffix in self.format_pairs:
+            src_suffix = str(src_suffix).lstrip(".")
+            tgt_suffix = str(tgt_suffix).lstrip(".")
+            if input_file.with_suffix(f".{src_suffix}"):
+                tgt_suffix_ = tgt_suffix
+        if tgt_suffix_ is None:
+            return input_file.as_posix()
+        output_file = input_file.with_suffix(f".{tgt_suffix_}")
+        output_file = self.target_dir / output_file.name
+        output_file.parent.mkdir(parents=True, exist_ok=True)
+        idx = uuid.uuid4()
+        input_file_ = input_file.parent / f"{idx}{input_file.suffix}"
+        output_file_ = output_file.parent / f"{idx}{output_file.suffix}"
+        os.rename(input_file.as_posix(), input_file_.as_posix())
+        command = [
+            "ffmpeg",
+            "-i", input_file_.as_posix(),
+            "-c:v", "copy",  # 视频流不重新编码，速度快
+            "-c:a", "aac",  # 音频流转换为 AAC（mp4 通用）
+            "-strict", "experimental",  # 某些 ffmpeg 版本需要
+            "-y",
+            output_file_.as_posix()
+        ]
+        try:
+            subprocess.run(command, check=True)
+            logger.info(f"convert fmt success; output_file: {output_file}")
+        except subprocess.CalledProcessError as error:
+            logger.info(f"convert fmt failed; error type: {type(error)}, error text: {str(error)}")
+        finally:
+            os.rename(input_file_.as_posix(), input_file.as_posix())
+            os.rename(output_file_.as_posix(), output_file.as_posix())
+        return output_file.absolute().as_posix()
+if __name__ == "__main__":
+    # No standalone behaviour: running this module directly is a no-op.
+    pass

toolbox/porter/tasks/youtube_video_download_task.py ADDED Viewed

	@@ -0,0 +1,6 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+if __name__ == "__main__":
+    # Placeholder module: nothing is implemented here yet, so this is a no-op.
+    pass

toolbox/youtube_spider/__init__.py ADDED Viewed

	@@ -0,0 +1,6 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+if __name__ == "__main__":
+    # Nothing to run: executing this package file directly is a no-op.
+    pass

toolbox/youtube_spider/video/__init__.py ADDED Viewed

	@@ -0,0 +1,6 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+if __name__ == "__main__":
+    # Nothing to run: executing this package file directly is a no-op.
+    pass

toolbox/youtube_spider/video/video_download.py ADDED Viewed

	@@ -0,0 +1,82 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import logging
+from pathlib import Path
+import time, random
+from pytubefix import YouTube
+import yt_dlp
+from toolbox.youtube_spider.youtube_spider_client import YoutubeSpiderClient
+logger = logging.getLogger("toolbox")
+class YoutubeVideoDownloadSpider(YoutubeSpiderClient):
+    def __init__(self):
+        super(YoutubeVideoDownloadSpider, self).__init__()
+        self.last_download_time_ts = time.time()
+        self.min_download_delta = 10
+    def delay_before_download(self):
+        now = time.time()
+        delta = now - self.last_download_time_ts
+        delay = self.min_download_delta - delta
+        delay = max(0, delay)
+        delay = delay + random.uniform(2, 5)
+        logger.info(f"Delay before downloading; delay: {delay}s.")
+        time.sleep(delay)
+        self.last_download_time_ts = time.time()
+        return delay
+    def download_by_video_id_by_pytube(self, video_id: str, target_file: str):
+        url = f"https://www.youtube.com/watch?v={video_id}"
+        yt = YouTube(url)
+        stream = yt.streams.get_highest_resolution()
+        target_file = Path(target_file)
+        target_file.parent.mkdir(parents=True, exist_ok=True)
+        self.delay_before_download()
+        logger.info(f"Downloading...")
+        stream.download(
+            output_path=target_file.parent.as_posix(),
+            filename=target_file.name
+        )
+        return target_file.as_posix()
+    def download_by_video_id_by_yt_dlp(self, video_id: str, target_file: str):
+        # 如果被检测为机器人bot，换个VPN或IP就可以了。
+        url = f"https://www.youtube.com/watch?v={video_id}"
+        # https://www.youtube.com/watch?v=e0QoiTSlwKY
+        target_file = Path(target_file)
+        target_file.parent.mkdir(parents=True, exist_ok=True)
+        ydl_opts = {
+            'outtmpl': target_file.as_posix(),
+            'format': 'bestvideo+bestaudio/best',  # 下载最佳视频+音频
+            'merge_output_format': 'mp4',  # 合并成 mp4
+            'noplaylist': True,  # 只下载单个视频，不下载播放列表
+        }
+        self.delay_before_download()
+        logger.info(f"Downloading...")
+        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+            ydl.download([url])
+        return target_file.as_posix()
+def main():
+    from pytube import YouTube
+    client = YoutubeVideoDownloadSpider()
+    # {'video_id': 'e0QoiTSlwKY', 'title': '12. 奶茶咖啡赛道 上 （产品端设计）', 'description': None, 'published_time_text': '2 weeks ago', 'video_length_text': '4:27'}
+    client.download_by_video_id_by_yt_dlp(video_id="e0QoiTSlwKY", target_file="./你好吗.mp4")
+    return
+if __name__ == "__main__":
+    main()

toolbox/youtube_spider/video/video_list.py ADDED Viewed

	@@ -0,0 +1,196 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import json
+import re
+from bs4 import BeautifulSoup
+import requests
+from pandas.core.reshape.util import tile_compat
+from streamlink_cli.main import handle_url
+from toolbox.youtube_spider.youtube_spider_client import YoutubeSpiderClient
+class YoutubeVideoListSpider(YoutubeSpiderClient):
+    def __init__(self):
+        super(YoutubeVideoListSpider, self).__init__()
+    def get_front_page_video_list(self, channel_handle: str):
+        # url = "https://www.youtube.com/@JasonBear131/videos"
+        url = f"https://www.youtube.com/@{channel_handle}/videos"
+        response = requests.request(
+            "GET",
+            url=url
+        )
+        if response.status_code != 200:
+            raise AssertionError(f"request failed, status_code: {response.status_code}, text: {response.text}")
+        html_text = response.text
+        pattern = re.compile(r"ytInitialData\s*=\s*{")
+        match = pattern.search(html_text)
+        if not match:
+            raise AssertionError
+        start_index = match.end() - 1  # 指向第一个 {
+        # 手动做大括号匹配
+        brace_count = 0
+        end_index = start_index
+        for i in range(start_index, len(html_text)):
+            if html_text[i] == "{":
+                brace_count += 1
+            elif html_text[i] == "}":
+                brace_count -= 1
+            if brace_count == 0:
+                end_index = i + 1
+                break
+        json_str = html_text[start_index:end_index]
+        # print(json_str)
+        js = json.loads(json_str)
+        return js
+    def get_front_page_video_list_pretty(self, channel_handle: str):
+        js = self.get_front_page_video_list(channel_handle)
+        tabs = js["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
+        rich_grid_renderers = list()
+        for tab in tabs:
+            if len(tab.keys()) != 1:
+                raise AssertionError()
+            if "tabRenderer" in tab.keys():
+                tab_renderer = tab["tabRenderer"]
+                title = tab_renderer["title"]
+                if title == "Videos":
+                    rich_grid_renderers = tab_renderer["content"]["richGridRenderer"]["contents"]
+                    break
+            elif "expandableTabRenderer" in tab.keys():
+                expandable_tab_renderer = tab["expandableTabRenderer"]
+            else:
+                raise AssertionError()
+        video_grid_renderers = rich_grid_renderers[:-1]
+        continuation_grid_renderer = rich_grid_renderers[-1]
+        video_list = list()
+        for video_grid in video_grid_renderers:
+            video_renderer = video_grid["richItemRenderer"]["content"]["videoRenderer"]
+            # print(json.dumps(video_renderer, ensure_ascii=False, indent=4))
+            video_id = video_renderer["videoId"]
+            title = video_renderer["title"]["runs"][0]["text"]
+            published_time_text = video_renderer["publishedTimeText"]["simpleText"]
+            video_length_text = video_renderer["lengthText"]["simpleText"]
+            # print(f"video_id: {video_id}")
+            # print(f"title: {title}")
+            # print(f"published_time_text: {published_time_text}")
+            # print(f"video_length_text: {video_length_text}")
+            # print("-" * 75)
+            video_list.append({
+                "video_id": video_id,
+                "title": title,
+                "description": None,
+                "published_time_text": published_time_text,
+                "video_length_text": video_length_text,
+            })
+        # print(json.dumps(continuation_grid_renderer, ensure_ascii=False, indent=4))
+        continuation_item_renderer = continuation_grid_renderer["continuationItemRenderer"]
+        continuation_token = continuation_item_renderer["continuationEndpoint"]["continuationCommand"]["token"]
+        return video_list, continuation_token
+    def get_continuation_page_video_list(self, continuation_token: str):
+        url = "https://www.youtube.com/youtubei/v1/browse"
+        params = {
+            "prettyPrint": "false",
+        }
+        data = {
+            "context": {
+                "client": {
+                    "clientName": "WEB",
+                    "clientVersion": "2.20251222.04.00",
+                },
+            },
+            "continuation": continuation_token,
+        }
+        response = requests.request(
+            "POST",
+            url=url,
+            headers=self.headers,
+            params=params,
+            data=json.dumps(data),
+        )
+        if response.status_code != 200:
+            raise AssertionError(f"request failed, status_code: {response.status_code}, text: {response.text}")
+        js = response.json()
+        return js
+    def get_continuation_page_video_list_pretty(self, continuation_token: str):
+        js = self.get_continuation_page_video_list(continuation_token)
+        continuation_items = js["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"]
+        video_grid_renderers = continuation_items[:-1]
+        continuation_grid_renderer = continuation_items[-1]
+        video_list = list()
+        for video_grid in video_grid_renderers:
+            video_renderer = video_grid["richItemRenderer"]["content"]["videoRenderer"]
+            # print(json.dumps(video_renderer, ensure_ascii=False, indent=4))
+            video_id = video_renderer["videoId"]
+            title = video_renderer["title"]["runs"][0]["text"]
+            published_time_text = video_renderer["publishedTimeText"]["simpleText"]
+            video_length_text = video_renderer["lengthText"]["simpleText"]
+            description = None
+            description_snippet = video_renderer.get("descriptionSnippet")
+            if description_snippet is not None:
+                description = description_snippet["runs"][0]["text"]
+            video_list.append({
+                "video_id": video_id,
+                "title": title,
+                "description": description,
+                "published_time_text": published_time_text,
+                "video_length_text": video_length_text,
+            })
+        # print(json.dumps(continuation_grid_renderer, ensure_ascii=False, indent=4))
+        continuation_item_renderer = continuation_grid_renderer["continuationItemRenderer"]
+        continuation_token = continuation_item_renderer["continuationEndpoint"]["continuationCommand"]["token"]
+        return video_list, continuation_token
+def main():
+    client = YoutubeVideoListSpider()
+    video_list, continuation_token = client.get_front_page_video_list_pretty("JasonBear131")
+    for video in video_list:
+        print(video)
+    print(continuation_token)
+    video_list, continuation_token = client.get_continuation_page_video_list_pretty(continuation_token)
+    for video in video_list:
+        print(video)
+    print(continuation_token)
+    return
+if __name__ == "__main__":
+    main()

toolbox/youtube_spider/youtube_spider_client.py ADDED Viewed

	@@ -0,0 +1,47 @@

+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+import json
+import os
+import argparse
+from pathlib import Path
+import httpx
+import logging
+import requests
+from typing import List
+from project_settings import project_path, environment
+from toolbox.design_patterns.singleton import ParamsSingleton
+logger = logging.getLogger("toolbox")
+class YoutubeSpiderClient(ParamsSingleton):
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+    }
+    def __init__(self):
+        if not self._initialized:
+            self.credentials = None
+            self.cookies = None
+            self._session = requests.Session()
+            self._async_session = httpx.AsyncClient(
+                http2=True,
+                limits=httpx.Limits(max_keepalive_connections=100, keepalive_expiry=100),
+                headers=self.headers,
+                cookies=self.cookies,
+            )
+            self._initialized = True
+    @property
+    def session(self):
+        return self._session
+    @property
+    def async_session(self):
+        return self._async_session
+if __name__ == "__main__":
+    pass