HoneyTian committed on
Commit
eeaf31b
·
1 Parent(s): 6071696
data/porter_tasks/porter_task_chenjieshen_douyin_live_record_to_baidu_netdisk.json ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "enable": false,
4
+ "type": "douyin_live_record",
5
+ "room_name": "陈杰森资本NewBoombap",
6
+ "room_id": "57544215290",
7
+ "sec_user_id": "MS4wLjABAAAATGoBrO7yiJ3q9go4fxq9JXjrnP1bFpdkgKckC1IpfXA_vrjSmL9ZtjmTju8ApwbT",
8
+ "key_of_credentials": "douyin_wentao_credentials",
9
+ "check_interval": 10,
10
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
11
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
12
+ },
13
+ {
14
+ "enable": false,
15
+ "type": "douyin_live_record",
16
+ "room_name": "老陈的退路",
17
+ "room_id": "330025930592",
18
+ "sec_user_id": "MS4wLjABAAAAzjLy7Dd3Q-UiMGdkwymLhDIL1aFJtruvCUqrwmph9n8Zb55E4y9XH2WEFaLC1ZBl",
19
+ "key_of_credentials": "douyin_wentao_credentials",
20
+ "check_interval": 10,
21
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
22
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
23
+ },
24
+ {
25
+ "enable": false,
26
+ "type": "douyin_live_record",
27
+ "room_name": "老陈come_back",
28
+ "room_id": "78835697536",
29
+ "sec_user_id": "MS4wLjABAAAAj3qTpPkes0t5az2nXd1oaag5TpOJeNh27pemT17WUVXHGUZKZfjqc3fFc8arL6V1",
30
+ "key_of_credentials": "douyin_wentao_credentials",
31
+ "check_interval": 10,
32
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
33
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
34
+ },
35
+ {
36
+ "enable": false,
37
+ "type": "douyin_live_record",
38
+ "room_name": "清源第一帅",
39
+ "room_id": "654177813521",
40
+ "sec_user_id": "MS4wLjABAAAAtwkRNs5Krtm5cBhKzn60poflAFWdnYdwMf0I0M27rOGvNn2l16DfUH5vE3WL-ypz",
41
+ "key_of_credentials": "douyin_wentao_credentials",
42
+ "check_interval": 10,
43
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
44
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
45
+ },
46
+ {
47
+ "enable": false,
48
+ "type": "douyin_live_record",
49
+ "room_name": "清源第二帅",
50
+ "room_id": "831568261978",
51
+ "sec_user_id": "MS4wLjABAAAALbRezHNzlaRYhk9QE-5ajImgcvfJGq9koAlD0O8VQwiC-N8Bqx2WDW07wLee2Bwd",
52
+ "key_of_credentials": "douyin_wentao_credentials",
53
+ "check_interval": 10,
54
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
55
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
56
+ },
57
+ {
58
+ "enable": false,
59
+ "type": "douyin_live_record",
60
+ "room_name": "清源第三帅",
61
+ "room_id": "26569028431",
62
+ "sec_user_id": "MS4wLjABAAAA60pV9JuZNTZhQJeSI_AeaRUnKXs--TGPZEAtsgihwKnir46ncFVtLSO79JDnclkM",
63
+ "key_of_credentials": "douyin_wentao_credentials",
64
+ "check_interval": 10,
65
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
66
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
67
+ },
68
+ {
69
+ "enable": false,
70
+ "type": "douyin_live_record",
71
+ "room_name": "清源第四帅",
72
+ "room_id": "91434255754",
73
+ "sec_user_id": "MS4wLjABAAAAakYu9rpF6vk50XbiKFnxzuXVv6fhVK5O3tr0gxjTJq2Nw5HfMplPv6ce6AhXxReG",
74
+ "key_of_credentials": "douyin_wentao_credentials",
75
+ "check_interval": 10,
76
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
77
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
78
+ },
79
+ {
80
+ "enable": false,
81
+ "type": "douyin_live_record",
82
+ "room_name": "清源第五帅",
83
+ "room_id": "469245033355",
84
+ "sec_user_id": "MS4wLjABAAAA62G-uDXdlIPnk44P9gXhQQo1UmOCgZ08QWOP_-t4PjI",
85
+ "key_of_credentials": "douyin_wentao_credentials",
86
+ "check_interval": 10,
87
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
88
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
89
+ },
90
+ {
91
+ "enable": false,
92
+ "type": "douyin_live_record",
93
+ "room_name": "清源人工智能研究院",
94
+ "room_id": "81728900292",
95
+ "sec_user_id": "MS4wLjABAAAAe_Hf6esmopqIMG7OhsMlNESTiqP1ot7lmGzK6Cu3pJSSGTOiHlI4cA33OPWg56Ql",
96
+ "key_of_credentials": "douyin_wentao_credentials",
97
+ "check_interval": 10,
98
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
99
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
100
+ },
101
+ {
102
+ "enable": false,
103
+ "type": "douyin_live_record",
104
+ "room_name": "廖勇-清源政策研究院AIGC",
105
+ "room_id": "gubenqingyuan",
106
+ "sec_user_id": "MS4wLjABAAAA9ne6zoCLjnzpCRR1GhPVBUq2_7wIJO4GyZXlyW016yw",
107
+ "key_of_credentials": "douyin_wentao_credentials",
108
+ "check_interval": 10,
109
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
110
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
111
+ },
112
+ {
113
+ "enable": false,
114
+ "type": "douyin_live_record",
115
+ "room_name": "小熊Bella与老爸",
116
+ "room_id": "139751520143",
117
+ "sec_user_id": "MS4wLjABAAAA49QFP6YhorLIIX9M-FiZeKxmqhqXlttluSsZeaxvxzU",
118
+ "key_of_credentials": "douyin_wentao_credentials",
119
+ "check_interval": 10,
120
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
121
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
122
+ },
123
+ {
124
+ "enable": false,
125
+ "type": "douyin_live_record",
126
+ "room_name": "清源之虎",
127
+ "room_id": "998621457719",
128
+ "sec_user_id": "MS4wLjABAAAAWYFqu46IMCnfwgSjBK35RnQcKOYhqVX3YcP4rotpHkdxJn5JD59REMz5fwlccp2h",
129
+ "key_of_credentials": "douyin_wentao_credentials",
130
+ "check_interval": 10,
131
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
132
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
133
+ },
134
+ {
135
+ "enable": false,
136
+ "type": "douyin_live_record",
137
+ "room_name": "老陈真是好人",
138
+ "room_id": "599130203190",
139
+ "sec_user_id": "MS4wLjABAAAA3ReegyjpprzeurqHUl8yLBkTlQ7L4TfoyjpDXeiy1RvCu7OmqWljOLmWgibTjs8y",
140
+ "key_of_credentials": "douyin_wentao_credentials",
141
+ "check_interval": 10,
142
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
143
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
144
+ },
145
+ {
146
+ "enable": false,
147
+ "type": "douyin_live_record",
148
+ "room_name": "老陈小帮手",
149
+ "room_id": "738682070097",
150
+ "sec_user_id": "MS4wLjABAAAAtzZsG1mKtZvgE2j58ZB7FDRP1gzsHXuQDIN15bltkLM",
151
+ "key_of_credentials": "douyin_wentao_credentials",
152
+ "check_interval": 10,
153
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
154
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
155
+ },
156
+ {
157
+ "enable": false,
158
+ "type": "douyin_live_record",
159
+ "room_name": "焦虑个锤子",
160
+ "room_id": "78933304321",
161
+ "sec_user_id": "MS4wLjABAAAAfr5os1dB5zdm36GK5nHVQkWReaELMueuYqliVDBo64Y",
162
+ "key_of_credentials": "douyin_wentao_credentials",
163
+ "check_interval": 10,
164
+ "output_video_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森",
165
+ "output_video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json"
166
+ },
167
+ {
168
+ "enable": false,
169
+ "type": "video_format_convert",
170
+ "task_name": "陈杰森-百度云盘-格式转换",
171
+ "video_info_file": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/file_info.json",
172
+ "target_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/to_baidu_netdisk",
173
+ "check_interval": 10,
174
+ "remove_after_upload": true,
175
+ "format_pairs": [["flv", "mp4"]]
176
+ },
177
+ {
178
+ "enable": true,
179
+ "type": "file_to_baidu_netdisk",
180
+ "task_name": "陈杰森",
181
+ "src_dir": "data/tasks/chenjieshen_douyin_live_record_to_baidu_netdisk/live_record/陈杰森/to_baidu_netdisk",
182
+ "tgt_dir": "xianyu/customers/{task_name}/{date_str}",
183
+ "check_interval": 10,
184
+ "key_of_credentials": "baidu_netdisk_honeytian_credentials",
185
+ "remove_after_upload": true,
186
+ "exclude_files": "file_info.json"
187
+ }
188
+ ]
requirements.txt CHANGED
@@ -21,3 +21,4 @@ tenacity
21
  selenium
22
  webdriver-manager
23
  chinesecalendar
 
 
21
  selenium
22
  webdriver-manager
23
  chinesecalendar
24
+ bypy
tabs/video_upload_tasks.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import gradio as gr
4
+
5
+ from project_settings import project_path
6
+
7
+
8
def get_video_upload_tasks_tab():
    """
    Build the "video_upload_tasks" tab inside the currently open ``gr.Blocks`` context.

    The tab contains one row of inputs (task source text, platform, target
    directory and a delay date/time picker) plus an "add_task" button. The
    click handler is a stub that only prints the submitted source text.

    :return: the ``locals()`` dict of this function, i.e. every component
        created here and the click handler, keyed by local variable name.
    """
    with gr.TabItem("video_upload_tasks"):
        with gr.Row():
            tasks_src = gr.Textbox(label="tasks_src", max_lines=10)
            tasks_platform = gr.Dropdown(choices=["douyin"], value="douyin", label="platform")
            tasks_target_dir = gr.Dropdown(choices=["douyin"], value="douyin", label="target_dir")
            # "delay" is the caption of the picker (like the sibling components'
            # label=...), not an initial value: it is not a parseable date/time.
            tasks_delay = gr.DateTime(label="delay")

        tasks_add_button = gr.Button("add_task", variant="primary")

        def when_click_tasks_add_button(src: str, platform: str, target_dir: str, delay: str):
            # Stub handler: echoes the source text; the other inputs are not used yet.
            print(src)
            return None

        tasks_add_button.click(
            fn=when_click_tasks_add_button,
            inputs=[
                tasks_src, tasks_platform, tasks_target_dir, tasks_delay,
            ],
            outputs=None,
        )
    # NOTE: callers receive every local name, so renaming a local changes the returned keys.
    return locals()
30
+
31
+
32
# Stand-alone preview: mount only this tab in a fresh Blocks app and serve it.
if __name__ == "__main__":
    with gr.Blocks() as block:
        video_upload_tasks_components = get_video_upload_tasks_tab()
    block.launch()
toolbox/baidu_netdisk/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
+ if __name__ == "__main__":
6
+ pass
toolbox/baidu_netdisk/baidu_netdisk_client.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ https://pypi.org/project/bypy/
5
+ https://github.com/houtianze/bypy
6
+ """
7
+ import argparse
8
+ import json
9
+ import logging
10
+ import shutil
11
+ import tempfile
12
+ from pathlib import Path
13
+
14
+ import bypy
15
+ from bypy import const
16
+
17
+ from project_settings import project_path, environment
18
+ from toolbox.design_patterns.singleton import ParamsSingleton
19
+
20
+
21
+ logger = logging.getLogger("toolbox")
22
+
23
+
24
class BaiduNetdiskClient(ParamsSingleton):
    """
    Wrapper around ``bypy.ByPy`` that keeps the bypy token file inside a
    dedicated config directory.

    NOTE(review): relies on ``ParamsSingleton`` to define ``_initialized``
    before ``__init__`` runs -- confirm against toolbox.design_patterns.singleton.
    """

    def __init__(self, configdir: str = None):
        """
        :param configdir: directory in which bypy keeps its token file. When
            omitted, ``<system temp dir>/baidu_netdisk/configdir`` is used and
            any existing content of it is deleted first.
        """
        if not self._initialized:
            self.credentials = None
            if configdir is None:
                configdir = Path(tempfile.gettempdir()) / "baidu_netdisk/configdir"
                if configdir.exists():
                    shutil.rmtree(configdir.as_posix())
            self.configdir = Path(configdir)

            self.token_path = self.configdir / const.TokenFileName

            self._bypy_client: bypy.ByPy = None
            # Only creates a client when a token file is already present.
            self.bypy_login()

            self._initialized = True

    def _ensure_bypy_client(self):
        """Create the underlying ``bypy.ByPy`` instance if it does not exist yet and return it."""
        if self._bypy_client is None:
            logger.info(f"login by configdir: {self.configdir.as_posix()}")
            self._bypy_client = bypy.ByPy(
                configdir=self.configdir.as_posix(),
                # debug=1,
                # verbose=1,
            )
        return self._bypy_client

    @property
    def bypy_client(self):
        """
        The underlying ``bypy.ByPy`` instance.

        :raises AssertionError: when no client has been created yet.
        """
        if self._bypy_client is None:
            raise AssertionError("bypy not login yet!")
        return self._bypy_client

    def check_login(self):
        """
        :return: ``True`` when a client exists and its ``info()`` call reports
            status code 0, otherwise ``False``.
        """
        if self._bypy_client is None:
            return False
        return self._bypy_client.info() == 0

    def make_bypy_login_config(self):
        """
        Wipe the config directory, build a fresh bypy client on it and print
        the resulting token file as JSON on stdout.

        NOTE(review): presumably ``bypy.ByPy(...)`` runs its authorization flow
        when no token file exists, which is what (re)creates ``token_path`` --
        confirm against the bypy documentation.

        :return: None
        """
        if self.configdir.exists():
            shutil.rmtree(self.configdir.as_posix())

        # The token file was just deleted, so a client created earlier (e.g. by
        # __init__) is stale. Drop it; otherwise no new client is built and the
        # open() below fails on the missing token file.
        self._bypy_client = None
        self._ensure_bypy_client()

        # print tokens
        with open(self.token_path.as_posix(), "r", encoding="utf-8") as f:
            js = json.load(f)
        js = json.dumps(js, ensure_ascii=False)
        print(js)
        return None

    def bypy_login(self):
        """
        Create the bypy client from the existing token file and query ``info()``.

        :return: ``False`` when there is no token file (nothing is done),
            otherwise ``None``.
        """
        if not self.token_path.exists():
            return False

        self._ensure_bypy_client()
        self._bypy_client.info()
        return None

    def login_with_credentials_info(self, credentials_info: dict):
        """
        Write ``credentials_info`` as the bypy token file and log in with it.

        :param credentials_info: JSON-serialisable token data, stored verbatim
            at ``token_path``.
        :return: always ``True``; use :meth:`check_login` to verify the login.
        """
        self.credentials = credentials_info

        self.token_path.parent.mkdir(parents=True, exist_ok=True)
        with open(self.token_path.as_posix(), "w", encoding="utf-8") as f:
            json.dump(credentials_info, f, ensure_ascii=False, indent=4)

        self.bypy_login()
        return True
110
+
111
+
112
def get_args():
    """
    Parse the command line of this module.

    Both options are strings: ``--key_of_credentials`` and ``--config_dir``
    (defaulting to ``dotenv/baidu_netdisk/config_dir`` under the project root).

    :return: the parsed ``argparse.Namespace``.
    """
    default_config_dir = project_path / "dotenv/baidu_netdisk/config_dir"
    option_defaults = {
        "--key_of_credentials": "baidu_netdisk_honeytian_credentials",
        "--config_dir": default_config_dir.as_posix(),
    }

    cli = argparse.ArgumentParser()
    for option, default in option_defaults.items():
        cli.add_argument(option, default=default, type=str)
    return cli.parse_args()
126
+
127
+
128
def main():
    """
    Manual entry point: set up logging, build a client on ``--config_dir``,
    regenerate its bypy login config (which prints the token JSON) and print
    whether the login check passes.
    """
    args = get_args()

    import log
    from project_settings import environment, project_path, log_directory, time_zone_info

    log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)

    client = BaiduNetdiskClient(
        configdir=args.config_dir
    )
    client.make_bypy_login_config()

    flag = client.check_login()
    print(f"flag: {flag}")
    # Alternative flow, kept for reference: log in from credentials stored in the environment.
    # credentials_info = environment.get(key=args.key_of_credentials, dtype=json.loads)
    # client.login_with_credentials_info(credentials_info=credentials_info)
    # flag = client.check_login()
    # print(f"flag: {flag}")

    return


if __name__ == "__main__":
    main()
toolbox/baidu_netdisk/upload/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
+ if __name__ == "__main__":
6
+ pass
toolbox/baidu_netdisk/upload/upload.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import argparse
4
+ import json
5
+
6
+ from project_settings import project_path
7
+ from toolbox.baidu_netdisk.baidu_netdisk_client import BaiduNetdiskClient
8
+
9
+
10
class UploadClient(BaiduNetdiskClient):
    """Baidu Netdisk client that adds a single-file upload helper."""

    def __init__(self, configdir: str = None):
        """
        :param configdir: forwarded unchanged to ``BaiduNetdiskClient``.
        """
        super(UploadClient, self).__init__(configdir=configdir)

    def upload_by_filename(self, src_file: str, tgt_file: str):
        """
        Upload ``src_file`` to the remote path ``tgt_file``.

        :param src_file: local path of the file to upload.
        :param tgt_file: remote destination path, passed to bypy as-is.
        :return: ``tgt_file``.
        :raises AssertionError: when the client is not logged in yet
            (raised by the ``bypy_client`` property).
        :raises RuntimeError: when bypy reports a non-zero status code.
        """
        status_code = self.bypy_client.upload(
            src_file,
            tgt_file
        )
        # bypy signals failure through its return code (0 means success, as in
        # check_login). Without this check a failed upload looks successful and
        # callers may delete the only local copy.
        # NOTE(review): None is tolerated in case a bypy version returns nothing -- confirm.
        if status_code not in (None, 0):
            raise RuntimeError(
                f"baidu netdisk upload failed; status_code: {status_code}, "
                f"src_file: {src_file}, tgt_file: {tgt_file}"
            )
        return tgt_file
24
+
25
+
26
def get_args():
    """
    Parse the command line of the upload demo.

    All four options are strings: ``--key_of_credentials``, ``--config_dir``,
    ``--src_file`` (defaults to the project's README.md) and ``--tgt_file``
    (remote destination path).

    :return: the parsed ``argparse.Namespace``.
    """
    option_defaults = {
        "--key_of_credentials": "baidu_netdisk_honeytian_credentials",
        "--config_dir": (project_path / "dotenv/baidu_netdisk/config_dir").as_posix(),
        "--src_file": (project_path / "README.md").as_posix(),
        "--tgt_file": "video_platform/upload/README.md",
    }

    cli = argparse.ArgumentParser()
    for option, default in option_defaults.items():
        cli.add_argument(option, default=default, type=str)
    return cli.parse_args()
50
+
51
+
52
def main():
    """
    Manual entry point: log in with the credentials stored under
    ``--key_of_credentials`` and upload ``--src_file`` to ``--tgt_file``.
    The login state is printed before and after logging in.
    """
    args = get_args()

    import log
    from project_settings import environment, project_path, log_directory, time_zone_info

    log.setup_size_rotating(log_directory=log_directory, tz_info=time_zone_info)

    # --config_dir is parsed but deliberately not passed here, so the client
    # falls back to its default config directory.
    client = UploadClient(
        # configdir=args.config_dir
    )

    flag = client.check_login()
    print(f"flag: {flag}")
    credentials_info = environment.get(key=args.key_of_credentials, dtype=json.loads)
    client.login_with_credentials_info(credentials_info=credentials_info)
    flag = client.check_login()
    print(f"flag: {flag}")

    client.upload_by_filename(
        src_file=args.src_file,
        tgt_file=args.tgt_file,
    )
    return


if __name__ == "__main__":
    main()
toolbox/porter/tasks/__init__.py CHANGED
@@ -5,6 +5,8 @@ from .douyin_live_info_collect_task import DouyinLiveInfoCollectTask
5
  from .douyin_video_download_task import DouyinVideoDownloadTask
6
  from .douyin_live_record_task import DouyinLiveRecordTask
7
  from .douyin_live_to_bilibili_live_task import DouyinLiveToBilibiliLiveTask
 
 
8
  from .video_to_bilibili_task import VideoToBilibiliTask
9
  from .video_to_youtube_task import VideoToYoutubeTask
10
 
 
5
  from .douyin_video_download_task import DouyinVideoDownloadTask
6
  from .douyin_live_record_task import DouyinLiveRecordTask
7
  from .douyin_live_to_bilibili_live_task import DouyinLiveToBilibiliLiveTask
8
+ from .file_to_baidu_netdisk_task import FileToBaiduNetdiskTask
9
+ from .video_format_convert_task import VideoFormatConvertTask
10
  from .video_to_bilibili_task import VideoToBilibiliTask
11
  from .video_to_youtube_task import VideoToYoutubeTask
12
 
toolbox/porter/tasks/file_to_baidu_netdisk_task.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import aiofiles
4
+ import asyncio
5
+ import copy
6
+ from datetime import datetime
7
+ from zoneinfo import ZoneInfo # Python 3.9+ 自带,无需安装
8
+ import logging
9
+ import json
10
+ import os
11
+ from pathlib import Path
12
+ from typing import Coroutine, Dict, List, Tuple, Union, Iterable
13
+
14
+ logger = logging.getLogger("toolbox")
15
+
16
+ from toolbox.porter.tasks.base_task import BaseTask
17
+ from toolbox.baidu_netdisk.upload.upload import UploadClient
18
+ from project_settings import environment, project_path, time_zone_info
19
+
20
+
21
@BaseTask.register("file_to_baidu_netdisk")
class FileToBaiduNetdiskTask(BaseTask):
    """
    Porter task that uploads every entry found directly under ``src_dir`` to
    Baidu Netdisk and optionally deletes the local file afterwards.
    """

    def __init__(self,
                 task_name: str,
                 src_dir: str,
                 tgt_dir: str,
                 check_interval: int,
                 key_of_credentials: str,
                 remove_after_upload: bool = False,
                 exclude_files: List[str] = None,
                 **kwargs
                 ):
        """
        :param task_name: name used in the log prefix and as the ``{task_name}``
            placeholder of ``tgt_dir``.
        :param src_dir: local directory to scan; a relative path is resolved
            against ``project_path``.
        :param tgt_dir: remote directory template. Supported placeholders are
            ``{task_name}``, ``{date_str}`` (YYYYMMDD) and ``{time_str}`` (HHMMSS),
            e.g. ``xianyu/customers/{task_name}/{date_str}/{time_str}``.
        :param check_interval: forwarded to ``BaseTask``.
        :param key_of_credentials: key under which the Baidu Netdisk credentials
            JSON is stored in ``environment``.
        :param remove_after_upload: delete the local file once it was uploaded.
        :param exclude_files: file names to skip. A single name given as a plain
            string is accepted as well.
        :param kwargs: extra config keys; ignored.
        """
        super().__init__(
            flag=f"[{self.__class__.__name__}_{task_name}]",
            check_interval=check_interval
        )
        self.task_name = task_name
        self.tgt_dir: str = tgt_dir

        self.remove_after_upload = remove_after_upload
        # Task configs may pass one bare file name. Wrap it so the `in` test in
        # do_task compares whole names instead of doing a substring search.
        if isinstance(exclude_files, str):
            exclude_files = [exclude_files]
        self.exclude_files = list(exclude_files or [])
        self.key_of_credentials = key_of_credentials

        if not os.path.isabs(src_dir):
            self.src_dir: Path = project_path / src_dir
        else:
            self.src_dir: Path = Path(src_dir)

        self.baidu_upload_client = UploadClient()
        self.baidu_upload_client.login_with_credentials_info(
            credentials_info=environment.get(self.key_of_credentials, dtype=json.loads)
        )

    async def do_task(self):
        """
        Upload each non-excluded entry of ``src_dir``; the blocking upload runs
        in a worker thread via ``asyncio.to_thread``.

        NOTE(review): ``glob("*")`` also yields sub-directories, on which
        ``os.remove`` fails -- confirm ``src_dir`` only ever holds files.
        """
        for filename in self.src_dir.glob("*"):
            name = filename.name
            if name in self.exclude_files:
                continue

            # One timestamp for both placeholders, so date and time cannot
            # disagree around midnight.
            now = datetime.now()
            kwargs = {
                "task_name": self.task_name,
                "date_str": now.strftime("%Y%m%d"),
                "time_str": now.strftime("%H%M%S"),
            }

            tgt_dir = self.tgt_dir.format(**kwargs)
            tgt_file = Path(tgt_dir) / name

            logger.info(f"{self.flag}上传视频:{filename.as_posix()}")
            _ = await asyncio.to_thread(
                self.baidu_upload_client.upload_by_filename,
                src_file=filename.as_posix(),
                tgt_file=tgt_file.as_posix()
            )
            if self.remove_after_upload:
                logger.info(f"{self.flag}删除本地文件:{filename.as_posix()}")
                os.remove(filename.as_posix())
            logger.info(f"{self.flag}上传视频成功:{filename.as_posix()}")
84
+
85
+
86
def main():
    """Placeholder entry point; does nothing."""
    return


if __name__ == "__main__":
    main()
toolbox/porter/tasks/video_format_convert_task.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import aiofiles
4
+ import asyncio
5
+ import copy
6
+ from datetime import datetime
7
+ from zoneinfo import ZoneInfo # Python 3.9+ 自带,无需安装
8
+ import logging
9
+ import json
10
+ import os
11
+ from pathlib import Path
12
+ import subprocess
13
+ from typing import Coroutine, Dict, List, Tuple, Union, Iterable
14
+ import time
15
+ import uuid
16
+
17
+ logger = logging.getLogger("toolbox")
18
+
19
+ from toolbox.porter.tasks.base_task import BaseTask
20
+ from project_settings import environment, project_path, time_zone_info
21
+
22
+
23
@BaseTask.register("video_format_convert")
class VideoFormatConvertTask(BaseTask):
    """
    Porter task that converts the videos listed in a video-info JSON file to
    another container format with ffmpeg. The converted path is recorded back
    into that file under the ``format_convert_output_file`` key.
    """

    def __init__(self,
                 task_name: str,
                 video_info_file: str,
                 target_dir: str,
                 check_interval: int,
                 format_pairs: List[Tuple[str, str]],
                 remove_after_upload: bool = False,
                 **kwargs
                 ):
        """
        :param task_name: name used in the log prefix.
        :param video_info_file: JSON file mapping a key to a video record; a
            relative path is resolved against ``project_path``.
        :param target_dir: directory receiving the converted files; a relative
            path is resolved against ``project_path``.
        :param check_interval: forwarded to ``BaseTask``.
        :param format_pairs: ``(source_suffix, target_suffix)`` pairs, with or
            without a leading dot, e.g. ``[["flv", "mp4"]]``.
        :param remove_after_upload: delete the source file after a successful
            conversion.
        :param kwargs: extra config keys; ignored.
        """
        super().__init__(
            flag=f"[{self.__class__.__name__}_{task_name}]",
            check_interval=check_interval
        )
        self.task_name = task_name
        self.format_pairs = format_pairs
        self.remove_after_upload = remove_after_upload

        # Resolve relative paths against the project root, like video_info_file
        # below, so the result does not depend on the working directory.
        if not os.path.isabs(target_dir):
            self.target_dir: Path = project_path / target_dir
        else:
            self.target_dir: Path = Path(target_dir)

        if not os.path.isabs(video_info_file):
            self.video_info_file = project_path / video_info_file
        else:
            self.video_info_file = Path(video_info_file)

    async def save_video_info(self, video_info: dict) -> Dict[str, dict]:
        """Write ``video_info`` to ``video_info_file`` as indented UTF-8 JSON and return it."""
        self.video_info_file.parent.mkdir(parents=True, exist_ok=True)
        video_info_ = json.dumps(video_info, ensure_ascii=False, indent=2)
        async with aiofiles.open(self.video_info_file.as_posix(), "w", encoding="utf-8") as f:
            await f.write(f"{video_info_}\n")
        return video_info

    async def load_video_info(self) -> Dict[str, dict]:
        """Read ``video_info_file``; return an empty dict when it does not exist."""
        video_info = dict()
        if self.video_info_file.exists():
            async with aiofiles.open(self.video_info_file.as_posix(), "r", encoding="utf-8") as f:
                data = await f.read()
                video_info: dict = json.loads(data)
        return video_info

    async def do_task(self):
        """
        Convert every record that has no ``format_convert_output_file`` yet.

        A record whose conversion fails is logged and left untouched, so it is
        retried on the next run and its source file is never deleted. Progress
        is persisted after each record.
        """
        video_info = await self.load_video_info()
        logger.debug(f"{self.flag}video_info_file: {self.video_info_file}")

        new_video_info = copy.deepcopy(video_info)
        for k, v in video_info.items():
            if v.get("format_convert_output_file") is not None:
                continue
            filename = v["filename"]

            logger.info(f"{self.flag}视频格式转换开始:{filename}")
            try:
                # NOTE(review): convert_fmt blocks this coroutine's thread
                # while ffmpeg runs.
                output_file = self.convert_fmt(filename)
            except (OSError, subprocess.CalledProcessError):
                logger.exception(f"{self.flag}convert fmt failed; filename: {filename}")
                continue

            # convert_fmt returns the input path itself when no format pair
            # applies; in that case the "output" must not be deleted.
            converted = Path(output_file).absolute() != Path(filename).absolute()
            if self.remove_after_upload and converted:
                logger.info(f"{self.flag}删除本地文件:{filename}")
                os.remove(filename)
            logger.info(f"{self.flag}视频格式转换成功:{filename}")
            v["format_convert_output_file"] = output_file
            new_video_info[k] = v
            # Save right away so an error on a later record cannot lose this
            # result, which matters once the source file has been removed.
            await self.save_video_info(new_video_info)

    def convert_fmt(self, filename: str) -> str:
        """
        Convert ``filename`` according to ``format_pairs`` and place the result
        in ``target_dir``.

        :param filename: path of the source video.
        :return: absolute POSIX path of the converted file, or the input path
            (as given) when its suffix matches no source suffix.
        :raises subprocess.CalledProcessError: when ffmpeg exits with an error.
        :raises OSError: when ffmpeg cannot be started or a rename fails.
        """
        input_file = Path(filename)
        input_suffix = input_file.suffix.lstrip(".").lower()

        tgt_suffix_ = None
        for src_suffix, tgt_suffix in self.format_pairs:
            # Compare against the file's real suffix, case-insensitively.
            if str(src_suffix).lstrip(".").lower() == input_suffix:
                tgt_suffix_ = str(tgt_suffix).lstrip(".")
                break

        if tgt_suffix_ is None:
            return input_file.as_posix()

        output_file = self.target_dir / input_file.with_suffix(f".{tgt_suffix_}").name
        output_file.parent.mkdir(parents=True, exist_ok=True)

        # ffmpeg works on temporary UUID-named files.
        # NOTE(review): presumably this keeps non-ASCII or special characters
        # of the real names out of the ffmpeg command line -- confirm.
        idx = uuid.uuid4()
        input_file_ = input_file.parent / f"{idx}{input_file.suffix}"
        output_file_ = output_file.parent / f"{idx}{output_file.suffix}"
        os.rename(input_file.as_posix(), input_file_.as_posix())

        command = [
            "ffmpeg",
            "-i", input_file_.as_posix(),
            "-c:v", "copy",             # copy the video stream without re-encoding (fast)
            "-c:a", "aac",              # transcode the audio stream to AAC (common for mp4)
            "-strict", "experimental",  # required by some ffmpeg versions
            "-y",
            output_file_.as_posix()
        ]

        try:
            subprocess.run(command, check=True)
        except (OSError, subprocess.CalledProcessError) as error:
            logger.error(f"convert fmt failed; error type: {type(error)}, error text: {str(error)}")
            # Do not leave a partial output behind in target_dir.
            if output_file_.exists():
                os.remove(output_file_.as_posix())
            raise
        finally:
            # Always give the source file its original name back.
            os.rename(input_file_.as_posix(), input_file.as_posix())

        # Reached only on success. os.replace overwrites an existing target on
        # every platform, matching ffmpeg's "-y".
        os.replace(output_file_.as_posix(), output_file.as_posix())
        logger.info(f"convert fmt success; output_file: {output_file}")

        return output_file.absolute().as_posix()
132
+
133
+
134
+ if __name__ == "__main__":
135
+ pass
toolbox/porter/tasks/youtube_video_download_task.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
+ if __name__ == "__main__":
6
+ pass
toolbox/youtube_spider/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
+ if __name__ == "__main__":
6
+ pass
toolbox/youtube_spider/video/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+
4
+
5
+ if __name__ == "__main__":
6
+ pass
toolbox/youtube_spider/video/video_download.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import logging
4
+ from pathlib import Path
5
+ import time, random
6
+
7
+ from pytubefix import YouTube
8
+ import yt_dlp
9
+
10
+ from toolbox.youtube_spider.youtube_spider_client import YoutubeSpiderClient
11
+
12
+ logger = logging.getLogger("toolbox")
13
+
14
+
15
class YoutubeVideoDownloadSpider(YoutubeSpiderClient):
    """
    Downloads single YouTube videos, either through pytubefix or through
    yt-dlp, and spaces consecutive downloads out with a randomised delay.
    """

    def __init__(self):
        super().__init__()
        # Minimum number of seconds between two downloads, before the random extra.
        self.min_download_delta = 10
        self.last_download_time_ts = time.time()

    def delay_before_download(self):
        """
        Sleep until at least ``min_download_delta`` seconds have passed since
        the previous download, plus a random 2-5 seconds.

        :return: the number of seconds slept.
        """
        elapsed = time.time() - self.last_download_time_ts
        remaining = max(0, self.min_download_delta - elapsed)
        delay = remaining + random.uniform(2, 5)
        logger.info(f"Delay before downloading; delay: {delay}s.")
        time.sleep(delay)
        self.last_download_time_ts = time.time()
        return delay

    def download_by_video_id_by_pytube(self, video_id: str, target_file: str):
        """
        Download the highest-resolution stream of ``video_id`` to ``target_file``.

        :return: ``target_file`` as a POSIX path string.
        """
        watch_url = f"https://www.youtube.com/watch?v={video_id}"
        best_stream = YouTube(watch_url).streams.get_highest_resolution()

        destination = Path(target_file)
        destination.parent.mkdir(parents=True, exist_ok=True)

        self.delay_before_download()
        logger.info("Downloading...")
        best_stream.download(
            output_path=destination.parent.as_posix(),
            filename=destination.name,
        )
        return destination.as_posix()

    def download_by_video_id_by_yt_dlp(self, video_id: str, target_file: str):
        """
        Download ``video_id`` with yt-dlp (best video + best audio, merged to
        mp4) to ``target_file``.

        Author's note: if YouTube flags the client as a bot, switching to
        another VPN or IP resolves it.

        :return: ``target_file`` as a POSIX path string.
        """
        # Example: https://www.youtube.com/watch?v=e0QoiTSlwKY
        watch_url = f"https://www.youtube.com/watch?v={video_id}"

        destination = Path(target_file)
        destination.parent.mkdir(parents=True, exist_ok=True)

        options = dict(
            outtmpl=destination.as_posix(),
            format="bestvideo+bestaudio/best",  # best video plus best audio
            merge_output_format="mp4",          # merge the streams into an mp4
            noplaylist=True,                    # single video only, never a playlist
        )

        self.delay_before_download()
        logger.info("Downloading...")
        with yt_dlp.YoutubeDL(options) as downloader:
            downloader.download([watch_url])
        return destination.as_posix()
69
+
70
+
71
def main():
    """
    Manual smoke test: download one hard-coded video with the yt-dlp backend
    into the current working directory.
    """
    # The former function-level `from pytube import YouTube` was unused and
    # referred to a different package than the module-level pytubefix import,
    # so it could only fail with ImportError; it has been dropped.
    client = YoutubeVideoDownloadSpider()

    # Sample listing entry for this video: video_id 'e0QoiTSlwKY', video_length_text '4:27'.
    client.download_by_video_id_by_yt_dlp(video_id="e0QoiTSlwKY", target_file="./你好吗.mp4")
    return


if __name__ == "__main__":
    main()
toolbox/youtube_spider/video/video_list.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import json
4
+ import re
5
+
6
+ from bs4 import BeautifulSoup
7
+ import requests
8
+ from pandas.core.reshape.util import tile_compat
9
+ from streamlink_cli.main import handle_url
10
+
11
+ from toolbox.youtube_spider.youtube_spider_client import YoutubeSpiderClient
12
+
13
+
14
class YoutubeVideoListSpider(YoutubeSpiderClient):
    """
    Spider that lists the videos published on a YouTube channel.

    The first page is read from the ``ytInitialData`` JSON object embedded in
    the channel's ``/videos`` HTML page. Further pages are fetched from the
    ``youtubei/v1/browse`` endpoint using the continuation token returned
    together with each page.
    """

    # Locates the start of the ``ytInitialData = {...}`` assignment in the HTML.
    _initial_data_pattern = re.compile(r"ytInitialData\s*=\s*{")

    # Web client version reported to the browse endpoint.
    client_version = "2.20251222.04.00"

    # Seconds to wait for a response before a request is aborted.
    request_timeout = 30

    def __init__(self):
        super().__init__()

    def get_front_page_video_list(self, channel_handle: str):
        """
        Fetch the channel's ``/videos`` page and return its ``ytInitialData`` object.

        :param channel_handle: channel handle without the leading ``@``, e.g. ``JasonBear131``.
        :return: the decoded ``ytInitialData`` JSON object (a dict).
        :raises AssertionError: on a non-200 response or when ``ytInitialData`` is not in the page.
        """
        url = f"https://www.youtube.com/@{channel_handle}/videos"

        # NOTE(review): unlike the continuation request, no headers are sent
        # here - confirm this is intentional.
        response = requests.request(
            "GET",
            url=url,
            timeout=self.request_timeout,
        )
        if response.status_code != 200:
            raise AssertionError(f"request failed, status_code: {response.status_code}, text: {response.text}")

        html_text = response.text
        match = self._initial_data_pattern.search(html_text)
        if not match:
            raise AssertionError(f"ytInitialData not found in page; url: {url}")

        # match.end() - 1 is the opening "{". raw_decode parses exactly one
        # JSON value starting there and ignores whatever follows it, so braces
        # inside string values (video titles, descriptions) cannot upset the
        # extraction the way manual brace counting does.
        js, _ = json.JSONDecoder().raw_decode(html_text, match.end() - 1)
        return js

    def get_front_page_video_list_pretty(self, channel_handle: str):
        """
        Return the first page of the channel's videos in simplified form.

        :param channel_handle: channel handle without the leading ``@``.
        :return: ``(video_list, continuation_token)``; see ``_parse_grid_items``.
        :raises AssertionError: when the tab list has an unexpected structure or no "Videos" tab.
        """
        js = self.get_front_page_video_list(channel_handle)
        tabs = js["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]

        grid_items = None
        for tab in tabs:
            if len(tab) != 1:
                raise AssertionError(f"unexpected tab structure; keys: {list(tab)}")
            if "tabRenderer" in tab:
                tab_renderer = tab["tabRenderer"]
                # The tab is matched by its English title.
                if tab_renderer["title"] == "Videos":
                    grid_items = tab_renderer["content"]["richGridRenderer"]["contents"]
                    break
            elif "expandableTabRenderer" not in tab:
                raise AssertionError(f"unexpected tab renderer; keys: {list(tab)}")

        if grid_items is None:
            raise AssertionError(f"'Videos' tab not found; channel_handle: {channel_handle}")

        return self._parse_grid_items(grid_items)

    def get_continuation_page_video_list(self, continuation_token: str):
        """
        Request the page identified by ``continuation_token`` from the browse endpoint.

        :param continuation_token: token returned together with the previous page.
        :return: the decoded JSON response (a dict).
        :raises AssertionError: on a non-200 response.
        """
        url = "https://www.youtube.com/youtubei/v1/browse"

        params = {
            "prettyPrint": "false",
        }

        data = {
            "context": {
                "client": {
                    "clientName": "WEB",
                    "clientVersion": self.client_version,
                },
            },
            "continuation": continuation_token,
        }

        response = requests.request(
            "POST",
            url=url,
            headers=self.headers,
            params=params,
            data=json.dumps(data),
            timeout=self.request_timeout,
        )
        if response.status_code != 200:
            raise AssertionError(f"request failed, status_code: {response.status_code}, text: {response.text}")

        return response.json()

    def get_continuation_page_video_list_pretty(self, continuation_token: str):
        """
        Return the page identified by ``continuation_token`` in simplified form.

        :param continuation_token: token returned together with the previous page.
        :return: ``(video_list, continuation_token)``; see ``_parse_grid_items``.
        """
        js = self.get_continuation_page_video_list(continuation_token)
        continuation_items = js["onResponseReceivedActions"][0]["appendContinuationItemsAction"]["continuationItems"]
        return self._parse_grid_items(continuation_items)

    @staticmethod
    def _simple_text(renderer: dict, key: str):
        """Return ``renderer[key]["simpleText"]``, or ``None`` when ``key`` is absent."""
        node = renderer.get(key)
        return None if node is None else node["simpleText"]

    @classmethod
    def _parse_grid_items(cls, grid_items: list):
        """
        Split a list of grid items into video entries and the continuation token.

        An item holding a ``continuationItemRenderer`` supplies the token for
        the next page; every other item is read as a ``richItemRenderer``
        wrapping a ``videoRenderer``.

        :param grid_items: items of a rich grid, from the front page or a continuation page.
        :return: ``(video_list, continuation_token)``. Each video is a dict with the keys
            ``video_id``, ``title``, ``description``, ``published_time_text`` and
            ``video_length_text``; the last three are ``None`` when the renderer lacks them.
            ``continuation_token`` is ``None`` when the items contain no continuation
            entry, i.e. there is no further page.
        """
        video_list = list()
        continuation_token = None

        for grid_item in grid_items:
            # The continuation entry is recognised by its key rather than by
            # being last, so a final page without one keeps all of its videos.
            continuation_item_renderer = grid_item.get("continuationItemRenderer")
            if continuation_item_renderer is not None:
                continuation_token = continuation_item_renderer["continuationEndpoint"]["continuationCommand"]["token"]
                continue

            video_renderer = grid_item["richItemRenderer"]["content"]["videoRenderer"]

            description = None
            description_snippet = video_renderer.get("descriptionSnippet")
            if description_snippet is not None:
                description = description_snippet["runs"][0]["text"]

            video_list.append({
                "video_id": video_renderer["videoId"],
                "title": video_renderer["title"]["runs"][0]["text"],
                "description": description,
                "published_time_text": cls._simple_text(video_renderer, "publishedTimeText"),
                "video_length_text": cls._simple_text(video_renderer, "lengthText"),
            })

        return video_list, continuation_token
+
177
+
178
def main():
    """Manual smoke test: print the first two pages of videos of a sample channel."""

    def show_page(page):
        # Print every video of the page, then the token leading to the next page.
        videos, token = page
        for entry in videos:
            print(entry)
        print(token)
        return token

    client = YoutubeVideoListSpider()

    next_token = show_page(client.get_front_page_video_list_pretty("JasonBear131"))
    show_page(client.get_continuation_page_video_list_pretty(next_token))


if __name__ == "__main__":
    main()
toolbox/youtube_spider/youtube_spider_client.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import json
4
+ import os
5
+ import argparse
6
+ from pathlib import Path
7
+ import httpx
8
+ import logging
9
+ import requests
10
+ from typing import List
11
+
12
+ from project_settings import project_path, environment
13
+ from toolbox.design_patterns.singleton import ParamsSingleton
14
+
15
+ logger = logging.getLogger("toolbox")
16
+
17
+
18
class YoutubeSpiderClient(ParamsSingleton):
    """
    Base client holding the HTTP sessions used by the YouTube spiders.

    A blocking ``requests`` session and an asynchronous ``httpx`` client are
    created the first time ``__init__`` runs on an instance; later calls on an
    already initialized instance leave it untouched.
    """

    # Default request headers: a desktop Chrome User-Agent.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
    }

    def __init__(self):
        # `_initialized` is not defined in this class; presumably it is
        # provided by ParamsSingleton - verify against that base class.
        if self._initialized:
            return

        self.credentials = None
        self.cookies = None

        self._session = requests.Session()

        keepalive_limits = httpx.Limits(max_keepalive_connections=100, keepalive_expiry=100)
        self._async_session = httpx.AsyncClient(
            http2=True,
            limits=keepalive_limits,
            headers=self.headers,
            cookies=self.cookies,
        )

        self._initialized = True

    @property
    def session(self):
        """The blocking ``requests.Session`` created in ``__init__``."""
        return self._session

    @property
    def async_session(self):
        """The ``httpx.AsyncClient`` created in ``__init__``."""
        return self._async_session
44
+
45
+
46
+ if __name__ == "__main__":
47
+ pass