HoneyTian committed on
Commit
96f3bb9
·
1 Parent(s): 38976a3
main.py CHANGED
@@ -46,6 +46,7 @@ from toolbox.os.command import Command
46
  from toolbox.porter.manager import PorterManager
47
  from tabs.fs_tab import get_fs_tab
48
  from tabs.shell_tab import get_shell_tab
 
49
  from tabs.youtube_player_tab import get_youtube_player_tab
50
 
51
 
@@ -136,6 +137,7 @@ def main():
136
  with gr.Tabs():
137
  _ = get_fs_tab()
138
  _ = get_shell_tab()
 
139
  _ = get_youtube_player_tab()
140
 
141
  # http://127.0.0.1:7870/
 
46
  from toolbox.porter.manager import PorterManager
47
  from tabs.fs_tab import get_fs_tab
48
  from tabs.shell_tab import get_shell_tab
49
+ from tabs.video_download import get_video_download_tab
50
  from tabs.youtube_player_tab import get_youtube_player_tab
51
 
52
 
 
137
  with gr.Tabs():
138
  _ = get_fs_tab()
139
  _ = get_shell_tab()
140
+ _ = get_video_download_tab()
141
  _ = get_youtube_player_tab()
142
 
143
  # http://127.0.0.1:7870/
tabs/fs_tab.py CHANGED
@@ -6,12 +6,14 @@ from project_settings import project_path
6
 
7
 
8
  def get_fs_tab():
 
9
  with gr.TabItem("fs"):
10
  with gr.Row():
11
  with gr.Column(scale=3):
12
  fs_filename = gr.Textbox(label="filename", max_lines=10)
13
  fs_file = gr.File(label="file")
14
- fs_file_dir = gr.Textbox(value="data/tasks", label="file_dir")
 
15
  fs_query = gr.Button("query", variant="primary")
16
  with gr.Column(scale=7):
17
  fs_filelist_dataset_state = gr.State(value=[])
 
6
 
7
 
8
  def get_fs_tab():
9
+ fs_file_dir_choices = ["data/tasks", "data/video"]
10
  with gr.TabItem("fs"):
11
  with gr.Row():
12
  with gr.Column(scale=3):
13
  fs_filename = gr.Textbox(label="filename", max_lines=10)
14
  fs_file = gr.File(label="file")
15
+ fs_file_dir = gr.Dropdown(choices=fs_file_dir_choices, value=fs_file_dir_choices[0],
16
+ allow_custom_value=True, label="file_dir")
17
  fs_query = gr.Button("query", variant="primary")
18
  with gr.Column(scale=7):
19
  fs_filelist_dataset_state = gr.State(value=[])
tabs/video_download.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python3
2
+ # -*- coding: utf-8 -*-
3
+ import os
4
+ from pathlib import Path
5
+
6
+ import gradio as gr
7
+
8
+ from project_settings import project_path
9
+ from toolbox.douyin.video.download import VideoDownload
10
+
11
+
12
def get_client(platform: str):
    """Return a download client for the named platform.

    The platform name is coerced to ``str`` and compared case-insensitively.

    Args:
        platform: platform name; only "douyin" (any casing) is accepted.

    Returns:
        A new ``VideoDownload`` instance.

    Raises:
        AssertionError: if the platform is anything other than "douyin".
    """
    normalized = str(platform).lower()
    # Guard clause: reject unknown platforms before building any client.
    if normalized != "douyin":
        raise AssertionError(f"invalid platform: {normalized}")
    return VideoDownload()
19
+
20
+
21
def download_douyin_video(text: str, platform: str, filename: str):
    """Download the video referenced by a share text and return where it was saved.

    Args:
        text: share text containing the video link.
        platform: platform name, resolved to a client by ``get_client``.
        filename: destination path; a relative path is resolved against
            ``project_path``, an absolute path is used as given.

    Returns:
        The destination as a path object.
    """
    target = Path(filename) if os.path.isabs(filename) else project_path / filename
    # Create the destination directory up front so the client can open the file.
    target.parent.mkdir(parents=True, exist_ok=True)

    get_client(platform).download_video_by_share_text(text, target)
    return target
32
+
33
+
34
def get_video_download_tab():
    """Build the "download" tab and wire its button to ``download_douyin_video``.

    The tab has an input column (share text, platform dropdown, target
    filename, download button) and an output column showing the saved path.

    Returns:
        ``locals()``: a dict of every local name defined here (the choices
        list, the sample share text and the Gradio components), so the local
        variable names are part of the return value.
    """
    download_platform_choices = ["Douyin"]

    # Sample share text used as the default value of the text box.
    share_text = """6.66 g@B.TL 01/22 pDH:/ 骆驼祥子的大结局,是普通人无法逃脱的命运吗 # 老舍 # 骆驼祥子 https://v.douyin.com/Bocl1I_wcdg/ 复制此链接,打开Dou音搜索,直接观看视频!"""

    # NOTE(review): the nesting below was reconstructed from a flattened diff;
    # confirm which column the button belongs to and where the click handler sits.
    with gr.TabItem("download"):
        with gr.Row():
            with gr.Column(scale=3):
                download_text = gr.Textbox(value=share_text, label="text", lines=3, max_lines=20)
                download_platform = gr.Dropdown(choices=download_platform_choices, value=download_platform_choices[0], label="platform")
                download_filename = gr.Textbox(value="data/video/download/video.mp4", label="filename")

                download_button = gr.Button(value="download", variant="primary")
            with gr.Column(scale=3):
                download_output_file = gr.Textbox(label="output_file")

        # The handler receives (text, platform, filename) and its return value
        # is shown in the output text box.
        download_button.click(
            download_douyin_video,
            inputs=[download_text, download_platform, download_filename],
            outputs=[download_output_file],
        )

    return locals()
57
+
58
+
59
if __name__ == "__main__":
    # No standalone behavior: running this module directly does nothing.
    pass
toolbox/douyin/video/download.py CHANGED
@@ -6,6 +6,8 @@ from datetime import datetime
6
  import json
7
  import logging
8
  from pathlib import Path
 
 
9
  from zoneinfo import ZoneInfo
10
 
11
  from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed
@@ -306,6 +308,75 @@ class VideoDownload(DouyinClient):
306
  result.append(task)
307
  return result
308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
309
 
310
  def get_args():
311
  parser = argparse.ArgumentParser()
@@ -394,5 +465,19 @@ async def main2():
394
  return
395
 
396
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
  if __name__ == "__main__":
398
- asyncio.run(main2())
 
 
6
  import json
7
  import logging
8
  from pathlib import Path
9
+ import re
10
+ import requests
11
  from zoneinfo import ZoneInfo
12
 
13
  from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed
 
308
  result.append(task)
309
  return result
310
 
311
@staticmethod
def get_share_url_by_share_text(text: str) -> str:
    """Extract the first ``https://v.douyin.com/<code>/`` link from a share text.

    Matching is case-insensitive and the link must end with a slash.

    Args:
        text: free-form share text copied from the app.

    Returns:
        The matched link exactly as it appears in *text*.

    Raises:
        AssertionError: if *text* contains no such link.
    """
    share_link = re.compile(r"https://v\.douyin\.com/[A-Za-z0-9_\-]+/", re.IGNORECASE)

    found = share_link.search(text)
    if found is not None:
        return found.group(0)
    raise AssertionError(f"no share url found; text: {text}")
320
+
321
def get_video_download_url_by_share_url(self, share_url: str, timeout: float = 30.0):
    """Resolve a short share link to a direct video download URL.

    Requests the share link, takes the last path segment of the final
    response URL as the video id, fetches the matching
    ``https://www.iesdouyin.com/share/video/<id>`` page with a mobile
    User-Agent, and reads the first play address from the
    ``window._ROUTER_DATA`` JSON embedded in that page.

    Args:
        share_url: short share link, e.g. ``https://v.douyin.com/<code>/``.
        timeout: seconds to wait on each HTTP request; without a timeout a
            stalled server would block the caller indefinitely.

    Returns:
        The first play address with ``playwm`` replaced by ``play``
        (presumably the non-watermarked stream; confirm).

    Raises:
        AssertionError: on a non-200 response, when the page has no
            ``window._ROUTER_DATA`` payload, or when the payload holds no
            video entry.
        requests.RequestException: on transport errors, including timeouts.
    """
    response = requests.request(
        "GET",
        url=share_url,
        headers=self.headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise AssertionError(f"invalid share_url: {share_url}, status_code: {response.status_code}")

    # response.url is the address after redirects; drop the query string and
    # keep the last path segment as the video id.
    video_id = response.url.split("?")[0].strip("/").split("/")[-1]
    video_url = f"https://www.iesdouyin.com/share/video/{video_id}"
    headers = {
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) EdgiOS/121.0.2277.107 Version/17.0 Mobile/15E148 Safari/604.1"
    }
    response = requests.request(
        "GET",
        url=video_url,
        headers=headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise AssertionError(f"request failed; video_url: {video_url}")

    # Extract the video info with a regular expression.
    pattern = re.compile(
        pattern=r"window\._ROUTER_DATA\s*=\s*(.*?)</script>",
        flags=re.DOTALL
    )
    match = pattern.search(response.text)
    if match is None:
        raise AssertionError(f"pattern parse failed; text: {response.text}")

    js = json.loads(match.group(1).strip())
    try:
        data = js["loaderData"]["video_(id)/page"]["videoInfoRes"]["item_list"][0]
        play_url = data["video"]["play_addr"]["url_list"][0]
    except (KeyError, IndexError, TypeError) as error:
        # Report missing or empty page data the same way as the other
        # failures here instead of leaking a bare lookup error.
        raise AssertionError(f"no video found in page data; video_url: {video_url}") from error
    return play_url.replace("playwm", "play")
355
+
356
def download_video_by_video_download_url(self, video_download_url: str, filename: str, timeout: float = 30.0):
    """Stream a video from a direct download URL into a local file.

    Args:
        video_download_url: direct URL of the video content.
        filename: destination path; opened in binary write mode, so an
            existing file is overwritten.
        timeout: seconds to wait for the connection and for each read;
            without a timeout a stalled server would block indefinitely.

    Returns:
        *filename*, unchanged.

    Raises:
        AssertionError: if the server does not answer with status 200.
        requests.RequestException: on transport errors, including timeouts.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) EdgiOS/121.0.2277.107 Version/17.0 Mobile/15E148 Safari/604.1"
    }
    # With stream=True the connection stays open until the body is consumed
    # or the response is closed; the context manager releases it on every
    # path, including the error ones.
    with requests.request(
        "GET",
        url=video_download_url,
        headers=headers,
        stream=True,
        timeout=timeout,
    ) as response:
        if response.status_code != 200:
            raise AssertionError(f"request failed; status_code: {response.status_code}, url: {video_download_url}")

        with open(filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
    return filename
373
+
374
def download_video_by_share_text(self, text: str, filename: str):
    """Download the video referenced by a share text into *filename*.

    Chains three steps: pull the share link out of *text*, resolve it to a
    direct download URL, then save that URL's content to *filename*.

    Args:
        text: share text containing the share link.
        filename: destination path for the downloaded video.

    Returns:
        *filename*, unchanged.
    """
    play_url = self.get_video_download_url_by_share_url(
        self.get_share_url_by_share_text(text)
    )
    self.download_video_by_video_download_url(play_url, filename)
    return filename
379
+
380
 
381
  def get_args():
382
  parser = argparse.ArgumentParser()
 
465
  return
466
 
467
 
468
def main3():
    """Manual smoke test: download the video from a sample share text.

    Builds a ``VideoDownload`` client, downloads the video referenced by the
    sample text to ``test.mp4`` (a relative path, so it lands in the current
    working directory) and prints the returned filename.
    """
    client = VideoDownload()

    # NOTE(review): whitespace inside this literal was reconstructed from a
    # flattened diff; confirm against the real file.
    text = """
    6.66 g@B.TL 01/22 pDH:/ 骆驼祥子的大结局,是普通人无法逃脱的命运吗 # 老舍 # 骆驼祥子 https://v.douyin.com/Bocl1I_wcdg/ 复制此链接,打开Dou音搜索,直接观看视频!
    """

    filename = client.download_video_by_share_text(text, "test.mp4")
    print(filename)

    return
479
+
480
+
481
if __name__ == "__main__":
    # The async entry point is disabled for now; running this module as a
    # script performs the synchronous single-video download demo instead.
    # asyncio.run(main2())
    main3()