Spaces:
Paused
Paused
update
Browse files- main.py +2 -0
- tabs/fs_tab.py +3 -1
- tabs/video_download.py +60 -0
- toolbox/douyin/video/download.py +86 -1
main.py
CHANGED
|
@@ -46,6 +46,7 @@ from toolbox.os.command import Command
|
|
| 46 |
from toolbox.porter.manager import PorterManager
|
| 47 |
from tabs.fs_tab import get_fs_tab
|
| 48 |
from tabs.shell_tab import get_shell_tab
|
|
|
|
| 49 |
from tabs.youtube_player_tab import get_youtube_player_tab
|
| 50 |
|
| 51 |
|
|
@@ -136,6 +137,7 @@ def main():
|
|
| 136 |
with gr.Tabs():
|
| 137 |
_ = get_fs_tab()
|
| 138 |
_ = get_shell_tab()
|
|
|
|
| 139 |
_ = get_youtube_player_tab()
|
| 140 |
|
| 141 |
# http://127.0.0.1:7870/
|
|
|
|
| 46 |
from toolbox.porter.manager import PorterManager
|
| 47 |
from tabs.fs_tab import get_fs_tab
|
| 48 |
from tabs.shell_tab import get_shell_tab
|
| 49 |
+
from tabs.video_download import get_video_download_tab
|
| 50 |
from tabs.youtube_player_tab import get_youtube_player_tab
|
| 51 |
|
| 52 |
|
|
|
|
| 137 |
with gr.Tabs():
|
| 138 |
_ = get_fs_tab()
|
| 139 |
_ = get_shell_tab()
|
| 140 |
+
_ = get_video_download_tab()
|
| 141 |
_ = get_youtube_player_tab()
|
| 142 |
|
| 143 |
# http://127.0.0.1:7870/
|
tabs/fs_tab.py
CHANGED
|
@@ -6,12 +6,14 @@ from project_settings import project_path
|
|
| 6 |
|
| 7 |
|
| 8 |
def get_fs_tab():
|
|
|
|
| 9 |
with gr.TabItem("fs"):
|
| 10 |
with gr.Row():
|
| 11 |
with gr.Column(scale=3):
|
| 12 |
fs_filename = gr.Textbox(label="filename", max_lines=10)
|
| 13 |
fs_file = gr.File(label="file")
|
| 14 |
-
fs_file_dir = gr.
|
|
|
|
| 15 |
fs_query = gr.Button("query", variant="primary")
|
| 16 |
with gr.Column(scale=7):
|
| 17 |
fs_filelist_dataset_state = gr.State(value=[])
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def get_fs_tab():
|
| 9 |
+
fs_file_dir_choices = ["data/tasks", "data/video"]
|
| 10 |
with gr.TabItem("fs"):
|
| 11 |
with gr.Row():
|
| 12 |
with gr.Column(scale=3):
|
| 13 |
fs_filename = gr.Textbox(label="filename", max_lines=10)
|
| 14 |
fs_file = gr.File(label="file")
|
| 15 |
+
fs_file_dir = gr.Dropdown(choices=fs_file_dir_choices, value=fs_file_dir_choices[0],
|
| 16 |
+
allow_custom_value=True, label="file_dir")
|
| 17 |
fs_query = gr.Button("query", variant="primary")
|
| 18 |
with gr.Column(scale=7):
|
| 19 |
fs_filelist_dataset_state = gr.State(value=[])
|
tabs/video_download.py
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
import os
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
import gradio as gr
|
| 7 |
+
|
| 8 |
+
from project_settings import project_path
|
| 9 |
+
from toolbox.douyin.video.download import VideoDownload
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
def get_client(platform: str):
    """Return a downloader client for the given platform name.

    The name is compared case-insensitively after being coerced to ``str``.
    Only ``"douyin"`` is recognised.

    Raises:
        AssertionError: if the platform is anything other than Douyin.
    """
    platform = str(platform).lower()
    # Guard clause: reject anything we do not know how to handle.
    if platform != "douyin":
        raise AssertionError(f"invalid platform: {platform}")
    return VideoDownload()
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def download_douyin_video(text: str, platform: str, filename: str):
    """Download the video referenced by a share message and return its path.

    Args:
        text: Share message containing the video link.
        platform: Platform name, resolved to a client via ``get_client``.
        filename: Destination file. A relative path is resolved against
            ``project_path``; an absolute path is used as given.

    Returns:
        The resolved destination path (a path object).

    Raises:
        ValueError: if ``filename`` is empty or only whitespace.
        AssertionError: propagated from ``get_client`` or the client when the
            platform is unknown or the download fails.
    """
    # A blank name would resolve to the project directory itself and fail
    # later with an opaque OS error when opened for writing.
    filename = str(filename or "").strip()
    if not filename:
        raise ValueError("filename must not be empty")

    if os.path.isabs(filename):
        target = Path(filename)
    else:
        target = project_path / filename
    target.parent.mkdir(parents=True, exist_ok=True)

    client = get_client(platform)
    client.download_video_by_share_text(text, target)
    return target
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def get_video_download_tab():
    """Build the "download" tab and wire its button to the download handler.

    Must be called inside an active Gradio layout context. Returns
    ``locals()``, i.e. a dict of every local defined here (choices, sample
    text and the created components), so local names are part of the result.
    """
    download_platform_choices = ["Douyin"]

    # Sample share message used to pre-fill the input box.
    share_text = """6.66 g@B.TL 01/22 pDH:/ 骆驼祥子的大结局,是普通人无法逃脱的命运吗 # 老舍 # 骆驼祥子 https://v.douyin.com/Bocl1I_wcdg/ 复制此链接,打开Dou音搜索,直接观看视频!"""

    with gr.TabItem("download"):
        with gr.Row():
            # Left column: inputs and the trigger button.
            with gr.Column(scale=3):
                download_text = gr.Textbox(
                    value=share_text,
                    label="text",
                    lines=3,
                    max_lines=20,
                )
                download_platform = gr.Dropdown(
                    choices=download_platform_choices,
                    value=download_platform_choices[0],
                    label="platform",
                )
                download_filename = gr.Textbox(
                    value="data/video/download/video.mp4",
                    label="filename",
                )

                download_button = gr.Button(value="download", variant="primary")
            # Right column: where the saved file path is shown.
            with gr.Column(scale=3):
                download_output_file = gr.Textbox(label="output_file")

        download_button.click(
            download_douyin_video,
            inputs=[download_text, download_platform, download_filename],
            outputs=[download_output_file],
        )

    return locals()
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
if __name__ == "__main__":
    # Nothing to run when this module is executed directly.
    pass
|
toolbox/douyin/video/download.py
CHANGED
|
@@ -6,6 +6,8 @@ from datetime import datetime
|
|
| 6 |
import json
|
| 7 |
import logging
|
| 8 |
from pathlib import Path
|
|
|
|
|
|
|
| 9 |
from zoneinfo import ZoneInfo
|
| 10 |
|
| 11 |
from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed
|
|
@@ -306,6 +308,75 @@ class VideoDownload(DouyinClient):
|
|
| 306 |
result.append(task)
|
| 307 |
return result
|
| 308 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
|
| 310 |
def get_args():
|
| 311 |
parser = argparse.ArgumentParser()
|
|
@@ -394,5 +465,19 @@ async def main2():
|
|
| 394 |
return
|
| 395 |
|
| 396 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
if __name__ == "__main__":
|
| 398 |
-
asyncio.run(main2())
|
|
|
|
|
|
| 6 |
import json
|
| 7 |
import logging
|
| 8 |
from pathlib import Path
|
| 9 |
+
import re
|
| 10 |
+
import requests
|
| 11 |
from zoneinfo import ZoneInfo
|
| 12 |
|
| 13 |
from tenacity import before_sleep_log, retry, retry_if_exception_type, stop_after_attempt, wait_fixed
|
|
|
|
| 308 |
result.append(task)
|
| 309 |
return result
|
| 310 |
|
| 311 |
+
@staticmethod
def get_share_url_by_share_text(text: str):
    """Extract the first ``v.douyin.com`` short link from a share message.

    Matching is case-insensitive. Both ``http`` and ``https`` links are
    accepted, with or without a trailing slash; the trailing slash is kept
    when present.

    Args:
        text: The copied share message.

    Returns:
        The matched short link exactly as it appears in ``text``.

    Raises:
        AssertionError: if ``text`` is empty/``None`` or contains no link.
    """
    pattern = r"https?://v\.douyin\.com/[A-Za-z0-9_\-]+/?"

    # ``text or ""`` makes a missing message fail with the same error as a
    # message without a link, instead of a TypeError from ``re``.
    match = re.search(pattern, text or "", flags=re.IGNORECASE)
    if match is None:
        raise AssertionError(f"no share url found; text: {text}")
    return match.group(0)
|
| 320 |
+
|
| 321 |
+
def get_video_download_url_by_share_url(self, share_url: str, timeout: float = 30):
    """Resolve a Douyin short share link to a direct video URL.

    Steps:
      1. GET ``share_url`` with ``self.headers`` and take the last path
         segment of the final response URL as the video id.
      2. GET ``https://www.iesdouyin.com/share/video/<id>`` with a mobile
         User-Agent.
      3. Parse the JSON assigned to ``window._ROUTER_DATA`` in that page and
         read the first play address of the first item.

    Args:
        share_url: Short share link.
        timeout: Seconds passed to each HTTP request as its timeout.

    Returns:
        The first play URL with ``"playwm"`` replaced by ``"play"``.

    Raises:
        AssertionError: on a non-200 response or when the embedded data
            cannot be located in the page.

    Errors from ``requests`` (including timeouts), ``json.loads`` and the
    nested key/index lookups are not caught here and propagate.
    """
    mobile_user_agent = "Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) EdgiOS/121.0.2277.107 Version/17.0 Mobile/15E148 Safari/604.1"

    # Without a timeout a stalled server would block the caller forever.
    response = requests.request(
        "GET",
        url=share_url,
        headers=self.headers,
        timeout=timeout,
    )
    if response.status_code != 200:
        raise AssertionError(f"invalid share_url: {share_url}, status_code: {response.status_code}")

    # Drop the query string and surrounding slashes; the last path segment
    # of the final URL is used as the video id.
    video_id = response.url.split("?")[0].strip("/").split("/")[-1]
    video_url = f"https://www.iesdouyin.com/share/video/{video_id}"

    response = requests.request(
        "GET",
        url=video_url,
        headers={"User-Agent": mobile_user_agent},
        timeout=timeout,
    )
    if response.status_code != 200:
        raise AssertionError(f"request failed; video_url: {video_url}")

    # Extract the video information with a regular expression.
    pattern = re.compile(r"window\._ROUTER_DATA\s*=\s*(.*?)</script>", re.DOTALL)
    match = pattern.search(response.text)
    if match is None:
        # Only a prefix of the page goes into the message so the exception
        # stays readable.
        raise AssertionError(f"pattern parse failed; text: {response.text[:500]}")

    js = json.loads(match.group(1).strip())
    data = js["loaderData"]["video_(id)/page"]["videoInfoRes"]["item_list"][0]
    # NOTE(review): "playwm" -> "play" presumably selects the variant
    # without a watermark; confirm against the service.
    video_download_url = data["video"]["play_addr"]["url_list"][0].replace("playwm", "play")
    return video_download_url
|
| 355 |
+
|
| 356 |
+
def download_video_by_video_download_url(self, video_download_url: str, filename: str, timeout: float = 30):
    """Stream a video from ``video_download_url`` into ``filename``.

    Args:
        video_download_url: Direct URL of the video file.
        filename: Destination path, opened in binary write mode.
        timeout: Seconds passed to the HTTP request as its timeout.

    Returns:
        ``filename`` unchanged.

    Raises:
        AssertionError: if the server does not answer with status 200.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 17_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) EdgiOS/121.0.2277.107 Version/17.0 Mobile/15E148 Safari/604.1"
    }
    # A streamed response holds its connection until it is closed, so use
    # it as a context manager to release it on every path, errors included.
    with requests.request(
        "GET",
        url=video_download_url,
        headers=headers,
        stream=True,
        timeout=timeout,
    ) as response:
        if response.status_code != 200:
            raise AssertionError(f"request failed; status_code: {response.status_code}, url: {video_download_url}")

        # Write in fixed-size chunks so the whole video is never in memory.
        with open(filename, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)
    return filename
|
| 373 |
+
|
| 374 |
+
def download_video_by_share_text(self, text: str, filename: str):
    """Download the video referenced by a share message.

    Chains the three steps: extract the share link from ``text``, resolve it
    to a direct video URL, then save that video to ``filename``.

    Returns:
        ``filename`` unchanged.
    """
    direct_url = self.get_video_download_url_by_share_url(
        self.get_share_url_by_share_text(text)
    )
    self.download_video_by_video_download_url(direct_url, filename)
    return filename
|
| 379 |
+
|
| 380 |
|
| 381 |
def get_args():
|
| 382 |
parser = argparse.ArgumentParser()
|
|
|
|
| 465 |
return
|
| 466 |
|
| 467 |
|
| 468 |
+
def main3():
    """Manual smoke test: download one sample share message to ``test.mp4``.

    Performs real network requests and prints the path that was written.
    """
    text = """
6.66 g@B.TL 01/22 pDH:/ 骆驼祥子的大结局,是普通人无法逃脱的命运吗 # 老舍 # 骆驼祥子 https://v.douyin.com/Bocl1I_wcdg/ 复制此链接,打开Dou音搜索,直接观看视频!
"""

    downloader = VideoDownload()
    saved_path = downloader.download_video_by_share_text(text, "test.mp4")
    print(saved_path)
|
| 479 |
+
|
| 480 |
+
|
| 481 |
if __name__ == "__main__":
    # The async entry point is currently disabled in favour of the
    # share-text demo: asyncio.run(main2())
    main3()
|