Spaces:
Sleeping
Sleeping
from youtube_transcript_api._errors import NoTranscriptFound
Browse files
app.py
CHANGED
|
@@ -6,7 +6,10 @@ from docx import Document
|
|
| 6 |
import os
|
| 7 |
from openai import OpenAI
|
| 8 |
import json
|
|
|
|
| 9 |
from youtube_transcript_api import YouTubeTranscriptApi
|
|
|
|
|
|
|
| 10 |
|
| 11 |
from moviepy.editor import VideoFileClip
|
| 12 |
from pytube import YouTube
|
|
@@ -218,6 +221,16 @@ def extract_youtube_id(url):
|
|
| 218 |
else:
|
| 219 |
return None
|
| 220 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 221 |
def process_transcript_and_screenshots(video_id):
|
| 222 |
print("====process_transcript_and_screenshots====")
|
| 223 |
service = init_drive_service()
|
|
@@ -229,7 +242,11 @@ def process_transcript_and_screenshots(video_id):
|
|
| 229 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
| 230 |
if not exists:
|
| 231 |
# 从YouTube获取逐字稿并上传
|
| 232 |
-
transcript =
|
|
|
|
|
|
|
|
|
|
|
|
|
| 233 |
transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
| 234 |
file_id = upload_content_directly(service, file_name, folder_id, transcript_text)
|
| 235 |
print("逐字稿已上传到Google Drive")
|
|
|
|
| 6 |
import os
|
| 7 |
from openai import OpenAI
|
| 8 |
import json
|
| 9 |
+
|
| 10 |
from youtube_transcript_api import YouTubeTranscriptApi
|
| 11 |
+
from youtube_transcript_api._errors import NoTranscriptFound
|
| 12 |
+
|
| 13 |
|
| 14 |
from moviepy.editor import VideoFileClip
|
| 15 |
from pytube import YouTube
|
|
|
|
| 221 |
else:
|
| 222 |
return None
|
| 223 |
|
| 224 |
+
def get_transcript(video_id):
    """Fetch the transcript of a YouTube video in the preferred language.

    Languages are tried in descending priority: 'zh-TW', then 'zh-Hant',
    then 'en'. The whole priority list is handed to the API in a single
    call, so the language fallback happens inside the library instead of
    issuing one request per language.

    Args:
        video_id: The YouTube video ID to look up.

    Returns:
        Whatever ``YouTubeTranscriptApi.get_transcript`` returns for the
        first language in the priority list that has a transcript, or
        ``None`` when none of the listed languages is available.

    Raises:
        Any error other than ``NoTranscriptFound`` raised by the transcript
        API is propagated to the caller unchanged.
    """
    languages = ['zh-TW', 'zh-Hant', 'en']  # descending priority order
    try:
        # The API walks `languages` in order and returns the first match.
        return YouTubeTranscriptApi.get_transcript(video_id, languages=languages)
    except NoTranscriptFound:
        # None of the preferred languages has a transcript.
        return None
|
| 233 |
+
|
| 234 |
def process_transcript_and_screenshots(video_id):
|
| 235 |
print("====process_transcript_and_screenshots====")
|
| 236 |
service = init_drive_service()
|
|
|
|
| 242 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
| 243 |
if not exists:
|
| 244 |
# 从YouTube获取逐字稿并上传
|
| 245 |
+
transcript = get_transcript(video_id)
|
| 246 |
+
if transcript:
|
| 247 |
+
print("成功獲取字幕")
|
| 248 |
+
else:
|
| 249 |
+
print("沒有找到字幕")
|
| 250 |
transcript_text = json.dumps(transcript, ensure_ascii=False, indent=2)
|
| 251 |
file_id = upload_content_directly(service, file_name, folder_id, transcript_text)
|
| 252 |
print("逐字稿已上传到Google Drive")
|