Spaces:
Sleeping
Sleeping
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
Browse files
app.py
CHANGED
|
@@ -14,6 +14,8 @@ import os
|
|
| 14 |
|
| 15 |
from google.oauth2 import service_account
|
| 16 |
from googleapiclient.discovery import build
|
|
|
|
|
|
|
| 17 |
|
| 18 |
from urllib.parse import urlparse, parse_qs
|
| 19 |
|
|
@@ -55,6 +57,24 @@ def init_drive_service():
|
|
| 55 |
service = build('drive', 'v3', credentials=credentials)
|
| 56 |
return service
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
# 检查Google Drive上是否存在文件
|
| 59 |
def check_file_exists(service, folder_name, file_name):
|
| 60 |
query = f"name = '{file_name}' and '{folder_name}' in parents and trashed = false"
|
|
@@ -136,17 +156,20 @@ def process_youtube_link(link):
|
|
| 136 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
| 137 |
video_id = extract_youtube_id(link)
|
| 138 |
service = init_drive_service()
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
| 140 |
file_name = f"{video_id}_transcript.txt"
|
| 141 |
|
| 142 |
# 检查逐字稿是否存在
|
| 143 |
-
exists, file_id = check_file_exists(service,
|
| 144 |
if not exists:
|
| 145 |
# 获取逐字稿
|
| 146 |
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
|
| 147 |
transcript_text = "\n".join([f"{item['start']}: {item['text']}" for item in transcript])
|
| 148 |
# 上传到Google Drive
|
| 149 |
-
upload_to_drive(service, file_name,
|
| 150 |
print("逐字稿已上传到Google Drive")
|
| 151 |
else:
|
| 152 |
print("逐字稿已存在于Google Drive中")
|
|
|
|
| 14 |
|
| 15 |
from google.oauth2 import service_account
|
| 16 |
from googleapiclient.discovery import build
|
| 17 |
+
from googleapiclient.http import MediaFileUpload
|
| 18 |
+
|
| 19 |
|
| 20 |
from urllib.parse import urlparse, parse_qs
|
| 21 |
|
|
|
|
| 57 |
service = build('drive', 'v3', credentials=credentials)
|
| 58 |
return service
|
| 59 |
|
| 60 |
+
def create_folder_if_not_exists(service, folder_name, parent_id):
    """Return the ID of the folder named *folder_name* under *parent_id*, creating it if absent.

    Args:
        service: Drive client object exposing ``files().list(...)`` and
            ``files().create(...)`` (elsewhere in this file it is built with
            ``build('drive', 'v3', ...)``).
        folder_name: Name of the folder to look up or create.
        parent_id: ID of the Drive folder expected to contain it.

    Returns:
        The ``id`` of the first matching folder if the search returns any,
        otherwise the ``id`` of the folder that was just created.
    """
    def _quote(value):
        # Drive query string literals are single-quoted; a raw backslash or
        # single quote inside the value would break (or alter) the query, so
        # both are backslash-escaped. Plain values pass through unchanged.
        return str(value).replace("\\", "\\\\").replace("'", "\\'")

    query = (
        "mimeType='application/vnd.google-apps.folder' "
        f"and name='{_quote(folder_name)}' "
        f"and '{_quote(parent_id)}' in parents and trashed=false"
    )
    response = service.files().list(
        q=query, spaces='drive', fields="files(id, name)"
    ).execute()
    folders = response.get('files', [])

    if folders:
        # Folder already exists: reuse the first match.
        return folders[0]['id']

    # Folder does not exist: create it under the given parent.
    # The metadata carries the raw (unescaped) name; escaping is only for the query.
    file_metadata = {
        'name': folder_name,
        'mimeType': 'application/vnd.google-apps.folder',
        'parents': [parent_id],
    }
    folder = service.files().create(body=file_metadata, fields='id').execute()
    return folder.get('id')
|
| 77 |
+
|
| 78 |
# 检查Google Drive上是否存在文件
|
| 79 |
def check_file_exists(service, folder_name, file_name):
|
| 80 |
query = f"name = '{file_name}' and '{folder_name}' in parents and trashed = false"
|
|
|
|
| 156 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
| 157 |
video_id = extract_youtube_id(link)
|
| 158 |
service = init_drive_service()
|
| 159 |
+
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL' # youtube逐字稿圖檔的ID
|
| 160 |
+
|
| 161 |
+
# 检查/创建视频ID命名的子文件夹
|
| 162 |
+
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
|
| 163 |
file_name = f"{video_id}_transcript.txt"
|
| 164 |
|
| 165 |
# 检查逐字稿是否存在
|
| 166 |
+
exists, file_id = check_file_exists(service, folder_id, file_name)
|
| 167 |
if not exists:
|
| 168 |
# 获取逐字稿
|
| 169 |
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['en'])
|
| 170 |
transcript_text = "\n".join([f"{item['start']}: {item['text']}" for item in transcript])
|
| 171 |
# 上传到Google Drive
|
| 172 |
+
upload_to_drive(service, file_name, folder_id, transcript_text)
|
| 173 |
print("逐字稿已上传到Google Drive")
|
| 174 |
else:
|
| 175 |
print("逐字稿已存在于Google Drive中")
|