Spaces:
Sleeping
Sleeping
print("逐字稿已存在于Google Drive中")
Browse files
app.py
CHANGED
|
@@ -108,6 +108,19 @@ def upload_content_directly(service, file_name, folder_id, content):
|
|
| 108 |
# 执行上传
|
| 109 |
service.files().create(body=file_metadata, media_body=media, fields='id').execute()
|
| 110 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
|
| 112 |
def process_file(file):
|
| 113 |
# 读取文件
|
|
@@ -178,6 +191,7 @@ def process_youtube_link(link):
|
|
| 178 |
file_name = f"{video_id}_transcript.txt"
|
| 179 |
|
| 180 |
# 检查逐字稿是否存在
|
|
|
|
| 181 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
| 182 |
if not exists:
|
| 183 |
# 获取逐字稿
|
|
@@ -188,9 +202,9 @@ def process_youtube_link(link):
|
|
| 188 |
print("逐字稿已上传到Google Drive")
|
| 189 |
else:
|
| 190 |
print("逐字稿已存在于Google Drive中")
|
|
|
|
|
|
|
| 191 |
|
| 192 |
-
# 再取得 transcript
|
| 193 |
-
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['zh-TW'])
|
| 194 |
# 基于逐字稿生成其他所需的输出
|
| 195 |
questions = generate_questions(transcript)
|
| 196 |
df_summarise = generate_df_summarise(transcript)
|
|
|
|
| 108 |
# 执行上传
|
| 109 |
service.files().create(body=file_metadata, media_body=media, fields='id').execute()
|
| 110 |
|
| 111 |
+
def download_file_as_string(service, file_id):
    """Download a file from Google Drive and return its contents as a string.

    Args:
        service: Object exposing ``files().get_media(fileId=...)``;
            presumably a Google Drive API client -- confirm against the caller.
        file_id: Identifier of the file to download.

    Returns:
        The downloaded bytes decoded as UTF-8 text.

    Raises:
        UnicodeDecodeError: If the downloaded bytes are not valid UTF-8.
    """
    request = service.files().get_media(fileId=file_id)
    buffer = io.BytesIO()
    downloader = MediaIoBaseDownload(buffer, request)

    # Pull chunks until the downloader reports completion; the per-chunk
    # progress status is not needed here, so it is discarded.
    done = False
    while not done:
        _, done = downloader.next_chunk()

    # getvalue() returns the whole buffer regardless of the current stream
    # position, so no seek(0) is required before decoding.
    return buffer.getvalue().decode('utf-8')
|
| 124 |
|
| 125 |
def process_file(file):
|
| 126 |
# 读取文件
|
|
|
|
| 191 |
file_name = f"{video_id}_transcript.txt"
|
| 192 |
|
| 193 |
# 检查逐字稿是否存在
|
| 194 |
+
transcript = None
|
| 195 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
| 196 |
if not exists:
|
| 197 |
# 获取逐字稿
|
|
|
|
| 202 |
print("逐字稿已上传到Google Drive")
|
| 203 |
else:
|
| 204 |
print("逐字稿已存在于Google Drive中")
|
| 205 |
+
transcript_text = download_file_as_string(service, file_id)
|
| 206 |
+
transcript = json.loads(transcript_text)
|
| 207 |
|
|
|
|
|
|
|
| 208 |
# 基于逐字稿生成其他所需的输出
|
| 209 |
questions = generate_questions(transcript)
|
| 210 |
df_summarise = generate_df_summarise(transcript)
|