Spaces:
Sleeping
html_content += f"<img src='{entry['screenshot_path']}' width='500px' />"
Browse files
app.py
CHANGED
|
@@ -66,6 +66,9 @@ def process_youtube_link(link):
|
|
| 66 |
# 使用 YouTube API 获取逐字稿
|
| 67 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
| 68 |
video_id = link.split("=")[-1]
|
|
|
|
|
|
|
|
|
|
| 69 |
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['zh-TW'])
|
| 70 |
# 基于逐字稿生成其他所需的输出
|
| 71 |
questions = generate_questions(transcript)
|
|
@@ -76,14 +79,19 @@ def process_youtube_link(link):
|
|
| 76 |
start_time = format_seconds_to_time(entry['start'])
|
| 77 |
end_time = format_seconds_to_time(entry['start'] + entry['duration'])
|
| 78 |
embed_url = get_embedded_youtube_link(video_id, entry['start'])
|
|
|
|
|
|
|
| 79 |
line = {
|
| 80 |
"start_time": start_time,
|
| 81 |
"end_time": end_time,
|
| 82 |
"text": entry['text'],
|
| 83 |
"embed_url": embed_url,
|
| 84 |
-
"time_sec": entry['start']
|
|
|
|
| 85 |
}
|
| 86 |
formatted_transcript.append(line)
|
|
|
|
|
|
|
| 87 |
|
| 88 |
html_content = format_transcript_to_html(formatted_transcript)
|
| 89 |
print("=====html_content=====")
|
|
@@ -102,7 +110,7 @@ def format_transcript_to_html(formatted_transcript):
|
|
| 102 |
for entry in formatted_transcript:
|
| 103 |
html_content += f"<h3>{entry['start_time']} - {entry['end_time']}</h3>"
|
| 104 |
html_content += f"<p>{entry['text']}</p>"
|
| 105 |
-
html_content += f"<
|
| 106 |
return html_content
|
| 107 |
|
| 108 |
def get_embedded_youtube_link(video_id, start_time):
|
|
|
|
| 66 |
# 使用 YouTube API 获取逐字稿
|
| 67 |
# 假设您已经获取了 YouTube 视频的逐字稿并存储在变量 `transcript` 中
|
| 68 |
video_id = link.split("=")[-1]
|
| 69 |
+
# 先下載 video
|
| 70 |
+
download_youtube_video(video_id, output_path=OUTPUT_PATH)
|
| 71 |
+
# 再取得 transcript
|
| 72 |
transcript = YouTubeTranscriptApi.get_transcript(video_id, languages=['zh-TW'])
|
| 73 |
# 基于逐字稿生成其他所需的输出
|
| 74 |
questions = generate_questions(transcript)
|
|
|
|
| 79 |
start_time = format_seconds_to_time(entry['start'])
|
| 80 |
end_time = format_seconds_to_time(entry['start'] + entry['duration'])
|
| 81 |
embed_url = get_embedded_youtube_link(video_id, entry['start'])
|
| 82 |
+
# 截圖
|
| 83 |
+
screenshot_path = screenshot_youtube_video(video_id, entry['start'])
|
| 84 |
line = {
|
| 85 |
"start_time": start_time,
|
| 86 |
"end_time": end_time,
|
| 87 |
"text": entry['text'],
|
| 88 |
"embed_url": embed_url,
|
| 89 |
+
"time_sec": entry['start'],
|
| 90 |
+
"screenshot_path": screenshot_path
|
| 91 |
}
|
| 92 |
formatted_transcript.append(line)
|
| 93 |
+
|
| 94 |
+
|
| 95 |
|
| 96 |
html_content = format_transcript_to_html(formatted_transcript)
|
| 97 |
print("=====html_content=====")
|
|
|
|
| 110 |
for entry in formatted_transcript:
|
| 111 |
html_content += f"<h3>{entry['start_time']} - {entry['end_time']}</h3>"
|
| 112 |
html_content += f"<p>{entry['text']}</p>"
|
| 113 |
+
html_content += f"<img src='{entry['screenshot_path']}' width='500px' />"
|
| 114 |
return html_content
|
| 115 |
|
| 116 |
def get_embedded_youtube_link(video_id, start_time):
|