Spaces:
Sleeping
Sleeping
lines=40,
Browse files
app.py
CHANGED
|
@@ -256,8 +256,9 @@ def process_transcript_and_screenshots(video_id):
|
|
| 256 |
service = init_drive_service()
|
| 257 |
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
|
| 258 |
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
|
|
|
|
|
|
|
| 259 |
file_name = f'{video_id}_transcript.json'
|
| 260 |
-
|
| 261 |
# 检查逐字稿是否存在
|
| 262 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
| 263 |
if not exists:
|
|
@@ -298,8 +299,6 @@ def process_youtube_link(link):
|
|
| 298 |
video_id = extract_youtube_id(link)
|
| 299 |
global VIDEO_ID
|
| 300 |
VIDEO_ID = video_id
|
| 301 |
-
|
| 302 |
-
|
| 303 |
download_youtube_video(video_id, output_path=OUTPUT_PATH)
|
| 304 |
|
| 305 |
try:
|
|
@@ -345,6 +344,7 @@ def process_youtube_link(link):
|
|
| 345 |
summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
|
| 346 |
summary = summary_json["summary"]
|
| 347 |
html_content = format_transcript_to_html(formatted_transcript)
|
|
|
|
| 348 |
first_image = formatted_transcript[0]['screenshot_path']
|
| 349 |
first_text = formatted_transcript[0]['text']
|
| 350 |
mind_map_json = get_mind_map(video_id, formatted_simple_transcript)
|
|
@@ -360,6 +360,7 @@ def process_youtube_link(link):
|
|
| 360 |
mind_map, \
|
| 361 |
mind_map_html, \
|
| 362 |
html_content, \
|
|
|
|
| 363 |
first_image, \
|
| 364 |
first_text,
|
| 365 |
|
|
@@ -372,7 +373,12 @@ def format_transcript_to_html(formatted_transcript):
|
|
| 372 |
html_content += f"<img src='{entry['screenshot_path']}' width='500px' />"
|
| 373 |
return html_content
|
| 374 |
|
| 375 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
|
| 377 |
def get_embedded_youtube_link(video_id, start_time):
|
| 378 |
int_start_time = int(start_time)
|
|
@@ -863,6 +869,8 @@ with gr.Blocks() as demo:
|
|
| 863 |
next_button = gr.Button("Next")
|
| 864 |
prev_button.click(fn=prev_slide, inputs=[], outputs=[slide_image, slide_text])
|
| 865 |
next_button.click(fn=next_slide, inputs=[], outputs=[slide_image, slide_text])
|
|
|
|
|
|
|
| 866 |
with gr.Tab("本文"):
|
| 867 |
df_string_output = gr.Textbox(lines=40, label="Data Text")
|
| 868 |
with gr.Tab("重點"):
|
|
@@ -907,7 +915,8 @@ with gr.Blocks() as demo:
|
|
| 907 |
df_summarise,
|
| 908 |
mind_map,
|
| 909 |
mind_map_html,
|
| 910 |
-
transcript_html,
|
|
|
|
| 911 |
slide_image,
|
| 912 |
slide_text
|
| 913 |
]
|
|
@@ -925,6 +934,7 @@ with gr.Blocks() as demo:
|
|
| 925 |
mind_map,
|
| 926 |
mind_map_html,
|
| 927 |
transcript_html,
|
|
|
|
| 928 |
slide_image,
|
| 929 |
slide_text
|
| 930 |
]
|
|
|
|
| 256 |
service = init_drive_service()
|
| 257 |
parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
|
| 258 |
folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
|
| 259 |
+
|
| 260 |
+
# 逐字稿文件名
|
| 261 |
file_name = f'{video_id}_transcript.json'
|
|
|
|
| 262 |
# 检查逐字稿是否存在
|
| 263 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
| 264 |
if not exists:
|
|
|
|
| 299 |
video_id = extract_youtube_id(link)
|
| 300 |
global VIDEO_ID
|
| 301 |
VIDEO_ID = video_id
|
|
|
|
|
|
|
| 302 |
download_youtube_video(video_id, output_path=OUTPUT_PATH)
|
| 303 |
|
| 304 |
try:
|
|
|
|
| 344 |
summary_json = get_video_id_summary(video_id, formatted_simple_transcript)
|
| 345 |
summary = summary_json["summary"]
|
| 346 |
html_content = format_transcript_to_html(formatted_transcript)
|
| 347 |
+
simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
|
| 348 |
first_image = formatted_transcript[0]['screenshot_path']
|
| 349 |
first_text = formatted_transcript[0]['text']
|
| 350 |
mind_map_json = get_mind_map(video_id, formatted_simple_transcript)
|
|
|
|
| 360 |
mind_map, \
|
| 361 |
mind_map_html, \
|
| 362 |
html_content, \
|
| 363 |
+
simple_html_content, \
|
| 364 |
first_image, \
|
| 365 |
first_text,
|
| 366 |
|
|
|
|
| 373 |
html_content += f"<img src='{entry['screenshot_path']}' width='500px' />"
|
| 374 |
return html_content
|
| 375 |
|
| 376 |
+
def format_simple_transcript_to_html(formatted_transcript):
    """Render a screenshot-free transcript as an HTML fragment.

    Each entry contributes an ``<h3>`` heading holding its time range,
    followed by a ``<p>`` paragraph holding its text.

    Args:
        formatted_transcript: Iterable of entries that can be indexed with
            the keys ``start_time``, ``end_time`` and ``text``
            (presumably dicts; verify against the caller).

    Returns:
        The concatenated HTML string; an empty string when there are no
        entries.

    Raises:
        KeyError: If a dict entry lacks one of the required keys.
    """
    # Imported locally so this function does not depend on a module-level
    # import being present elsewhere in the file.
    from html import escape

    fragments = []
    for entry in formatted_transcript:
        # Escape every interpolated value: a caption containing "<", ">" or
        # "&" would otherwise be parsed as markup by the HTML component.
        start = escape(str(entry['start_time']))
        end = escape(str(entry['end_time']))
        text = escape(str(entry['text']))
        fragments.append(f"<h3>{start} - {end}</h3><p>{text}</p>")
    # Join once instead of growing a string with repeated "+=".
    return "".join(fragments)
|
| 382 |
|
| 383 |
def get_embedded_youtube_link(video_id, start_time):
|
| 384 |
int_start_time = int(start_time)
|
|
|
|
| 869 |
next_button = gr.Button("Next")
|
| 870 |
prev_button.click(fn=prev_slide, inputs=[], outputs=[slide_image, slide_text])
|
| 871 |
next_button.click(fn=next_slide, inputs=[], outputs=[slide_image, slide_text])
|
| 872 |
+
with gr.Tab("逐字稿"):
|
| 873 |
+
# gr.HTML does not define a `lines` option (it belongs to gr.Textbox), so only the label is passed here.
simple_html_content = gr.HTML(label="Simple Transcript")
|
| 874 |
with gr.Tab("本文"):
|
| 875 |
df_string_output = gr.Textbox(lines=40, label="Data Text")
|
| 876 |
with gr.Tab("重點"):
|
|
|
|
| 915 |
df_summarise,
|
| 916 |
mind_map,
|
| 917 |
mind_map_html,
|
| 918 |
+
transcript_html,
|
| 919 |
+
simple_html_content,
|
| 920 |
slide_image,
|
| 921 |
slide_text
|
| 922 |
]
|
|
|
|
| 934 |
mind_map,
|
| 935 |
mind_map_html,
|
| 936 |
transcript_html,
|
| 937 |
+
simple_html_content,
|
| 938 |
slide_image,
|
| 939 |
slide_text
|
| 940 |
]
|