Spaces:
Sleeping
Sleeping
formatted_simple_transcript.append(simple_line)
Browse files
app.py
CHANGED
|
@@ -282,6 +282,7 @@ def process_youtube_link(link):
|
|
| 282 |
transcript = process_transcript_and_screenshots(video_id)
|
| 283 |
|
| 284 |
formatted_transcript = []
|
|
|
|
| 285 |
screenshot_paths = []
|
| 286 |
for entry in transcript:
|
| 287 |
start_time = format_seconds_to_time(entry['start'])
|
|
@@ -297,6 +298,13 @@ def process_youtube_link(link):
|
|
| 297 |
"screenshot_path": screenshot_path
|
| 298 |
}
|
| 299 |
formatted_transcript.append(line)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 300 |
screenshot_paths.append(screenshot_path)
|
| 301 |
|
| 302 |
html_content = format_transcript_to_html(formatted_transcript)
|
|
@@ -305,10 +313,9 @@ def process_youtube_link(link):
|
|
| 305 |
print("=====html_content=====")
|
| 306 |
|
| 307 |
# 基于逐字稿生成其他所需的输出
|
| 308 |
-
questions = generate_questions(
|
| 309 |
-
# 将 DataFrame 转换为纯文本,並分行
|
| 310 |
df_string_output = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
| 311 |
-
df_summarise = generate_df_summarise(
|
| 312 |
|
| 313 |
global TRANSCRIPTS
|
| 314 |
TRANSCRIPTS = formatted_transcript
|
|
|
|
| 282 |
transcript = process_transcript_and_screenshots(video_id)
|
| 283 |
|
| 284 |
formatted_transcript = []
|
| 285 |
+
formatted_simple_transcript =[]
|
| 286 |
screenshot_paths = []
|
| 287 |
for entry in transcript:
|
| 288 |
start_time = format_seconds_to_time(entry['start'])
|
|
|
|
| 298 |
"screenshot_path": screenshot_path
|
| 299 |
}
|
| 300 |
formatted_transcript.append(line)
|
| 301 |
+
# formatted_simple_transcript 只要 start_time, end_time, text
|
| 302 |
+
simple_line = {
|
| 303 |
+
"start_time": start_time,
|
| 304 |
+
"end_time": end_time,
|
| 305 |
+
"text": entry['text']
|
| 306 |
+
}
|
| 307 |
+
formatted_simple_transcript.append(simple_line)
|
| 308 |
screenshot_paths.append(screenshot_path)
|
| 309 |
|
| 310 |
html_content = format_transcript_to_html(formatted_transcript)
|
|
|
|
| 313 |
print("=====html_content=====")
|
| 314 |
|
| 315 |
# 基于逐字稿生成其他所需的输出
|
| 316 |
+
questions = generate_questions(formatted_simple_transcript)
|
|
|
|
| 317 |
df_string_output = json.dumps(formatted_transcript, ensure_ascii=False, indent=2)
|
| 318 |
+
df_summarise = generate_df_summarise(formatted_simple_transcript)
|
| 319 |
|
| 320 |
global TRANSCRIPTS
|
| 321 |
TRANSCRIPTS = formatted_transcript
|