Spaces:
Sleeping
Sleeping
transcript admin
Browse files
app.py
CHANGED
|
@@ -492,6 +492,7 @@ def process_transcript_and_screenshots_on_gcs(video_id):
|
|
| 492 |
is_new_transcript = False
|
| 493 |
is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
|
| 494 |
if not is_transcript_exists:
|
|
|
|
| 495 |
# 从YouTube获取逐字稿并上传
|
| 496 |
try:
|
| 497 |
transcript = get_transcript(video_id)
|
|
@@ -571,7 +572,6 @@ def process_youtube_link(password, link):
|
|
| 571 |
VIDEO_ID = video_id
|
| 572 |
|
| 573 |
try:
|
| 574 |
-
# transcript = process_transcript_and_screenshots(video_id)
|
| 575 |
transcript = process_transcript_and_screenshots_on_gcs(video_id)
|
| 576 |
except Exception as e:
|
| 577 |
error_msg = f" {video_id} 逐字稿錯誤: {str(e)}"
|
|
@@ -579,17 +579,14 @@ def process_youtube_link(password, link):
|
|
| 579 |
print(error_msg)
|
| 580 |
raise gr.Error(error_msg)
|
| 581 |
|
|
|
|
| 582 |
formatted_transcript = []
|
| 583 |
formatted_simple_transcript =[]
|
| 584 |
-
screenshot_paths = []
|
| 585 |
for entry in transcript:
|
| 586 |
start_time = format_seconds_to_time(entry['start'])
|
| 587 |
end_time = format_seconds_to_time(entry['start'] + entry['duration'])
|
| 588 |
embed_url = get_embedded_youtube_link(video_id, entry['start'])
|
| 589 |
img_file_id = entry['img_file_id']
|
| 590 |
-
# img_file_id =""
|
| 591 |
-
# 先取消 Google Drive 的图片
|
| 592 |
-
# screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
|
| 593 |
screenshot_path = img_file_id
|
| 594 |
line = {
|
| 595 |
"start_time": start_time,
|
|
@@ -606,7 +603,6 @@ def process_youtube_link(password, link):
|
|
| 606 |
"text": entry['text']
|
| 607 |
}
|
| 608 |
formatted_simple_transcript.append(simple_line)
|
| 609 |
-
screenshot_paths.append(screenshot_path)
|
| 610 |
|
| 611 |
global TRANSCRIPTS
|
| 612 |
TRANSCRIPTS = formatted_transcript
|
|
@@ -639,7 +635,7 @@ def process_youtube_link(password, link):
|
|
| 639 |
questions[0] if len(questions) > 0 else "", \
|
| 640 |
questions[1] if len(questions) > 1 else "", \
|
| 641 |
questions[2] if len(questions) > 2 else "", \
|
| 642 |
-
|
| 643 |
summary, \
|
| 644 |
key_moments_html, \
|
| 645 |
mind_map, \
|
|
@@ -1380,11 +1376,11 @@ def delete_LLM_content(video_id, kind):
|
|
| 1380 |
bucket_name = 'video_ai_assistant'
|
| 1381 |
file_name = f'{video_id}_{kind}.json'
|
| 1382 |
blob_name = f"{video_id}/{file_name}"
|
| 1383 |
-
# 检查
|
| 1384 |
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
|
| 1385 |
if is_file_exists:
|
| 1386 |
delete_blob(gcs_client, bucket_name, blob_name)
|
| 1387 |
-
print("
|
| 1388 |
return gr.update(value="", interactive=False)
|
| 1389 |
|
| 1390 |
def update_LLM_content(video_id, new_content, kind):
|
|
@@ -1398,28 +1394,47 @@ def update_LLM_content(video_id, new_content, kind):
|
|
| 1398 |
reading_passage_json = {"reading_passage": str(new_content)}
|
| 1399 |
reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
|
| 1400 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
|
|
|
|
| 1401 |
elif kind == "summary":
|
| 1402 |
summary_json = {"summary": str(new_content)}
|
| 1403 |
summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
|
| 1404 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
|
|
|
|
| 1405 |
elif kind == "mind_map":
|
| 1406 |
mind_map_json = {"mind_map": str(new_content)}
|
| 1407 |
mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
|
| 1408 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1409 |
|
| 1410 |
print(f"{kind} 已更新到GCS")
|
| 1411 |
-
return gr.update(value=
|
| 1412 |
|
| 1413 |
def create_LLM_content(video_id, df_string, kind):
|
| 1414 |
print(f"===create_{kind}===")
|
|
|
|
|
|
|
| 1415 |
if kind == "reading_passage":
|
| 1416 |
content = generate_reading_passage(df_string)
|
|
|
|
| 1417 |
elif kind == "summary":
|
| 1418 |
content = generate_summarise(df_string)
|
|
|
|
| 1419 |
elif kind == "mind_map":
|
| 1420 |
content = generate_mind_map(df_string)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1421 |
|
| 1422 |
-
update_LLM_content(video_id, content, kind)
|
| 1423 |
return gr.update(value=content, interactive=False)
|
| 1424 |
|
| 1425 |
|
|
@@ -2016,7 +2031,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
| 2016 |
with gr.Row() as admin:
|
| 2017 |
password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True)
|
| 2018 |
youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True)
|
| 2019 |
-
video_id = gr.Textbox(label="video_id", visible=
|
| 2020 |
# file_upload = gr.File(label="Upload your CSV or Word file", visible=False)
|
| 2021 |
# web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
|
| 2022 |
user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True)
|
|
@@ -2183,7 +2198,14 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
| 2183 |
|
| 2184 |
with gr.Accordion("See Details", open=False) as see_details:
|
| 2185 |
with gr.Tab("本文"):
|
| 2186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2187 |
with gr.Tab("逐字稿"):
|
| 2188 |
simple_html_content = gr.HTML(label="Simple Transcript")
|
| 2189 |
with gr.Tab("圖文"):
|
|
@@ -2374,6 +2396,28 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
| 2374 |
outputs=[df_summarise]
|
| 2375 |
)
|
| 2376 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2377 |
# 教師版
|
| 2378 |
worksheet_content_btn.click(
|
| 2379 |
get_ai_content,
|
|
|
|
| 492 |
is_new_transcript = False
|
| 493 |
is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
|
| 494 |
if not is_transcript_exists:
|
| 495 |
+
print("逐字稿文件不存在于GCS中,重新建立")
|
| 496 |
# 从YouTube获取逐字稿并上传
|
| 497 |
try:
|
| 498 |
transcript = get_transcript(video_id)
|
|
|
|
| 572 |
VIDEO_ID = video_id
|
| 573 |
|
| 574 |
try:
|
|
|
|
| 575 |
transcript = process_transcript_and_screenshots_on_gcs(video_id)
|
| 576 |
except Exception as e:
|
| 577 |
error_msg = f" {video_id} 逐字稿錯誤: {str(e)}"
|
|
|
|
| 579 |
print(error_msg)
|
| 580 |
raise gr.Error(error_msg)
|
| 581 |
|
| 582 |
+
original_transcript = json.dumps(transcript, ensure_ascii=False, indent=2)
|
| 583 |
formatted_transcript = []
|
| 584 |
formatted_simple_transcript =[]
|
|
|
|
| 585 |
for entry in transcript:
|
| 586 |
start_time = format_seconds_to_time(entry['start'])
|
| 587 |
end_time = format_seconds_to_time(entry['start'] + entry['duration'])
|
| 588 |
embed_url = get_embedded_youtube_link(video_id, entry['start'])
|
| 589 |
img_file_id = entry['img_file_id']
|
|
|
|
|
|
|
|
|
|
| 590 |
screenshot_path = img_file_id
|
| 591 |
line = {
|
| 592 |
"start_time": start_time,
|
|
|
|
| 603 |
"text": entry['text']
|
| 604 |
}
|
| 605 |
formatted_simple_transcript.append(simple_line)
|
|
|
|
| 606 |
|
| 607 |
global TRANSCRIPTS
|
| 608 |
TRANSCRIPTS = formatted_transcript
|
|
|
|
| 635 |
questions[0] if len(questions) > 0 else "", \
|
| 636 |
questions[1] if len(questions) > 1 else "", \
|
| 637 |
questions[2] if len(questions) > 2 else "", \
|
| 638 |
+
original_transcript, \
|
| 639 |
summary, \
|
| 640 |
key_moments_html, \
|
| 641 |
mind_map, \
|
|
|
|
| 1376 |
bucket_name = 'video_ai_assistant'
|
| 1377 |
file_name = f'{video_id}_{kind}.json'
|
| 1378 |
blob_name = f"{video_id}/{file_name}"
|
| 1379 |
+
# 检查 file 是否存在
|
| 1380 |
is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
|
| 1381 |
if is_file_exists:
|
| 1382 |
delete_blob(gcs_client, bucket_name, blob_name)
|
| 1383 |
+
print(f"{file_name}已从GCS中删除")
|
| 1384 |
return gr.update(value="", interactive=False)
|
| 1385 |
|
| 1386 |
def update_LLM_content(video_id, new_content, kind):
|
|
|
|
| 1394 |
reading_passage_json = {"reading_passage": str(new_content)}
|
| 1395 |
reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
|
| 1396 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
|
| 1397 |
+
updated_content = reading_passage_text
|
| 1398 |
elif kind == "summary":
|
| 1399 |
summary_json = {"summary": str(new_content)}
|
| 1400 |
summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
|
| 1401 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
|
| 1402 |
+
updated_content = summary_text
|
| 1403 |
elif kind == "mind_map":
|
| 1404 |
mind_map_json = {"mind_map": str(new_content)}
|
| 1405 |
mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
|
| 1406 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
|
| 1407 |
+
updated_content = mind_map_text
|
| 1408 |
+
elif kind == "transcript":
|
| 1409 |
+
if isinstance(new_content, str):
|
| 1410 |
+
transcript_json = json.loads(new_content)
|
| 1411 |
+
else:
|
| 1412 |
+
transcript_json = new_content
|
| 1413 |
+
transcript_text = json.dumps(transcript_json, ensure_ascii=False, indent=2)
|
| 1414 |
+
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, transcript_text)
|
| 1415 |
+
updated_content = transcript_text
|
| 1416 |
|
| 1417 |
print(f"{kind} 已更新到GCS")
|
| 1418 |
+
return gr.update(value=updated_content, interactive=False)
|
| 1419 |
|
| 1420 |
def create_LLM_content(video_id, df_string, kind):
    """Generate content of the requested kind, store it, and return a UI update.

    Args:
        video_id: Identifier of the video. Forwarded to ``update_LLM_content``
            and, for ``kind == "transcript"``, to
            ``process_transcript_and_screenshots_on_gcs``.
        df_string: Source text handed to the reading-passage, summary and
            mind-map generators. Not used when ``kind == "transcript"``.
        kind: One of ``"reading_passage"``, ``"summary"``, ``"mind_map"`` or
            ``"transcript"``.

    Returns:
        ``gr.update(value=content, interactive=False)``, where ``content`` is
        the generated text or, for transcripts, the transcript serialized as
        an indented JSON string.

    Raises:
        ValueError: If ``kind`` is not one of the supported values. Previously
            this case crashed at the ``return`` with ``UnboundLocalError``.
    """
    print(f"===create_{kind}===")
    print(f"video_id: {video_id}")

    if kind == "reading_passage":
        content = generate_reading_passage(df_string)
    elif kind == "summary":
        content = generate_summarise(df_string)
    elif kind == "mind_map":
        content = generate_mind_map(df_string)
    elif kind == "transcript":
        content = process_transcript_and_screenshots_on_gcs(video_id)
    else:
        # Fail loudly before anything is generated or stored.
        raise ValueError(
            f"Unsupported kind {kind!r}; expected one of "
            "'reading_passage', 'summary', 'mind_map', 'transcript'"
        )

    # The raw value is stored as-is: update_LLM_content receives the
    # transcript object itself, not its JSON text.
    update_LLM_content(video_id, content, kind)

    if kind == "transcript":
        # The textbox needs a string, so serialize only for display.
        content = json.dumps(content, ensure_ascii=False, indent=2)

    return gr.update(value=content, interactive=False)
|
| 1439 |
|
| 1440 |
|
|
|
|
| 2031 |
with gr.Row() as admin:
|
| 2032 |
password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True)
|
| 2033 |
youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True)
|
| 2034 |
+
video_id = gr.Textbox(label="video_id", visible=True)
|
| 2035 |
# file_upload = gr.File(label="Upload your CSV or Word file", visible=False)
|
| 2036 |
# web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
|
| 2037 |
user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True)
|
|
|
|
| 2198 |
|
| 2199 |
with gr.Accordion("See Details", open=False) as see_details:
|
| 2200 |
with gr.Tab("本文"):
|
| 2201 |
+
with gr.Row() as transcript_admmin:
|
| 2202 |
+
transcript_kind = gr.Textbox(value="transcript", show_label=False)
|
| 2203 |
+
transcript_edit_button = gr.Button("編輯", size="sm", variant="primary")
|
| 2204 |
+
transcript_update_button = gr.Button("更新", size="sm", variant="primary")
|
| 2205 |
+
transcript_delete_button = gr.Button("刪除", size="sm", variant="primary")
|
| 2206 |
+
transcript_create_button = gr.Button("建立", size="sm", variant="primary")
|
| 2207 |
+
with gr.Row():
|
| 2208 |
+
df_string_output = gr.Textbox(lines=40, label="Data Text", )
|
| 2209 |
with gr.Tab("逐字稿"):
|
| 2210 |
simple_html_content = gr.HTML(label="Simple Transcript")
|
| 2211 |
with gr.Tab("圖文"):
|
|
|
|
| 2396 |
outputs=[df_summarise]
|
| 2397 |
)
|
| 2398 |
|
| 2399 |
+
# transcript event
|
| 2400 |
+
transcript_create_button.click(
|
| 2401 |
+
create_LLM_content,
|
| 2402 |
+
inputs=[video_id, df_string_output, transcript_kind],
|
| 2403 |
+
outputs=[df_string_output]
|
| 2404 |
+
)
|
| 2405 |
+
transcript_delete_button.click(
|
| 2406 |
+
delete_LLM_content,
|
| 2407 |
+
inputs=[video_id, transcript_kind],
|
| 2408 |
+
outputs=[df_string_output]
|
| 2409 |
+
)
|
| 2410 |
+
transcript_edit_button.click(
|
| 2411 |
+
enable_edit_mode,
|
| 2412 |
+
inputs=[],
|
| 2413 |
+
outputs=[df_string_output]
|
| 2414 |
+
)
|
| 2415 |
+
transcript_update_button.click(
|
| 2416 |
+
update_LLM_content,
|
| 2417 |
+
inputs=[video_id, df_string_output, transcript_kind],
|
| 2418 |
+
outputs=[df_string_output]
|
| 2419 |
+
)
|
| 2420 |
+
|
| 2421 |
# 教師版
|
| 2422 |
worksheet_content_btn.click(
|
| 2423 |
get_ai_content,
|