Spaces:
Running
Running
summary_json = {"summary": str(summary)}
Browse files
app.py
CHANGED
|
@@ -527,7 +527,7 @@ def process_youtube_link(link):
|
|
| 527 |
simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
|
| 528 |
first_image = formatted_transcript[0]['screenshot_path']
|
| 529 |
first_text = formatted_transcript[0]['text']
|
| 530 |
-
mind_map_json = get_mind_map(video_id, formatted_simple_transcript)
|
| 531 |
mind_map = mind_map_json["mind_map"]
|
| 532 |
mind_map_html = get_mind_map_html(mind_map)
|
| 533 |
|
|
@@ -596,27 +596,47 @@ def process_web_link(link):
|
|
| 596 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 597 |
return soup.get_text()
|
| 598 |
|
| 599 |
-
def get_mind_map(video_id, df_string):
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 606 |
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 620 |
|
| 621 |
return mind_map_json
|
| 622 |
|
|
@@ -712,7 +732,8 @@ def get_video_id_summary(video_id, df_string, source):
|
|
| 712 |
# 检查 summary_file 是否存在
|
| 713 |
is_summary_file_exists = gcs_check_file_exists(gcs_client, bucket_name, summary_file_blob_name)
|
| 714 |
if not is_summary_file_exists:
|
| 715 |
-
|
|
|
|
| 716 |
summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
|
| 717 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, summary_file_blob_name, summary_text)
|
| 718 |
print("summary已上传到GCS")
|
|
@@ -733,7 +754,6 @@ def get_video_id_summary(video_id, df_string, source):
|
|
| 733 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
| 734 |
if not exists:
|
| 735 |
summary = generate_summarise(df_string)
|
| 736 |
-
# processed_summary = processed_video_summary_to_json(summary)
|
| 737 |
summary_json = {"summary": str(summary)}
|
| 738 |
summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
|
| 739 |
|
|
@@ -746,14 +766,6 @@ def get_video_id_summary(video_id, df_string, source):
|
|
| 746 |
print("===get_video_id_summary error===")
|
| 747 |
print(error_msg)
|
| 748 |
print("===get_video_id_summary error===")
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
# 存在 local at OUTPUT_PATH as {video_id}_summary.json
|
| 753 |
-
# with open(f'{OUTPUT_PATH}/{video_id}_summary.json', 'w') as f:
|
| 754 |
-
# f.write(summary_text)
|
| 755 |
-
# print(f"summary已存在 local at {OUTPUT_PATH}/{video_id}_summary.json")
|
| 756 |
-
# file_id = upload_file_directly(service, file_name, folder_id, f'{OUTPUT_PATH}/{video_id}_summary.json')
|
| 757 |
else:
|
| 758 |
# 逐字稿已存在,下载逐字稿内容
|
| 759 |
print("summary已存在Google Drive中")
|
|
|
|
| 527 |
simple_html_content = format_simple_transcript_to_html(formatted_simple_transcript)
|
| 528 |
first_image = formatted_transcript[0]['screenshot_path']
|
| 529 |
first_text = formatted_transcript[0]['text']
|
| 530 |
+
mind_map_json = get_mind_map(video_id, formatted_simple_transcript, source)
|
| 531 |
mind_map = mind_map_json["mind_map"]
|
| 532 |
mind_map_html = get_mind_map_html(mind_map)
|
| 533 |
|
|
|
|
| 596 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 597 |
return soup.get_text()
|
| 598 |
|
| 599 |
+
def get_mind_map(video_id, df_string, source):
    """Return the mind-map JSON for a video, generating and caching it on a miss.

    The mind map is stored as ``{video_id}_mind_map.json`` in the backend
    selected by ``source``. If the file is not there yet, a mind map is
    generated from ``df_string``, uploaded, and returned; otherwise the
    stored file is downloaded and parsed.

    Args:
        video_id: Identifier used to build the file name and the per-video
            folder / blob prefix.
        df_string: Transcript text handed to ``generate_mind_map`` when no
            cached file exists.
        source: Storage backend, either ``"gcs"`` or ``"drive"``.

    Returns:
        On a cache miss, a dict ``{"mind_map": <str>}``. On a cache hit,
        whatever ``json.loads`` yields for the stored file (presumably the
        same shape, since this function wrote it -- verify if other writers
        exist).

    Raises:
        ValueError: If ``source`` is not ``"gcs"`` or ``"drive"``.
    """
    # Fail fast on an unknown backend. Without this check no branch would
    # assign mind_map_json and the final return would raise UnboundLocalError.
    if source not in ("gcs", "drive"):
        raise ValueError(
            f"Unsupported source for get_mind_map: {source!r} "
            "(expected 'gcs' or 'drive')"
        )

    if source == "gcs":
        print("===get_mind_map on gcs===")
        gcs_client = init_gcs_client(GCS_KEY)
        bucket_name = 'video_ai_assistant'
        file_name = f'{video_id}_mind_map.json'
        blob_name = f"{video_id}/{file_name}"
        # Check whether the file already exists.
        is_file_exists = gcs_check_file_exists(gcs_client, bucket_name, blob_name)
        if not is_file_exists:
            mind_map = generate_mind_map(df_string)
            # Stringify so the payload is always JSON-serialisable.
            mind_map_json = {"mind_map": str(mind_map)}
            mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
            upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
            print("mind_map已上傳到GCS")
        else:
            # The mind map already exists; download its content.
            print("mind_map已存在于GCS中")
            mind_map_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
            mind_map_json = json.loads(mind_map_text)

    else:  # source == "drive" (guaranteed by the validation above)
        print("===get_mind_map on drive===")
        service = init_drive_service()
        parent_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
        folder_id = create_folder_if_not_exists(service, video_id, parent_folder_id)
        file_name = f'{video_id}_mind_map.json'

        # Check whether the file already exists.
        exists, file_id = check_file_exists(service, folder_id, file_name)
        if not exists:
            mind_map = generate_mind_map(df_string)
            # Stringify so the payload is always JSON-serialisable.
            mind_map_json = {"mind_map": str(mind_map)}
            mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
            upload_content_directly(service, file_name, folder_id, mind_map_text)
            print("mind_map已上傳到Google Drive")
        else:
            # The mind map already exists; download its content.
            print("mind_map已存在于Google Drive中")
            mind_map_text = download_file_as_string(service, file_id)
            mind_map_json = json.loads(mind_map_text)

    return mind_map_json
|
| 642 |
|
|
|
|
| 732 |
# 检查 summary_file 是否存在
|
| 733 |
is_summary_file_exists = gcs_check_file_exists(gcs_client, bucket_name, summary_file_blob_name)
|
| 734 |
if not is_summary_file_exists:
|
| 735 |
+
summary = generate_summarise(df_string)
|
| 736 |
+
summary_json = {"summary": str(summary)}
|
| 737 |
summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
|
| 738 |
upload_file_to_gcs_with_json_string(gcs_client, bucket_name, summary_file_blob_name, summary_text)
|
| 739 |
print("summary已上传到GCS")
|
|
|
|
| 754 |
exists, file_id = check_file_exists(service, folder_id, file_name)
|
| 755 |
if not exists:
|
| 756 |
summary = generate_summarise(df_string)
|
|
|
|
| 757 |
summary_json = {"summary": str(summary)}
|
| 758 |
summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
|
| 759 |
|
|
|
|
| 766 |
print("===get_video_id_summary error===")
|
| 767 |
print(error_msg)
|
| 768 |
print("===get_video_id_summary error===")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 769 |
else:
|
| 770 |
# 逐字稿已存在,下载逐字稿内容
|
| 771 |
print("summary已存在Google Drive中")
|