Spaces:

JunyiAcademy
/

vaitor2

Sleeping

App Files Files Community

youngtsai committed on Apr 6, 2024

Commit

9b92004

1 Parent(s): 41af362

trsnacript admin

Browse files

Files changed (1) hide show

app.py +57 -13

app.py CHANGED Viewed

@@ -492,6 +492,7 @@ def process_transcript_and_screenshots_on_gcs(video_id):
     is_new_transcript = False
     is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
     if not is_transcript_exists:
         # 从YouTube获取逐字稿并上传
         try:
             transcript = get_transcript(video_id)
@@ -571,7 +572,6 @@ def process_youtube_link(password, link):
     VIDEO_ID = video_id
     try:
-        # transcript = process_transcript_and_screenshots(video_id)
         transcript = process_transcript_and_screenshots_on_gcs(video_id)
     except Exception as e:
         error_msg = f" {video_id} 逐字稿錯誤: {str(e)}"
@@ -579,17 +579,14 @@ def process_youtube_link(password, link):
         print(error_msg)
         raise gr.Error(error_msg)
     formatted_transcript = []
     formatted_simple_transcript =[]
-    screenshot_paths = []
     for entry in transcript:
         start_time = format_seconds_to_time(entry['start'])
         end_time = format_seconds_to_time(entry['start'] + entry['duration'])
         embed_url = get_embedded_youtube_link(video_id, entry['start'])
         img_file_id = entry['img_file_id']
-        # img_file_id =""
-        # 先取消 Google Drive 的图片
-        # screenshot_path = f"https://lh3.googleusercontent.com/d/{img_file_id}=s4000"
         screenshot_path = img_file_id
         line = {
             "start_time": start_time,
@@ -606,7 +603,6 @@ def process_youtube_link(password, link):
             "text": entry['text']
         }
         formatted_simple_transcript.append(simple_line)
-        screenshot_paths.append(screenshot_path)
     global TRANSCRIPTS
     TRANSCRIPTS = formatted_transcript
@@ -639,7 +635,7 @@ def process_youtube_link(password, link):
         questions[0] if len(questions) > 0 else "", \
         questions[1] if len(questions) > 1 else "", \
         questions[2] if len(questions) > 2 else "", \
-        formatted_transcript_json, \
         summary, \
         key_moments_html, \
         mind_map, \
@@ -1380,11 +1376,11 @@ def delete_LLM_content(video_id, kind):
     bucket_name = 'video_ai_assistant'
     file_name = f'{video_id}_{kind}.json'
     blob_name = f"{video_id}/{file_name}"
-    # 检查 reading_passage 是否存在
     is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
     if is_file_exists:
         delete_blob(gcs_client, bucket_name, blob_name)
-        print("reading_passage已从GCS中删除")
     return gr.update(value="", interactive=False)
 def update_LLM_content(video_id, new_content, kind):
@@ -1398,28 +1394,47 @@ def update_LLM_content(video_id, new_content, kind):
         reading_passage_json = {"reading_passage": str(new_content)}
         reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
         upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
     elif kind == "summary":
         summary_json = {"summary": str(new_content)}
         summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
         upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
     elif kind == "mind_map":
         mind_map_json = {"mind_map": str(new_content)}
         mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
         upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
     print(f"{kind} 已更新到GCS")
-    return gr.update(value=new_content, interactive=False)
 def create_LLM_content(video_id, df_string, kind):
     print(f"===create_{kind}===")
     if kind == "reading_passage":
         content = generate_reading_passage(df_string)
     elif kind == "summary":
         content = generate_summarise(df_string)
     elif kind == "mind_map":
         content = generate_mind_map(df_string)
-    update_LLM_content(video_id, content, kind)
     return gr.update(value=content, interactive=False)
@@ -2016,7 +2031,7 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
     with gr.Row() as admin:
         password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True)
         youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True)
-        video_id = gr.Textbox(label="video_id", visible=False)
         # file_upload = gr.File(label="Upload your CSV or Word file", visible=False)
         # web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
         user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True)
@@ -2183,7 +2198,14 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
     with gr.Accordion("See Details", open=False) as see_details:
         with gr.Tab("本文"):
-            df_string_output = gr.Textbox(lines=40, label="Data Text")
         with gr.Tab("逐字稿"):
             simple_html_content = gr.HTML(label="Simple Transcript")
         with gr.Tab("圖文"):
@@ -2374,6 +2396,28 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
         outputs=[df_summarise]
     )
     # 教師版
     worksheet_content_btn.click(
         get_ai_content,

     is_new_transcript = False
     is_transcript_exists = GCS_SERVICE.check_file_exists(bucket_name, transcript_blob_name)
     if not is_transcript_exists:
+        print("逐字稿文件不存在于GCS中，重新建立")
         # 从YouTube获取逐字稿并上传
         try:
             transcript = get_transcript(video_id)
     VIDEO_ID = video_id
     try:
         transcript = process_transcript_and_screenshots_on_gcs(video_id)
     except Exception as e:
         error_msg = f" {video_id} 逐字稿錯誤: {str(e)}"
         print(error_msg)
         raise gr.Error(error_msg)
+    original_transcript = json.dumps(transcript, ensure_ascii=False, indent=2)
     formatted_transcript = []
     formatted_simple_transcript =[]
     for entry in transcript:
         start_time = format_seconds_to_time(entry['start'])
         end_time = format_seconds_to_time(entry['start'] + entry['duration'])
         embed_url = get_embedded_youtube_link(video_id, entry['start'])
         img_file_id = entry['img_file_id']
         screenshot_path = img_file_id
         line = {
             "start_time": start_time,
             "text": entry['text']
         }
         formatted_simple_transcript.append(simple_line)
     global TRANSCRIPTS
     TRANSCRIPTS = formatted_transcript
         questions[0] if len(questions) > 0 else "", \
         questions[1] if len(questions) > 1 else "", \
         questions[2] if len(questions) > 2 else "", \
+        original_transcript, \
         summary, \
         key_moments_html, \
         mind_map, \
     bucket_name = 'video_ai_assistant'
     file_name = f'{video_id}_{kind}.json'
     blob_name = f"{video_id}/{file_name}"
+    # 检查 file 是否存在
     is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
     if is_file_exists:
         delete_blob(gcs_client, bucket_name, blob_name)
+        print(f"{file_name}已从GCS中删除")
     return gr.update(value="", interactive=False)
 def update_LLM_content(video_id, new_content, kind):
         reading_passage_json = {"reading_passage": str(new_content)}
         reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)
         upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, reading_passage_text)
+        updated_content = reading_passage_text
     elif kind == "summary":
         summary_json = {"summary": str(new_content)}
         summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)
         upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, summary_text)
+        updated_content = summary_text
     elif kind == "mind_map":
         mind_map_json = {"mind_map": str(new_content)}
         mind_map_text = json.dumps(mind_map_json, ensure_ascii=False, indent=2)
         upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, mind_map_text)
+        updated_content = mind_map_text
+    elif kind == "transcript":
+        if isinstance(new_content, str):
+            transcript_json = json.loads(new_content)
+        else:
+            transcript_json = new_content
+        transcript_text = json.dumps(transcript_json, ensure_ascii=False, indent=2)
+        upload_file_to_gcs_with_json_string(gcs_client, bucket_name, blob_name, transcript_text)
+        updated_content = transcript_text
     print(f"{kind} 已更新到GCS")
+    return gr.update(value=updated_content, interactive=False)
 def create_LLM_content(video_id, df_string, kind):
     """Generate content of the requested kind, store it, and return a UI update.

     Args:
         video_id: Identifier of the video the content belongs to. It is passed
             to ``update_LLM_content`` and, for ``"transcript"``, to
             ``process_transcript_and_screenshots_on_gcs``.
         df_string: Source text handed to the generator for the
             ``"reading_passage"``, ``"summary"`` and ``"mind_map"`` kinds.
             It is not used for ``"transcript"``.
         kind: One of ``"reading_passage"``, ``"summary"``, ``"mind_map"`` or
             ``"transcript"``.

     Returns:
         ``gr.update(value=content, interactive=False)``. For ``"transcript"``
         the value is the transcript serialized as an indented JSON string;
         for the other kinds it is the generator's return value.

     Raises:
         gr.Error: If ``kind`` is not one of the supported values.
     """
     print(f"===create_{kind}===")
     print(f"video_id: {video_id}")

     if kind == "reading_passage":
         content = generate_reading_passage(df_string)
     elif kind == "summary":
         content = generate_summarise(df_string)
     elif kind == "mind_map":
         content = generate_mind_map(df_string)
     elif kind == "transcript":
         content = process_transcript_and_screenshots_on_gcs(video_id)
     else:
         # `kind` is read from an editable textbox, so an unknown value is
         # reachable from the UI. Fail with a readable message instead of an
         # UnboundLocalError on `content` further down.
         raise gr.Error(f"Unsupported content kind: {kind!r}")

     # Every supported kind is stored through the same call.
     update_LLM_content(video_id, content, kind)

     if kind == "transcript":
         # The transcript is stored as a structured object; the textbox needs
         # a string, so serialize only after it has been handed to storage.
         content = json.dumps(content, ensure_ascii=False, indent=2)

     return gr.update(value=content, interactive=False)
     with gr.Row() as admin:
         password = gr.Textbox(label="Password", type="password", elem_id="password_input", visible=True)
         youtube_link = gr.Textbox(label="Enter YouTube Link", elem_id="youtube_link_input", visible=True)
+        video_id = gr.Textbox(label="video_id", visible=True)
         # file_upload = gr.File(label="Upload your CSV or Word file", visible=False)
         # web_link = gr.Textbox(label="Enter Web Page Link", visible=False)
         user_data = gr.Textbox(label="User Data", elem_id="user_data_input", visible=True)
     with gr.Accordion("See Details", open=False) as see_details:
         with gr.Tab("本文"):
+            with gr.Row() as transcript_admmin:
+                transcript_kind = gr.Textbox(value="transcript", show_label=False)
+                transcript_edit_button = gr.Button("編輯", size="sm", variant="primary")
+                transcript_update_button = gr.Button("更新", size="sm", variant="primary")
+                transcript_delete_button = gr.Button("刪除", size="sm", variant="primary")
+                transcript_create_button = gr.Button("建立", size="sm", variant="primary")
+            with gr.Row():
+                df_string_output = gr.Textbox(lines=40, label="Data Text", )
         with gr.Tab("逐字稿"):
             simple_html_content = gr.HTML(label="Simple Transcript")
         with gr.Tab("圖文"):
         outputs=[df_summarise]
     )
+    # transcript event
+    transcript_create_button.click(
+        create_LLM_content,
+        inputs=[video_id, df_string_output, transcript_kind],
+        outputs=[df_string_output]
+    )
+    transcript_delete_button.click(
+        delete_LLM_content,
+        inputs=[video_id, transcript_kind],
+        outputs=[df_string_output]
+    )
+    transcript_edit_button.click(
+        enable_edit_mode,
+        inputs=[],
+        outputs=[df_string_output]
+    )
+    transcript_update_button.click(
+        update_LLM_content,
+        inputs=[video_id, df_string_output, transcript_kind],
+        outputs=[df_string_output]
+    )
     # 教師版
     worksheet_content_btn.click(
         get_ai_content,