Spaces:
Running
Running
mind_map
Browse files
app.py
CHANGED
|
@@ -347,6 +347,8 @@ def process_youtube_link(link):
|
|
| 347 |
html_content = format_transcript_to_html(formatted_transcript)
|
| 348 |
first_image = formatted_transcript[0]['screenshot_path']
|
| 349 |
first_text = formatted_transcript[0]['text']
|
|
|
|
|
|
|
| 350 |
|
| 351 |
# 确保返回与 UI 组件预期匹配的输出
|
| 352 |
return questions[0] if len(questions) > 0 else "", \
|
|
@@ -354,9 +356,10 @@ def process_youtube_link(link):
|
|
| 354 |
questions[2] if len(questions) > 2 else "", \
|
| 355 |
formatted_transcript_json, \
|
| 356 |
summary, \
|
|
|
|
| 357 |
html_content, \
|
| 358 |
first_image, \
|
| 359 |
-
first_text
|
| 360 |
|
| 361 |
|
| 362 |
def format_transcript_to_html(formatted_transcript):
|
|
@@ -404,6 +407,57 @@ def process_web_link(link):
|
|
| 404 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 405 |
return soup.get_text()
|
| 406 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
def processed_video_summary_to_json(summary):
|
| 408 |
"""
|
| 409 |
整體格式為:
|
|
@@ -446,7 +500,6 @@ def processed_video_summary_to_json(summary):
|
|
| 446 |
|
| 447 |
return summary_json
|
| 448 |
|
| 449 |
-
|
| 450 |
# get video_id_summary.json content
|
| 451 |
def get_video_id_summary(video_id, df_string):
|
| 452 |
print("===get_video_id_summary===")
|
|
@@ -777,6 +830,8 @@ with gr.Blocks() as demo:
|
|
| 777 |
btn_3 = gr.Button()
|
| 778 |
gr.Markdown("## 重新生成問題")
|
| 779 |
btn_create_question = gr.Button("Create Questions")
|
|
|
|
|
|
|
| 780 |
|
| 781 |
send_button.click(
|
| 782 |
respond,
|
|
@@ -794,7 +849,7 @@ with gr.Blocks() as demo:
|
|
| 794 |
file_upload.change(process_file, inputs=file_upload, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
|
| 795 |
|
| 796 |
# 当输入 YouTube 链接时触发
|
| 797 |
-
youtube_link.change(process_youtube_link, inputs=youtube_link, outputs=[btn_1, btn_2, btn_3, df_string_output, df_summarise, transcript_html, slide_image, slide_text])
|
| 798 |
|
| 799 |
# 当输入网页链接时触发
|
| 800 |
web_link.change(process_web_link, inputs=web_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
|
|
|
|
| 347 |
html_content = format_transcript_to_html(formatted_transcript)
|
| 348 |
first_image = formatted_transcript[0]['screenshot_path']
|
| 349 |
first_text = formatted_transcript[0]['text']
|
| 350 |
+
mind_map_json = get_mind_map(video_id, formatted_simple_transcript)
|
| 351 |
+
mind_map = mind_map_json["mind_map"]
|
| 352 |
|
| 353 |
# 确保返回与 UI 组件预期匹配的输出
|
| 354 |
return questions[0] if len(questions) > 0 else "", \
|
|
|
|
| 356 |
questions[2] if len(questions) > 2 else "", \
|
| 357 |
formatted_transcript_json, \
|
| 358 |
summary, \
|
| 359 |
+
mind_map, \
|
| 360 |
html_content, \
|
| 361 |
first_image, \
|
| 362 |
+
first_text,
|
| 363 |
|
| 364 |
|
| 365 |
def format_transcript_to_html(formatted_transcript):
|
|
|
|
| 407 |
soup = BeautifulSoup(response.content, 'html.parser')
|
| 408 |
return soup.get_text()
|
| 409 |
|
| 410 |
+
def get_mind_map(video_id, df_string):
    """Return the mind-map payload for a video, reusing a stored copy if one exists.

    Looks for ``{video_id}_mind_map.json`` in the video's Google Drive folder.
    When the file is found its content is downloaded and parsed; otherwise a
    new mind map is generated from ``df_string``, uploaded under that file
    name, and returned.

    Args:
        video_id: Identifier used for both the folder lookup and the file name.
        df_string: Text handed to ``generate_mind_map`` when nothing is stored.

    Returns:
        A dict. On the generate path it is ``{"mind_map": <str>}``; on the
        download path it is whatever JSON the stored file contains
        (presumably the same shape -- confirm against older uploads).
    """
    print("===get_mind_map===")
    drive = init_drive_service()
    root_folder_id = '1GgI4YVs0KckwStVQkLa1NZ8IpaEMurkL'
    video_folder_id = create_folder_if_not_exists(drive, video_id, root_folder_id)
    target_name = f'{video_id}_mind_map.json'

    # Check whether the mind-map file is already stored for this video.
    found, stored_file_id = check_file_exists(drive, video_folder_id, target_name)

    if found:
        # Stored copy available: download it and parse it instead of regenerating.
        print("mind_map已存在于Google Drive中")
        stored_text = download_file_as_string(drive, stored_file_id)
        return json.loads(stored_text)

    # Nothing stored yet: generate, serialise, and upload for later calls.
    generated = generate_mind_map(df_string)
    payload = {"mind_map": str(generated)}
    serialised = json.dumps(payload, ensure_ascii=False, indent=2)
    upload_content_directly(drive, target_name, video_folder_id, serialised)
    print("mind_map已上傳到Google Drive")
    return payload
|
| 433 |
+
|
| 434 |
+
def generate_mind_map(df_string):
    """Ask the chat model to build a markdown mind map from ``df_string``.

    Sends a system prompt plus a user prompt embedding ``df_string`` to the
    module-level ``client`` and returns the first choice's text.

    Args:
        df_string: Text that is interpolated into the user prompt.

    Returns:
        The model's reply with surrounding whitespace stripped. An empty
        string is returned when the reply carries no text content.
    """
    # Use OpenAI to build a markdown mind map from the supplied text.
    sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,使用 zh-TW"
    user_content = (
        "\n"
        f"請根據 {df_string} 文本建立 markdown 心智圖\n"
        "注意:不需要前後文敘述,直接給出 markdown 文本即可\n"
        "這對我很重要\n"
    )
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content},
    ]

    request_payload = {
        "model": "gpt-4-1106-preview",
        "messages": messages,
        "max_tokens": 4000,
    }

    response = client.chat.completions.create(**request_payload)
    # message.content may be None (e.g. a reply without text); coalesce to ""
    # so .strip() cannot raise AttributeError.
    mind_map = (response.choices[0].message.content or "").strip()
    print("=====mind_map=====")
    print(mind_map)
    print("=====mind_map=====")

    return mind_map
|
| 460 |
+
|
| 461 |
def processed_video_summary_to_json(summary):
|
| 462 |
"""
|
| 463 |
整體格式為:
|
|
|
|
| 500 |
|
| 501 |
return summary_json
|
| 502 |
|
|
|
|
| 503 |
# get video_id_summary.json content
|
| 504 |
def get_video_id_summary(video_id, df_string):
|
| 505 |
print("===get_video_id_summary===")
|
|
|
|
| 830 |
btn_3 = gr.Button()
|
| 831 |
gr.Markdown("## 重新生成問題")
|
| 832 |
btn_create_question = gr.Button("Create Questions")
|
| 833 |
+
with gr.Tab("心智圖"):
|
| 834 |
+
mind_map = gr.Textbox()
|
| 835 |
|
| 836 |
send_button.click(
|
| 837 |
respond,
|
|
|
|
| 849 |
file_upload.change(process_file, inputs=file_upload, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
|
| 850 |
|
| 851 |
# 当输入 YouTube 链接时触发
|
| 852 |
+
youtube_link.change(process_youtube_link, inputs=youtube_link, outputs=[btn_1, btn_2, btn_3, df_string_output, df_summarise, mind_map, transcript_html, slide_image, slide_text])
|
| 853 |
|
| 854 |
# 当输入网页链接时触发
|
| 855 |
web_link.change(process_web_link, inputs=web_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
|