Spaces:
Sleeping
Sleeping
def summary_add_markdown_version(video_id):
Browse files
app.py
CHANGED
|
@@ -1549,6 +1549,132 @@ def create_LLM_content(video_id, df_string, kind):
|
|
| 1549 |
|
| 1550 |
return gr.update(value=content, interactive=False)
|
| 1551 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1552 |
# AI 生成教學素材
|
| 1553 |
def get_meta_data(video_id, source="gcs"):
|
| 1554 |
if source == "gcs":
|
|
@@ -2440,22 +2566,30 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
| 2440 |
df_string_output = gr.Textbox(lines=40, label="Data Text", interactive=False, show_copy_button=True)
|
| 2441 |
with gr.Tab("文章本文"):
|
| 2442 |
with gr.Row() as reading_passage_admin:
|
| 2443 |
-
|
| 2444 |
-
|
| 2445 |
-
|
| 2446 |
-
|
| 2447 |
-
|
| 2448 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2449 |
with gr.Row():
|
| 2450 |
reading_passage_text = gr.Textbox(label="reading_passage", lines=40, interactive=False, show_copy_button=True)
|
| 2451 |
with gr.Tab("重點摘要本文"):
|
| 2452 |
with gr.Row() as summary_admmin:
|
| 2453 |
-
|
| 2454 |
-
|
| 2455 |
-
|
| 2456 |
-
|
| 2457 |
-
|
| 2458 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2459 |
with gr.Row():
|
| 2460 |
summary_text = gr.Textbox(label="Summary", lines=40, interactive=False, show_copy_button=True)
|
| 2461 |
with gr.Tab("關鍵時刻本文"):
|
|
@@ -2656,6 +2790,11 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
| 2656 |
# web_link.change(process_web_link, inputs=web_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
|
| 2657 |
|
| 2658 |
# reading_passage event
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2659 |
reading_passage_get_button.click(
|
| 2660 |
get_LLM_content,
|
| 2661 |
inputs=[video_id, reading_passage_kind],
|
|
@@ -2683,6 +2822,11 @@ with gr.Blocks(theme=gr.themes.Base(primary_hue=gr.themes.colors.orange, seconda
|
|
| 2683 |
)
|
| 2684 |
|
| 2685 |
# summary event
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2686 |
summary_get_button.click(
|
| 2687 |
get_LLM_content,
|
| 2688 |
inputs=[video_id, summary_kind],
|
|
|
|
| 1549 |
|
| 1550 |
return gr.update(value=content, interactive=False)
|
| 1551 |
|
| 1552 |
+
# ---- LLM refresh CRUD ----
|
| 1553 |
+
def reading_passage_add_latex_version(video_id):
    """Create a LaTeX-annotated copy of a video's stored reading passage.

    Downloads ``{video_id}/{video_id}_reading_passage.json`` from the
    ``video_ai_assistant`` bucket, asks the chat model to wrap math and
    technical terms in ``$...$``, and uploads the result as
    ``{video_id}/{video_id}_reading_passage_latex.json``. The source JSON
    blob itself is not overwritten.

    Args:
        video_id: Identifier used to build the blob paths.

    Returns:
        The rewritten reading passage text returned by the model.

    Raises:
        gr.Error: If the source blob is reported missing, or if the model
            returns no usable text.
    """
    print("===reading_passage_convert_to_latex===")
    gcs_client = GCS_CLIENT
    bucket_name = 'video_ai_assistant'
    file_name = f'{video_id}_reading_passage.json'
    blob_name = f"{video_id}/{file_name}"
    print(f"blob_name: {blob_name}")

    # Bail out early when there is no reading passage to convert.
    is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
    if not is_file_exists:
        raise gr.Error("reading_passage 不存在!")

    # The reading passage exists: download it and parse the JSON payload.
    print("reading_passage 已存在于GCS中,轉換 Latex 模式")
    reading_passage_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
    reading_passage_json = json.loads(reading_passage_text)
    original_reading_passage = reading_passage_json["reading_passage"]

    sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,使用 zh-TW"
    # Prompt lines are kept flush-left so that no source-code indentation
    # leaks into the text sent to the model.
    user_content = f"""
請根據 {original_reading_passage}
敘述中,請把數學或是專業術語,用 Latex 包覆($...$),並且不要去改原本的文章
加減乘除、根號、次方、化學符號、物理符號等等的運算式口語也換成 LATEX 符號
請一定要使用繁體中文 zh-TW,並用台灣人的口語
產生的結果不要前後文解釋,也不要敘述這篇文章怎麼產生的
只需要專注提供 Reading Passage,字數在 200~500 字以內
"""
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content},
    ]

    request_payload = {
        "model": "gpt-4-turbo",
        "messages": messages,
        "max_tokens": 4000,
    }

    response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
    # `message.content` may be None; guard before calling .strip() and
    # refuse to persist an empty LaTeX version.
    raw_content = response.choices[0].message.content
    new_reading_passage = (raw_content or "").strip()
    if not new_reading_passage:
        raise gr.Error("reading_passage LaTeX 轉換失敗,模型沒有回傳內容!")

    print("=====new_reading_passage=====")
    print(new_reading_passage)
    print("=====new_reading_passage=====")

    reading_passage_json["reading_passage"] = new_reading_passage
    reading_passage_text = json.dumps(reading_passage_json, ensure_ascii=False, indent=2)

    # Save under a separate *_latex.json name so the original is preserved.
    new_file_name = f'{video_id}_reading_passage_latex.json'
    new_blob_name = f"{video_id}/{new_file_name}"
    upload_file_to_gcs_with_json_string(gcs_client, bucket_name, new_blob_name, reading_passage_text)

    return new_reading_passage
|
| 1605 |
+
|
| 1606 |
+
def summary_add_markdown_version(video_id):
    """Create a Markdown-formatted copy of a video's stored summary.

    Downloads ``{video_id}/{video_id}_summary.json`` from the
    ``video_ai_assistant`` bucket, asks the chat model to reformat the
    summary into a fixed four-section Markdown layout (with math wrapped in
    ``$...$``), and uploads the result as
    ``{video_id}/{video_id}_summary_markdown.json``. The source JSON blob
    itself is not overwritten.

    Args:
        video_id: Identifier used to build the blob paths.

    Returns:
        The Markdown summary text returned by the model.

    Raises:
        gr.Error: If the source blob is reported missing, or if the model
            returns no usable text.
    """
    print("===summary_convert_to_markdown===")
    gcs_client = GCS_CLIENT
    bucket_name = 'video_ai_assistant'
    file_name = f'{video_id}_summary.json'
    blob_name = f"{video_id}/{file_name}"
    print(f"blob_name: {blob_name}")

    # Bail out early when there is no summary to convert.
    is_file_exists = GCS_SERVICE.check_file_exists(bucket_name, blob_name)
    if not is_file_exists:
        raise gr.Error("summary 不存在!")

    # The summary exists: download it and parse the JSON payload.
    print("summary 已存在于GCS中,轉換 Markdown 模式")
    summary_text = download_blob_to_string(gcs_client, bucket_name, blob_name)
    summary_json = json.loads(summary_text)
    original_summary = summary_json["summary"]

    sys_content = "你是一個擅長資料分析跟影片教學的老師,user 為學生,請精讀資料文本,自行判斷資料的種類,使用 zh-TW"
    # Prompt lines are kept flush-left so that no source-code indentation
    # leaks into the Markdown template sent to the model.
    user_content = f"""
請根據 {original_summary}
轉換格式為 Markdown
只保留:📚 整體摘要、🔖 重點概念、💡 為什麼我們要學這個、❓ 延伸小問題
其他的不要保留
整體摘要在一百字以內
重點概念轉成 bullet points
以及可能的結論與結尾延伸小問題提供學生作反思
敘述中,請把數學或是專業術語,用 Latex 包覆($...$)
加減乘除、根號、次方等等的運算式口語也換成 LATEX 數學符號

整體格式為:
## 📚 整體摘要
- (一個 bullet point....)

## 🔖 重點概念
- xxx
- xxx
- xxx

## 💡 為什麼我們要學這個?
- (一個 bullet point....)

## ❓ 延伸小問題
- (一個 bullet point....)
"""
    messages = [
        {"role": "system", "content": sys_content},
        {"role": "user", "content": user_content},
    ]
    request_payload = {
        "model": "gpt-4-turbo",
        "messages": messages,
        "max_tokens": 4000,
    }

    response = OPEN_AI_CLIENT.chat.completions.create(**request_payload)
    # `message.content` may be None; guard before calling .strip() and
    # refuse to persist an empty Markdown version.
    raw_content = response.choices[0].message.content
    new_summary = (raw_content or "").strip()
    if not new_summary:
        raise gr.Error("summary Markdown 轉換失敗,模型沒有回傳內容!")

    print("=====new_summary=====")
    print(new_summary)
    print("=====new_summary=====")

    summary_json["summary"] = new_summary
    summary_text = json.dumps(summary_json, ensure_ascii=False, indent=2)

    # Save under a separate *_markdown.json name so the original is preserved.
    new_file_name = f'{video_id}_summary_markdown.json'
    new_blob_name = f"{video_id}/{new_file_name}"
    upload_file_to_gcs_with_json_string(gcs_client, bucket_name, new_blob_name, summary_text)

    return new_summary
|
| 1674 |
+
|
| 1675 |
+
|
| 1676 |
+
|
| 1677 |
+
|
| 1678 |
# AI 生成教學素材
|
| 1679 |
def get_meta_data(video_id, source="gcs"):
|
| 1680 |
if source == "gcs":
|
|
|
|
| 2566 |
df_string_output = gr.Textbox(lines=40, label="Data Text", interactive=False, show_copy_button=True)
|
| 2567 |
with gr.Tab("文章本文"):
|
| 2568 |
with gr.Row() as reading_passage_admin:
|
| 2569 |
+
with gr.Column():
|
| 2570 |
+
with gr.Row():
|
| 2571 |
+
reading_passage_kind = gr.Textbox(value="reading_passage", show_label=False)
|
| 2572 |
+
with gr.Row():
|
| 2573 |
+
reading_passage_text_to_latex = gr.Button("新增 LaTeX", size="sm", variant="primary")
|
| 2574 |
+
reading_passage_get_button = gr.Button("取得", size="sm", variant="primary")
|
| 2575 |
+
reading_passage_edit_button = gr.Button("編輯", size="sm", variant="primary")
|
| 2576 |
+
reading_passage_update_button = gr.Button("更新", size="sm", variant="primary")
|
| 2577 |
+
reading_passage_delete_button = gr.Button("刪除", size="sm", variant="primary")
|
| 2578 |
+
reading_passage_create_button = gr.Button("建立", size="sm", variant="primary")
|
| 2579 |
with gr.Row():
|
| 2580 |
reading_passage_text = gr.Textbox(label="reading_passage", lines=40, interactive=False, show_copy_button=True)
|
| 2581 |
with gr.Tab("重點摘要本文"):
|
| 2582 |
with gr.Row() as summary_admmin:
|
| 2583 |
+
with gr.Column():
|
| 2584 |
+
with gr.Row():
|
| 2585 |
+
summary_kind = gr.Textbox(value="summary", show_label=False)
|
| 2586 |
+
with gr.Row():
|
| 2587 |
+
summary_to_markdown = gr.Button("新增 Markdown", size="sm", variant="primary")
|
| 2588 |
+
summary_get_button = gr.Button("取得", size="sm", variant="primary")
|
| 2589 |
+
summary_edit_button = gr.Button("編輯", size="sm", variant="primary")
|
| 2590 |
+
summary_update_button = gr.Button("更新", size="sm", variant="primary")
|
| 2591 |
+
summary_delete_button = gr.Button("刪除", size="sm", variant="primary")
|
| 2592 |
+
summary_create_button = gr.Button("建立", size="sm", variant="primary")
|
| 2593 |
with gr.Row():
|
| 2594 |
summary_text = gr.Textbox(label="Summary", lines=40, interactive=False, show_copy_button=True)
|
| 2595 |
with gr.Tab("關鍵時刻本文"):
|
|
|
|
| 2790 |
# web_link.change(process_web_link, inputs=web_link, outputs=[btn_1, btn_2, btn_3, df_summarise, df_string_output])
|
| 2791 |
|
| 2792 |
# reading_passage event
|
| 2793 |
+
reading_passage_text_to_latex.click(
|
| 2794 |
+
reading_passage_add_latex_version,
|
| 2795 |
+
inputs=[video_id],
|
| 2796 |
+
outputs=[reading_passage_text]
|
| 2797 |
+
)
|
| 2798 |
reading_passage_get_button.click(
|
| 2799 |
get_LLM_content,
|
| 2800 |
inputs=[video_id, reading_passage_kind],
|
|
|
|
| 2822 |
)
|
| 2823 |
|
| 2824 |
# summary event
|
| 2825 |
+
summary_to_markdown.click(
|
| 2826 |
+
summary_add_markdown_version,
|
| 2827 |
+
inputs=[video_id],
|
| 2828 |
+
outputs=[summary_text]
|
| 2829 |
+
)
|
| 2830 |
summary_get_button.click(
|
| 2831 |
get_LLM_content,
|
| 2832 |
inputs=[video_id, summary_kind],
|