Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,10 +1,17 @@
|
|
| 1 |
import os
|
| 2 |
import sys
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
# 1.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
os.system(f"{sys.executable} -m spacy download en_core_web_sm")
|
| 6 |
|
| 7 |
-
#
|
| 8 |
try:
|
| 9 |
import huggingface_hub
|
| 10 |
if not hasattr(huggingface_hub, 'HfFolder'):
|
|
@@ -22,7 +29,7 @@ import spacy
|
|
| 22 |
from fastcoref import FCoref
|
| 23 |
from deep_translator import GoogleTranslator
|
| 24 |
|
| 25 |
-
#
|
| 26 |
print("🚀 [System] 正在初始化 NLP 詞性解析器...")
|
| 27 |
nlp = spacy.load("en_core_web_sm")
|
| 28 |
|
|
@@ -34,7 +41,7 @@ except:
|
|
| 34 |
model = FCoref('biu-nlp/f-coref', device='cpu')
|
| 35 |
print("✅ [System] 備用路徑加載成功!")
|
| 36 |
|
| 37 |
-
#
|
| 38 |
def coref_learning_pipeline(user_input):
|
| 39 |
if not user_input.strip():
|
| 40 |
return "等待輸入...", "等待輸入...", "等待輸入..."
|
|
@@ -42,56 +49,45 @@ def coref_learning_pipeline(user_input):
|
|
| 42 |
try:
|
| 43 |
# A. 判斷語言並進行中翻英橋接
|
| 44 |
has_chinese = any('\u4e00' <= char <= '\u9fff' for char in user_input)
|
|
|
|
| 45 |
if has_chinese:
|
| 46 |
working_text = GoogleTranslator(source='zh-CN', target='en').translate(user_input)
|
| 47 |
mode_notice = "中文輸入模式(已啟動 AI 跨語言橋接)"
|
| 48 |
|
| 49 |
-
#
|
| 50 |
-
translation_text = working_text
|
| 51 |
-
translation_label = "📖 完整文本翻譯 (中 ➔ 英)"
|
| 52 |
else:
|
| 53 |
working_text = user_input
|
| 54 |
mode_notice = "英文原語模式"
|
| 55 |
|
| 56 |
-
#
|
| 57 |
-
|
| 58 |
-
|
| 59 |
|
| 60 |
# B. 執行 AI 指代消解運算
|
| 61 |
preds = model.predict(texts=[working_text])
|
| 62 |
clusters = preds[0].get_clusters()
|
| 63 |
|
| 64 |
-
# C.
|
| 65 |
-
translation_text = GoogleTranslator(source='en', target='zh-TW').translate(working_text)
|
| 66 |
-
|
| 67 |
-
# D. 💥 欄位二:全新重構【AI 智慧單字本】(利用詞性精準抓取名詞,消滅人名)
|
| 68 |
vocab_output = ""
|
| 69 |
doc = nlp(working_text)
|
| 70 |
extracted_words = set()
|
| 71 |
-
|
| 72 |
-
# E. 欄位三:生成「AI 語意共指報告」
|
| 73 |
-
report_text = f"✨ 系統狀態:{mode_notice}\n"
|
| 74 |
-
report_text += f"📝 英文運算空間: {working_text}\n"
|
| 75 |
|
| 76 |
for token in doc:
|
| 77 |
-
# 只抓普通名詞 (NOUN),排除專有名詞/人名 (PROPN) 與代名詞 (PRON)
|
| 78 |
if token.pos_ == "NOUN" and len(token.text) > 2:
|
| 79 |
-
# 統一轉成單數原型原型,畫面更漂亮(例如把 lambs 變成 lamb)
|
| 80 |
extracted_words.add(token.lemma_.lower())
|
| 81 |
|
| 82 |
if extracted_words:
|
| 83 |
for word in sorted(extracted_words):
|
| 84 |
try:
|
| 85 |
-
# 翻成繁體中文
|
| 86 |
word_zh = GoogleTranslator(source='en', target='zh-TW').translate(word)
|
| 87 |
-
# 輸出格式:【Farmer ➔ 農夫】
|
| 88 |
vocab_output += f"🔸 {word.capitalize()} ➔ {word_zh}\n"
|
| 89 |
except:
|
| 90 |
pass
|
| 91 |
else:
|
| 92 |
vocab_output = "ℹ️ 未偵測到適合學習的核心英文單字。"
|
| 93 |
|
| 94 |
-
#
|
| 95 |
report_text = f"✨ 系統狀態:{mode_notice}\n"
|
| 96 |
report_text += f"📝 英文運算空間: {working_text}\n"
|
| 97 |
report_text += "-----------------------------------------\n"
|
|
@@ -109,31 +105,33 @@ def coref_learning_pipeline(user_input):
|
|
| 109 |
report_text += f" 🔗 鏈結 {i+1} (中): {cluster_str_zh}\n"
|
| 110 |
report_text += f" └─ (英): {cluster_str_en}\n"
|
| 111 |
|
|
|
|
| 112 |
return translation_text, vocab_output, report_text
|
| 113 |
|
| 114 |
except Exception as e:
|
| 115 |
return f"錯誤: {str(e)}", "無法整合單字", f"運行異常: {str(e)}"
|
| 116 |
|
| 117 |
-
#
|
| 118 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")) as demo:
|
| 119 |
-
gr.Markdown("# 🤖 AI 跨語言智慧語意學習終端")
|
| 120 |
-
gr.Markdown("### 🚀 專
|
| 121 |
|
| 122 |
with gr.Row():
|
| 123 |
with gr.Column(scale=1):
|
| 124 |
txt_input = gr.Textbox(
|
| 125 |
label="📥 請輸入中文或英文段落 (Input Text)",
|
| 126 |
-
placeholder="例如:Mary is a
|
| 127 |
lines=5
|
| 128 |
)
|
| 129 |
btn_submit = gr.Button("🔥 執行多維度 AI 語意解析", variant="primary")
|
| 130 |
|
| 131 |
with gr.Column(scale=1):
|
| 132 |
-
|
| 133 |
-
|
| 134 |
out_vocab = gr.Textbox(label="📚 AI 智慧單字本 (Vocabulary Booklet)", lines=5)
|
| 135 |
out_report = gr.Textbox(label="🎯 AI 語意消解報告 (Coreference Report)", lines=5)
|
| 136 |
|
|
|
|
| 137 |
btn_submit.click(
|
| 138 |
fn=coref_learning_pipeline,
|
| 139 |
inputs=txt_input,
|
|
|
|
| 1 |
import logging
import os
import subprocess
import sys
import warnings

# 1. Quiet noisy async / resource-cleanup output at startup.
#    These filters only hide messages; they do not fix the underlying
#    "ValueError: Invalid file descriptor: -1" they were added for.
warnings.filterwarnings("ignore", category=RuntimeWarning)
# NOTE(review): "Exception ignored in ..." lines are normally emitted through
# sys.unraisablehook rather than the warnings module, so this filter may never
# match anything -- confirm before relying on it.
warnings.filterwarnings("ignore", message="Exception ignored in")
logging.getLogger("asyncio").setLevel(logging.ERROR)

# 2. Force-download the spaCy English model on every start.
#    The command is passed as an argument list (no shell), so an interpreter
#    path containing spaces or shell metacharacters cannot break or alter it.
_spacy_download = subprocess.run(
    [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
    check=False,  # best effort: keep starting up even if the download fails
)
if _spacy_download.returncode != 0:
    # Surface the failure instead of discarding the exit status; loading the
    # model later will fail if it is genuinely missing.
    print(
        "[System] spaCy model download exited with code "
        f"{_spacy_download.returncode}",
        file=sys.stderr,
    )
|
| 13 |
|
| 14 |
+
# 3. 解決新舊版本 Hub 相容性的 Mock 補丁
|
| 15 |
try:
|
| 16 |
import huggingface_hub
|
| 17 |
if not hasattr(huggingface_hub, 'HfFolder'):
|
|
|
|
| 29 |
from fastcoref import FCoref
|
| 30 |
from deep_translator import GoogleTranslator
|
| 31 |
|
| 32 |
+
# 4. 初始化 Spacy NLP 詞性解析器與指代模型
|
| 33 |
print("🚀 [System] 正在初始化 NLP 詞性解析器...")
|
| 34 |
nlp = spacy.load("en_core_web_sm")
|
| 35 |
|
|
|
|
| 41 |
model = FCoref('biu-nlp/f-coref', device='cpu')
|
| 42 |
print("✅ [System] 備用路徑加載成功!")
|
| 43 |
|
| 44 |
+
# 5. 核心運算邏輯
|
| 45 |
def coref_learning_pipeline(user_input):
|
| 46 |
if not user_input.strip():
|
| 47 |
return "等待輸入...", "等待輸入...", "等待輸入..."
|
|
|
|
| 49 |
try:
|
| 50 |
# A. 判斷語言並進行中翻英橋接
|
| 51 |
has_chinese = any('\u4e00' <= char <= '\u9fff' for char in user_input)
|
| 52 |
+
|
| 53 |
if has_chinese:
|
| 54 |
working_text = GoogleTranslator(source='zh-CN', target='en').translate(user_input)
|
| 55 |
mode_notice = "中文輸入模式(已啟動 AI 跨語言橋接)"
|
| 56 |
|
| 57 |
+
# ✨ 關鍵修正:輸入中文時,完整文本翻譯欄位輸出「英文」
|
| 58 |
+
translation_text = f"【英文對照】\n{working_text}"
|
|
|
|
| 59 |
else:
|
| 60 |
working_text = user_input
|
| 61 |
mode_notice = "英文原語模式"
|
| 62 |
|
| 63 |
+
# ✨ 關鍵修正:輸入英文時,完整文本翻譯欄位輸出「繁體中文」
|
| 64 |
+
translated_zh = GoogleTranslator(source='en', target='zh-TW').translate(working_text)
|
| 65 |
+
translation_text = f"【中文翻譯】\n{translated_zh}"
|
| 66 |
|
| 67 |
# B. 執行 AI 指代消解運算
|
| 68 |
preds = model.predict(texts=[working_text])
|
| 69 |
clusters = preds[0].get_clusters()
|
| 70 |
|
| 71 |
+
# C. 建立【AI 智慧單字本】
|
|
|
|
|
|
|
|
|
|
| 72 |
vocab_output = ""
|
| 73 |
doc = nlp(working_text)
|
| 74 |
extracted_words = set()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
for token in doc:
|
|
|
|
| 77 |
if token.pos_ == "NOUN" and len(token.text) > 2:
|
|
|
|
| 78 |
extracted_words.add(token.lemma_.lower())
|
| 79 |
|
| 80 |
if extracted_words:
|
| 81 |
for word in sorted(extracted_words):
|
| 82 |
try:
|
|
|
|
| 83 |
word_zh = GoogleTranslator(source='en', target='zh-TW').translate(word)
|
|
|
|
| 84 |
vocab_output += f"🔸 {word.capitalize()} ➔ {word_zh}\n"
|
| 85 |
except:
|
| 86 |
pass
|
| 87 |
else:
|
| 88 |
vocab_output = "ℹ️ 未偵測到適合學習的核心英文單字。"
|
| 89 |
|
| 90 |
+
# D. 生成「AI 語意共指報告」
|
| 91 |
report_text = f"✨ 系統狀態:{mode_notice}\n"
|
| 92 |
report_text += f"📝 英文運算空間: {working_text}\n"
|
| 93 |
report_text += "-----------------------------------------\n"
|
|
|
|
| 105 |
report_text += f" 🔗 鏈結 {i+1} (中): {cluster_str_zh}\n"
|
| 106 |
report_text += f" └─ (英): {cluster_str_en}\n"
|
| 107 |
|
| 108 |
+
# 💥 這裡非常重要!必須依序回傳給前端的三個框框
|
| 109 |
return translation_text, vocab_output, report_text
|
| 110 |
|
| 111 |
except Exception as e:
|
| 112 |
return f"錯誤: {str(e)}", "無法整合單字", f"運行異常: {str(e)}"
|
| 113 |
|
| 114 |
+
# 6. 精美 UI 介面設計
|
| 115 |
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")) as demo:
|
| 116 |
+
gr.Markdown("# 🤖 Janice's AI 跨語言智慧語意學習終端")
|
| 117 |
+
gr.Markdown("### 🚀 專題亮點:結合核心指代消解 (Coreference Resolution) 與 NLP 智慧名詞提取技術")
|
| 118 |
|
| 119 |
with gr.Row():
|
| 120 |
with gr.Column(scale=1):
|
| 121 |
txt_input = gr.Textbox(
|
| 122 |
label="📥 請輸入中文或英文段落 (Input Text)",
|
| 123 |
+
placeholder="例如:Mary is a Farmer. Mary had a little lamb.",
|
| 124 |
lines=5
|
| 125 |
)
|
| 126 |
btn_submit = gr.Button("🔥 執行多維度 AI 語意解析", variant="primary")
|
| 127 |
|
| 128 |
with gr.Column(scale=1):
|
| 129 |
+
# 前端三個輸出元件定義
|
| 130 |
+
out_translation = gr.Textbox(label="📖 完整文本翻譯/對照 (Translation/Context)", lines=3)
|
| 131 |
out_vocab = gr.Textbox(label="📚 AI 智慧單字本 (Vocabulary Booklet)", lines=5)
|
| 132 |
out_report = gr.Textbox(label="🎯 AI 語意消解報告 (Coreference Report)", lines=5)
|
| 133 |
|
| 134 |
+
# 點擊按鈕時,將三個輸出結果精準對接到各自的 Textbox 組件
|
| 135 |
btn_submit.click(
|
| 136 |
fn=coref_learning_pipeline,
|
| 137 |
inputs=txt_input,
|