Spaces:

JJS341
/

Coreference-Bot

Running

File size: 5,953 Bytes

3808d14
 
d6000e4
 
3808d14
d6000e4
 
 
 
 
 
3808d14
 
d6000e4
b9c7347
 
 
 
 
 
 
 
 
 
 
 
 
91694e5
3808d14
b9c7347
3aebd6c
d6000e4
91694e5
 
 
4eb2611
b9c7347
 
91694e5
4eb2611
b9c7347
4eb2611
0de769a
d6000e4
4eb2611
3808d14
4eb2611
0821432
3808d14
4eb2611
0821432
d6000e4
0821432
 
4eb2611
6b87ab3
d6000e4
 
0821432
 
4eb2611
6b87ab3
d6000e4
 
 
a4e3ada
4eb2611
0821432
55fce49
 
d6000e4
4eb2611
91694e5
 
 
 
 
 
 
 
 
 
 
 
 
 
4eb2611
91694e5
75183ac
d6000e4
4eb2611
 
 
55fce49
4eb2611
8d0b3cb
4eb2611
55fce49
bd6b4d0
 
9f1ede3
bd6b4d0
 
75183ac
4eb2611
 
a4e3ada
d6000e4
4eb2611
0821432
3808d14
4eb2611
 
0acc491
 
ba1029c
d6000e4
4eb2611
 
 
 
 
d6000e4
4eb2611
 
 
 
 
d6000e4
91694e5
 
4eb2611
 
 
 
 
 
3808d14
c8bb4b7
0acc491

import os
import sys
import warnings
import logging

# Local imports: only this startup section needs them.
import importlib.util
import subprocess

# 1. Quiet noisy asyncio / resource-cleanup warnings
#    (added to silence "ValueError: Invalid file descriptor: -1" noise).
warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", message="Exception ignored in")
logging.getLogger("asyncio").setLevel(logging.ERROR)

# 2. Make sure the spaCy English model is installed.
#    The download is skipped when the model package is already importable,
#    so a warm restart does not pay for a fresh download.
if importlib.util.find_spec("en_core_web_sm") is None:
    # Argument list instead of a shell string: no shell parsing, so an
    # interpreter path containing spaces or metacharacters cannot break it.
    _spacy_download = subprocess.run(
        [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
        check=False,
    )
    if _spacy_download.returncode != 0:
        logging.getLogger(__name__).warning(
            "spaCy model download exited with status %s",
            _spacy_download.returncode,
        )
    # Let the import system notice the freshly installed package.
    importlib.invalidate_caches()

# 3. Compatibility shim for huggingface_hub versions that do not expose
#    ``HfFolder``: install a minimal stand-in under that name.
try:
    import huggingface_hub

    if not hasattr(huggingface_hub, 'HfFolder'):
        class MockHfFolder:
            """Minimal stand-in for ``huggingface_hub.HfFolder``."""

            @staticmethod
            def get_token():
                """Return the value of the ``HF_TOKEN`` environment variable, or None."""
                return os.getenv("HF_TOKEN")

            @staticmethod
            def save_token(token):
                """Accept *token* and discard it; nothing is persisted."""

        huggingface_hub.HfFolder = MockHfFolder
except Exception as exc:
    # Best effort: the app continues without the shim, but the reason is
    # recorded instead of being silently swallowed.
    logging.getLogger(__name__).warning(
        "huggingface_hub compatibility shim skipped: %s", exc
    )

import gradio as gr
import spacy
from fastcoref import FCoref
from deep_translator import GoogleTranslator

# 4. Initialise the spaCy pipeline and the coreference model.
print("🚀 [System] 正在初始化 NLP 詞性解析器...")
nlp = spacy.load("en_core_web_sm")

print("🚀 [System] 正在初始化 F-Coref 核心大腦...")
try:
    model = FCoref(model_name_or_path='biu-nlp/f-coref', device='cpu')
    print("✅ [System] 所有模型加載成功！")
except Exception as exc:
    # Record why the keyword-style call failed before retrying with the
    # model name passed positionally. A failure of the retry propagates.
    logging.getLogger(__name__).warning(
        "FCoref keyword-style load failed (%s); retrying positionally", exc
    )
    model = FCoref('biu-nlp/f-coref', device='cpu')
    print("✅ [System] 備用路徑加載成功！")

# 5. Core processing logic
def _build_vocab_text(text, translator):
    """Return the vocabulary panel text for the English *text*.

    Collects the lower-cased lemmas of NOUN tokens longer than two
    characters and lists them alphabetically with their translation.
    A word whose translation fails or comes back empty is listed with the
    English word in place of the translation instead of being dropped.
    """
    lemmas = {
        token.lemma_.lower()
        for token in nlp(text)
        if token.pos_ == "NOUN" and len(token.text) > 2
    }
    if not lemmas:
        return "ℹ️ 未偵測到適合學習的核心英文單字。"

    entries = []
    for word in sorted(lemmas):
        try:
            word_zh = translator.translate(word)
        except Exception:
            word_zh = None
        entries.append(f"🔸 {word.capitalize()} ➔ {word_zh or word}\n")
    return "".join(entries)


def _build_coref_report(mode_notice, working_text, clusters, translator):
    """Return the coreference report text for the given mention clusters.

    Each cluster is rendered as one translated line and one English line.
    If translating any mention of a cluster fails, the translated line
    falls back to the English chain.
    """
    parts = [
        f"✨ 系統狀態：{mode_notice}\n",
        f"📝 英文運算空間: {working_text}\n",
        "-----------------------------------------\n",
    ]
    if not clusters:
        parts.append("🔍 分析結果：指代關係明確，無需額外消解。")
        return "".join(parts)

    parts.append("🎯【實體連連看鏈結 (Entity Chains)】:\n")
    for index, cluster in enumerate(clusters, start=1):
        chain_en = ' ↔ '.join(cluster)
        try:
            chain_zh = ' ↔ '.join(
                translator.translate(mention) for mention in cluster
            )
        except Exception:
            chain_zh = chain_en
        parts.append(f" 🔗 鏈結 {index} (中): {chain_zh}\n")
        parts.append(f"    └─ (英): {chain_en}\n")
    return "".join(parts)


def coref_learning_pipeline(user_input):
    """Run translation, vocabulary extraction and coreference analysis.

    Args:
        user_input: Text entered by the user. ``None``, empty and
            whitespace-only values are treated as "no input yet".

    Returns:
        A 3-tuple of strings in the order the UI expects them:
        (translation text, vocabulary text, coreference report).
        Any failure during processing is reported inside the tuple
        rather than raised.
    """
    if not user_input or not user_input.strip():
        return "等待輸入...", "等待輸入...", "等待輸入..."

    try:
        # One English -> Traditional Chinese translator reused for every
        # lookup below instead of constructing a new one per word/mention.
        en_to_zh = GoogleTranslator(source='en', target='zh-TW')

        # A. Detect CJK ideographs; Chinese input is bridged to English
        #    because the downstream models work on English text.
        has_chinese = any('\u4e00' <= char <= '\u9fff' for char in user_input)

        if has_chinese:
            working_text = GoogleTranslator(source='zh-CN', target='en').translate(user_input)
            mode_notice = "中文輸入模式（已啟動 AI 跨語言橋接）"
            # Chinese input: the translation panel shows the English text.
            translation_text = f"【英文對照】\n{working_text}"
        else:
            working_text = user_input
            mode_notice = "英文原語模式"
            # English input: the translation panel shows Traditional Chinese.
            translated_zh = en_to_zh.translate(working_text)
            translation_text = f"【中文翻譯】\n{translated_zh}"

        # B. Coreference resolution on the English working text.
        preds = model.predict(texts=[working_text])
        clusters = preds[0].get_clusters()

        # C. Vocabulary booklet.
        vocab_output = _build_vocab_text(working_text, en_to_zh)

        # D. Coreference report.
        report_text = _build_coref_report(
            mode_notice, working_text, clusters, en_to_zh
        )

        # The order must match the three output boxes wired up in the UI.
        return translation_text, vocab_output, report_text

    except Exception as e:
        return f"錯誤: {str(e)}", "無法整合單字", f"運行異常: {str(e)}"

# 6. UI layout (the theme is not passed to the Blocks constructor here).
with gr.Blocks() as demo:
    # Page title and subtitle.
    gr.Markdown("# 🤖 AI 跨語言智慧語意學習終端")
    gr.Markdown("### 🚀 專題亮點：結合核心指代消解 (Coreference Resolution) 與 NLP 智慧名詞提取技術")

    with gr.Row():
        # Left column: input text box and the submit button.
        with gr.Column(scale=1):
            txt_input = gr.Textbox(
                lines=5,
                placeholder="例如：Mary is a Farmer. Mary had a little lamb.",
                label="📥 請輸入中文或英文段落 (Input Text)",
            )
            btn_submit = gr.Button(
                "🔥 執行多維度 AI 語意解析",
                variant="primary",
            )

        # Right column: the three result boxes, in the order the
        # pipeline returns its values.
        with gr.Column(scale=1):
            out_translation = gr.Textbox(
                lines=3,
                label="📖 完整文本翻譯/對照 (Translation/Context)",
            )
            out_vocab = gr.Textbox(
                lines=5,
                label="📚 AI 智慧單字本 (Vocabulary Booklet)",
            )
            out_report = gr.Textbox(
                lines=5,
                label="🎯 AI 語意消解報告 (Coreference Report)",
            )

    # Clicking the button runs the pipeline on the input box and fills
    # the three result boxes.
    btn_submit.click(
        outputs=[out_translation, out_vocab, out_report],
        inputs=txt_input,
        fn=coref_learning_pipeline,
    )

if __name__ == "__main__":
    # The theme is handed to launch() rather than to gr.Blocks().
    # NOTE(review): launch(theme=...) is only accepted by Gradio releases
    # that take the theme at launch time; confirm against the pinned version.
    soft_theme = gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")
    demo.launch(theme=soft_theme)