File size: 3,394 Bytes
1793f82 6ceffd5 1793f82 2af716e 6ceffd5 a7278e8 1793f82 a7278e8 2af716e a7278e8 2af716e fc2fc31 2af716e a7278e8 2af716e a7278e8 2af716e 6ceffd5 2af716e fc2fc31 2af716e 6ceffd5 2af716e fc2fc31 2af716e 1793f82 6ceffd5 dc8b5ff fc2fc31 2af716e dc8b5ff 2af716e 881be67 dc8b5ff 881be67 2af716e fc2fc31 6ceffd5 dc8b5ff 1793f82 2af716e dc8b5ff 2af716e dc8b5ff 2af716e dc8b5ff 2af716e dc8b5ff 1793f82 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
import gradio as gr
import os
from transformers import BertTokenizer, pipeline
# 1. Secure token and tool initialisation (book figure 1-3).
# The access token is read from the environment so it never lives in the code.
hf_token = os.getenv("HF_TOKEN")
model_name = "google-bert/bert-base-chinese"

try:
    # Load the tokenizer and extend its vocabulary (book section 2.3.6).
    tokenizer = BertTokenizer.from_pretrained(model_name, token=hf_token)
    tokenizer.add_tokens(['明月', '裝飾', '窗子', '李福林'])

    # Sentiment-analysis pipeline (chapters 5-6).
    classifier = pipeline(
        "sentiment-analysis",
        model="LiYuan/amazon-review-sentiment-analysis",
        token=hf_token,
    )

    # Named-entity-recognition pipeline (chapters 10-11 hands-on task),
    # using a model dedicated to Chinese entities.
    ner_tagger = pipeline(
        "ner",
        model="ckiplab/bert-base-chinese-ner",
        token=hf_token,
    )
except Exception as err:
    # Best-effort start-up: report the problem and leave every tool unset so
    # the request handler can show a friendly failure message instead.
    print(f"初始化錯誤: {err}")
    tokenizer = classifier = ner_tagger = None
def advanced_nlp_lab(input_text, max_length=25):
    """Analyse each non-empty input line with NER, sentiment and tokenisation.

    Args:
        input_text: Raw text from the UI, one sentence per line. ``None`` is
            treated like an empty string.
        max_length: Length every line is padded/truncated to when the batch
            is encoded. Defaults to 25.

    Returns:
        A user-facing message string when the models are unavailable or when
        no text was supplied; otherwise a list holding one report dict per
        non-empty line (original sentence, detected entities, sentiment,
        tokens and token IDs).
    """
    # All three tools are needed below, so refuse to run if any is missing
    # rather than failing later with an opaque error.
    if tokenizer is None or classifier is None or ner_tagger is None:
        return "⚠️ 系統初始化失敗"

    stripped = (raw.strip() for raw in (input_text or "").split('\n'))
    lines = [text for text in stripped if text]
    if not lines:
        return "💡 請輸入文字開始偵測!"

    # 2. Batch encoding (book section 2.3.5). Calling the tokenizer directly
    # is the supported replacement for the deprecated ``batch_encode_plus``.
    batch_out = tokenizer(
        lines,
        add_special_tokens=True,
        padding='max_length',
        max_length=max_length,
        truncation=True,
        return_tensors="pt",
    )
    # Ask the tokenizer for its padding id instead of assuming it is 0.
    pad_id = tokenizer.pad_token_id

    lab_reports = []
    for line, encoded_row in zip(lines, batch_out['input_ids']):
        # 3. Named-entity detection (book chapter 10 task).
        ner_results = ner_tagger(line)
        entities = [f"{ent['word']}({ent['entity']})" for ent in ner_results]

        # 4. Sentiment analysis (book chapter 7 task).
        sentiment = classifier(line)[0]

        # Drop the padding once and reuse the result for tokens and IDs.
        ids = [idx for idx in encoded_row.tolist() if idx != pad_id]
        tokens = [tokenizer.decode([idx]) for idx in ids]

        lab_reports.append({
            "📝 原始句子": line,
            "🔍 實體偵測 (NER)": entities if entities else "未偵測到實體",
            "🎭 情感分析": f"{sentiment['label']} (信心: {sentiment['score']:.2f})",
            "🧩 分詞結構": " | ".join(tokens),
            "🔢 機器編碼 IDs": ids,
        })
    return lab_reports
# 5. Build the Blocks-based interface.
# NOTE(review): the source's indentation was lost; this layout assumes only the
# textbox sits inside the Row — confirm against the deployed app.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🕵️♂️ Hugging Face 中文語義全解析實驗室")
    gr.Markdown(
        "本程式整合了李福林著《Hugging Face 自然語言處理實戰》中的多項任務:從編碼矩陣到情感分類,再到命名實體識別。"
    )

    with gr.Row():
        input_area = gr.Textbox(
            label="🔮 請輸入包含人名、地名或情感的中文 (支援多行)",
            lines=4,
            placeholder="例如:李福林在北京寫下了這本 Hugging Face 實戰書。",
        )

    run_btn = gr.Button("🚀 啟動多維度語義解析", variant="primary")
    output_json = gr.JSON(label="📊 深度解析報告 (實作書中第 2, 7, 10 章核心)")

    # Wire the button to the analysis function; the result is shown as JSON.
    run_btn.click(
        fn=advanced_nlp_lab,
        inputs=input_area,
        outputs=output_json,
    )

    # Clickable sample sentences that pre-fill the textbox.
    gr.Examples(
        examples=[
            ["李福林在台北展示了 Transformer 的威力"],
            ["明月裝飾了你的窗子"],
        ],
        inputs=input_area,
    )

if __name__ == "__main__":
    demo.launch()
|