Spaces:

cwadayi
/

Hf1

Sleeping

App Files Files Community

cwadayi committed on Dec 21, 2025

Commit

dc8b5ff

verified ·

1 Parent(s): fc2fc31

Update app.py

Browse files

Files changed (1) hide show

app.py +49 -40

app.py CHANGED Viewed

@@ -3,74 +3,83 @@ import os
 import torch
 from transformers import BertTokenizer, pipeline
-# 1. Read the secret token (HF_TOKEN) from the environment
 hf_token = os.getenv("HF_TOKEN")
 model_name = "google-bert/bert-base-chinese"
 try:
-    # 2. Load the tokenizer and extend its vocabulary (corresponds to section 2.3.6 of the book)
     tokenizer = BertTokenizer.from_pretrained(model_name, token=hf_token)
-    tokenizer.add_tokens(['明月', '裝飾', '窗子'])
-    # 3. Load the inference pipeline
     classifier = pipeline("sentiment-analysis", model="LiYuan/amazon-review-sentiment-analysis", token=hf_token)
 except Exception as e:
-    print(f"初始化失敗: {e}")
     classifier = None
-def enhanced_nlp_workflow(input_text):  # returns a list of per-line result dicts, or a message string on failure/empty input
-    if classifier is None: return "系統初始化失敗"
-    # Split the input into non-empty lines to implement batch processing
     lines = [line.strip() for line in input_text.split('\n') if line.strip()]
-    if not lines: return "請輸入文字"
-    # 4. Batch-encode all lines (corresponds to section 2.3.5 of the book)
     batch_out = tokenizer.batch_encode_plus(
         lines,
         add_special_tokens=True,
-        truncation=True,
         padding='max_length',
-        max_length=20, # shortened length so the result is easy to inspect in the UI
         return_tensors="pt"
     )
-    # 5. Run batch inference
     results = classifier(lines)
-    # Assemble the output records (corresponds to Table 2-2 in the book)
-    output = []
     for i, line in enumerate(lines):
-        res = results[i]
         ids = batch_out['input_ids'][i].tolist()
-        mask = batch_out['attention_mask'][i].tolist()
-        # Remove [PAD] (ids equal to 0 are dropped) so the decoded result is easier to read
-        clean_ids = [idx for idx in ids if idx != 0]
-        decoded = tokenizer.decode(clean_ids)
-        output.append({
-            "原始句子": line,
-            "情感標籤": f"{res['label']} ({res['score']:.2f})",
-            "還原結果": decoded,
-            "Input IDs": clean_ids,
-            "Attention Mask (前10位)": mask[:10] # first 10 mask entries, to show the PAD logic
         })
-    return output
-# 6. Build the Gradio interface (multi-line textbox in, JSON out) around enhanced_nlp_workflow
-demo = gr.Interface(
-    fn=enhanced_nlp_workflow,
-    inputs=gr.Textbox(
-        label="請輸入中文句子 (支援多行批次輸入)",
-        lines=3,
-        placeholder="第一行：明月裝飾了你的窗子\n第二行：這本書真的非常實用"
-    ),
-    outputs=gr.JSON(label="強化版標準流程輸出結果"),
-    title="CNLP 強化實作：Hugging Face 批次處理與編碼解密",
-    description="本程式完整實作李福林老師書中第一、二章的所有關鍵功能：批次編碼、字典擴充與解碼驗證。"
-)
 if __name__ == "__main__":
     demo.launch()

 import torch
 from transformers import BertTokenizer, pipeline
+# 1. Secret token and model initialization
 hf_token = os.getenv("HF_TOKEN")
 model_name = "google-bert/bert-base-chinese"
 try:
+    # Load the tokenizer and extend its vocabulary (implements section 2.3.6 of the book)
     tokenizer = BertTokenizer.from_pretrained(model_name, token=hf_token)
+    tokenizer.add_tokens(['明月', '裝飾', '窗子', '夢境']) # extra vocabulary added for the creative demo
+    # Load the inference pipeline
     classifier = pipeline("sentiment-analysis", model="LiYuan/amazon-review-sentiment-analysis", token=hf_token)
 except Exception as e:
+    tokenizer = None  # failure is recorded only as None; the exception itself is not logged here
     classifier = None
+def creative_nlp_lab(input_text):  # click handler: returns a list of per-line report dicts, or a message string on failure/empty input
+    if not tokenizer or not classifier: return "系統初始化失敗，請檢查 Secret 設定。"
     lines = [line.strip() for line in input_text.split('\n') if line.strip()]
+    if not lines: return "請輸入文字來開啟實驗！"
+    # 2. Batch-encode all non-empty lines (implements section 2.3.5 of the book)
     batch_out = tokenizer.batch_encode_plus(
         lines,
         add_special_tokens=True,
         padding='max_length',
+        max_length=15,
+        truncation=True,
         return_tensors="pt"
     )
     results = classifier(lines)
+    lab_reports = []
     for i, line in enumerate(lines):
         ids = batch_out['input_ids'][i].tolist()
+        # Book's decode check: decode each non-zero id on its own (0 is presumably the [PAD] id; confirm against the tokenizer)
+        tokens = [tokenizer.decode([idx]) for idx in ids if idx != 0]
+        # Creative feature: visual presentation of the tokenization (tokens joined with " | ")
+        visual_tokens = " | ".join(tokens)
+        # Creative feature: context style analysis (described as keyword- and sentiment-based; the code below only checks keywords)
+        style = "現代散文"
+        if "明月" in line or "窗" in line: style = "經典詩意"
+        lab_reports.append({
+            "🔬 實驗對象": line,
+            "🎨 語境風格": style,
+            "🎭 情感色彩": f"{results[i]['label']} (強度: {results[i]['score']:.2f})",
+            "🧩 詞元拆解 (Tokens)": visual_tokens,
+            "🔢 機器編碼 (Input IDs)": [idx for idx in ids if idx != 0]
         })
+    return lab_reports
+# 3. Build the creative Gradio Blocks interface: textbox input, run button wired to creative_nlp_lab, JSON output, and examples
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🚀 Hugging Face 中文 NLP 創意實驗室")
+    gr.Markdown("本實驗室基於《Hugging Face 自然語言處理實戰》架構，展示編碼器如何將感性的文字轉化為理性的數據。")
+    with gr.Row():
+        input_area = gr.Textbox(
+            label="輸入靈感（支援多行批次輸入）",
+            lines=4,
+            placeholder="例如：\n明月裝飾了你的窗子\n這本書讓 AI 變得簡單"
+        )
+    run_btn = gr.Button("開始實驗", variant="primary")
+    output_json = gr.JSON(label="實驗報告（實作書中表 2-2 數據架構）")
+    run_btn.click(fn=creative_nlp_lab, inputs=input_area, outputs=output_json)
+    gr.Examples(
+        examples=[["明月裝飾了你的窗子\n你裝飾了別人的夢"], ["HuggingFace 工具集真的好用"]],
+        inputs=input_area
+    )
 if __name__ == "__main__":
     demo.launch()