Spaces:

GCLing
/

emotion

Runtime error

App Files Files Community

GCLing committed on Jun 16, 2025

Commit

c7ec63e

verified ·

1 Parent(s): 41762c4

Update app.py

Browse files

Files changed (1) hide show

app.py +124 -124

app.py CHANGED Viewed

@@ -1,124 +1,124 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-import os
-import joblib
-import numpy as np
-import librosa
-import gradio as gr
-from huggingface_hub import hf_hub_download
-from deepface import DeepFace
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
-# --- 1. 下載並載入 SVM 模型 ---
-# 這裡 repo_id 填你的模型倉庫路徑，例如 "GCLing/emotion-svm-model"
-# filename 填上傳到該倉庫的檔案名，例如 "svm_emotion_model.joblib"
-print("Downloading SVM model from Hugging Face Hub...")
-model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
-print(f"SVM model downloaded to: {model_path}")
-svm_model = joblib.load(model_path)
-print("SVM model loaded.")
-# --- 2. 載入文字情緒分析模型 ---
-# 以 uer/roberta-base-finetuned-chinanews-chinese 為例；可替換成其他合適的中文情感分類模型
-print("Loading text sentiment model...")
-tokenizer = AutoTokenizer.from_pretrained("uer/roberta-base-finetuned-chinanews-chinese")
-model_txt = AutoModelForSequenceClassification.from_pretrained("uer/roberta-base-finetuned-chinanews-chinese")
-text_emotion = pipeline("sentiment-analysis", model=model_txt, tokenizer=tokenizer)
-print("Text sentiment model loaded.")
-# --- 3. 聲音特徵擷取函式 ---
-def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
-    """
-    從一段音訊 signal (numpy array) 和取樣率 sr 計算 MFCC 特徵 (13 維)，
-    並回傳平均與變異組成的特徵向量 (共 26 維)。
-    """
-    # librosa 載入後 signal 為 float numpy array
-    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
-    # axis=1: 每個 MFCC 維度對時間做平均與變異數
-    return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])
-# --- 4. 三種預測函式 ---
-def predict_face(img: np.ndarray):
-    """
-    臉部情緒分析：使用 DeepFace 分析單張影像 (numpy array, HxWx3)。
-    強制使用 OpenCV 後端以避免 retinaface/tf 版本衝突。
-    回傳格式為 dict，例如 {"happy": 0.80, "sad": 0.05, ...}
-    """
-    # DeepFace.analyze 可能較耗時，建議在 Space 上需有適當硬體
-    result = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
-    # result["emotion"] 是字典
-    return result["emotion"]
-def predict_voice(audio):
-    """
-    語音情緒分析：audio 由 Gradio 傳入，形式為暫存檔路徑字串 (str)。
-    用 librosa.load 讀取，再提取 MFCC 特徵，最後用 SVM 模型 predict_proba。
-    回傳格式為 dict，例如 {"angry":0.1, "happy":0.7, ...}
-    """
-    # audio 參數為 Gradio Audio 組件給的檔案路徑
-    signal, sr = librosa.load(audio, sr=None)
-    feat = extract_feature(signal, sr)
-    probs = svm_model.predict_proba([feat])[0]
-    labels = svm_model.classes_
-    return {labels[i]: float(probs[i]) for i in range(len(labels))}
-def predict_text(text: str):
-    """
-    文字情緒分析：使用 transformers pipeline，
-    輸入中文字串，回傳 dict，例如 {"POSITIVE":0.95} 或模型輸出標籤與信心分數。
-    """
-    if not text or text.strip() == "":
-        return {}
-    pred = text_emotion(text)[0]
-    # pred 形如 {"label": "...", "score": ...}
-    return {pred["label"]: float(pred["score"])}
-# --- 5. 建立 Gradio 介面 ---
-def build_interface():
-    """
-    建立一個 TabbedInterface，包含三個子 Interface：
-    - 臉部情緒 (Webcam 拍照或上傳)
-    - 語音情緒 (錄音或上傳音檔)
-    - 文字情緒 (文字輸入)
-    """
-    # 臉部情緒：使用 gr.Interface 或 Blocks?
-    face_interface = gr.Interface(
-        fn=predict_face,
-        inputs=gr.Image(sources="webcam", streaming=True, type="numpy"),
-        outputs=gr.Label(num_top_classes=1),
-        title="臉部情緒 (即時 Webcam)",
-        description="允許攝影機拍照後自動分析當前表情的情緒分佈。"
-    )
-    # 語音情緒：錄音或上傳
-    voice_interface = gr.Interface(
-        fn=predict_voice,
-        inputs=gr.Audio(sources="microphone", type="filepath"),
-        outputs=gr.Label(num_top_classes=1),
-        title="語音情緒",
-        description="錄製語音或上傳音訊檔，模型會回傳「驚訝/生氣/開心/悲傷/害怕」五種情緒機率。"
-    )
-    # 文字情緒：輸入中文
-    text_interface = gr.Interface(
-        fn=predict_text,
-        inputs=gr.Textbox(lines=3, placeholder="請輸入中文文字…"),
-        outputs=gr.Label(num_top_classes=1),
-        title="文字情緒",
-        description="輸入中文文字，即時判斷文字情緒並回傳標籤與信心分數。"
-    )
-    # 三合一 Tabs
-    app = gr.TabbedInterface(
-        interface_list=[face_interface, voice_interface, text_interface],
-        tab_names=["臉部情緒", "語音情緒", "文字情緒"]
-    )
-    return app
-if __name__ == "__main__":
-    # 可修改 port，如有多個服務可選不同 port
-    demo = build_interface()
-    # share=True 會產生臨時公開連結；若部署到 Spaces，可去掉 share 或留 False
-    demo.launch(server_name="0.0.0.0", server_port=7861, share=True)

+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import os
+import joblib
+import numpy as np
+import librosa
+import gradio as gr
+from huggingface_hub import hf_hub_download
+from deepface import DeepFace
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
+# --- 1. Download and load the SVM model ---
+# repo_id is the Hub model repository path, e.g. "GCLing/emotion-svm-model";
+# filename is the name of the file uploaded to that repository,
+# e.g. "svm_emotion_model.joblib".
+# NOTE: these statements run at import time, so importing this module
+# triggers the download and both model loads.
+print("Downloading SVM model from Hugging Face Hub...")
+model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
+print(f"SVM model downloaded to: {model_path}")
+# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
+# code; only point repo_id at a repository you trust.
+svm_model = joblib.load(model_path)
+print("SVM model loaded.")
+# --- 2. Load the text emotion analysis model ---
+# uer/roberta-base-finetuned-chinanews-chinese is used as an example; it can
+# be replaced by any other suitable Chinese sentiment classification model.
+# NOTE(review): this checkpoint looks like a news-topic classifier rather
+# than a sentiment model — confirm it produces the labels the UI promises.
+print("Loading text sentiment model...")
+tokenizer = AutoTokenizer.from_pretrained("uer/roberta-base-finetuned-chinanews-chinese")
+model_txt = AutoModelForSequenceClassification.from_pretrained("uer/roberta-base-finetuned-chinanews-chinese")
+text_emotion = pipeline("sentiment-analysis", model=model_txt, tokenizer=tokenizer)
+print("Text sentiment model loaded.")
+# --- 3. Audio feature extraction ---
+def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
+    """
+    Build the MFCC feature vector for one audio clip.
+
+    Computes 13 MFCC coefficients for `signal` at sampling rate `sr`, then
+    summarises each coefficient over time by its mean and its variance.
+    Returns the 13 means followed by the 13 variances (26 values).
+    """
+    coefficients = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
+    # Reduce along axis 1 (time) so one statistic remains per coefficient.
+    per_coeff_mean = np.mean(coefficients, axis=1)
+    per_coeff_var = np.var(coefficients, axis=1)
+    return np.concatenate([per_coeff_mean, per_coeff_var])
+# --- 4. The three prediction functions ---
+def predict_face(img: np.ndarray):
+    """
+    Facial emotion analysis of a single image using DeepFace.
+
+    The OpenCV detector backend is forced to avoid retinaface/TensorFlow
+    version conflicts.
+
+    Args:
+        img: Image as a numpy array (H x W x 3), or None when the webcam
+            stream has not delivered a frame yet.
+
+    Returns:
+        dict mapping emotion name to a confidence in [0, 1], e.g.
+        {"happy": 0.80, "sad": 0.05, ...}. An empty dict is returned when
+        there is no frame or no face could be detected.
+    """
+    if img is None:
+        return {}
+    # DeepFace.analyze can be slow; the Space needs adequate hardware.
+    try:
+        result = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
+    except ValueError:
+        # DeepFace raises ValueError when no face is found (enforce_detection
+        # defaults to True); with a streaming webcam that is an ordinary
+        # frame, not an error worth surfacing to the user.
+        return {}
+    # Recent DeepFace releases return a list with one dict per detected
+    # face, older ones a single dict; use the first face in either case.
+    if isinstance(result, (list, tuple)):
+        if not result:
+            return {}
+        result = result[0]
+    # DeepFace reports percentages (0-100) as numpy floats; gr.Label expects
+    # plain Python floats in [0, 1].
+    return {str(name): float(score) / 100.0 for name, score in result["emotion"].items()}
+def predict_voice(audio):
+    """
+    Speech emotion analysis of a recorded clip.
+
+    Args:
+        audio: Path (str) of the temporary file written by the Gradio Audio
+            component, or None/empty when nothing was recorded.
+
+    Returns:
+        dict mapping each class of the SVM model to its probability, e.g.
+        {"angry": 0.1, "happy": 0.7, ...}; an empty dict when no audio was
+        provided.
+    """
+    # Gradio passes None when the user submits without a recording;
+    # librosa.load would fail on it.
+    if not audio:
+        return {}
+    # sr=None keeps the file's native sampling rate.
+    signal, sr = librosa.load(audio, sr=None)
+    feat = extract_feature(signal, sr)
+    # NOTE(review): predict_proba presumably requires the SVM to have been
+    # trained with probability estimates enabled — confirm for this model.
+    probs = svm_model.predict_proba([feat])[0]
+    # str()/float() turn numpy scalars into the plain types gr.Label expects.
+    return {str(label): float(prob) for label, prob in zip(svm_model.classes_, probs)}
+def predict_text(text: str):
+    """
+    Text emotion analysis using the transformers pipeline.
+
+    Args:
+        text: Chinese input string.
+
+    Returns:
+        dict with the single predicted label mapped to its confidence score,
+        e.g. {"POSITIVE": 0.95} (the label set depends on the loaded model);
+        an empty dict for empty or whitespace-only input.
+    """
+    if not text or not text.strip():
+        return {}
+    # Truncate to 512 tokens so long inputs do not exceed the model's
+    # position limit and raise inside the pipeline.
+    pred = text_emotion(text, truncation=True, max_length=512)[0]
+    # pred looks like {"label": "...", "score": ...}
+    return {pred["label"]: float(pred["score"])}
+# --- 5. Build the Gradio interface ---
+def build_interface():
+    """
+    Assemble the three-tab Gradio application and return it.
+
+    One gr.Interface is created per modality, each showing only its top
+    class in a gr.Label:
+    - facial emotion (streaming webcam image, handled by predict_face)
+    - speech emotion (microphone recording, handled by predict_voice)
+    - text emotion (textbox input, handled by predict_text)
+    The three are combined into a gr.TabbedInterface.
+    """
+    # Facial emotion tab.
+    webcam_input = gr.Image(sources="webcam", streaming=True, type="numpy")
+    face_label = gr.Label(num_top_classes=1)
+    face_tab = gr.Interface(
+        fn=predict_face,
+        inputs=webcam_input,
+        outputs=face_label,
+        title="臉部情緒 (即時 Webcam)",
+        description="允許攝影機拍照後自動分析當前表情的情緒分佈。",
+    )
+    # Speech emotion tab.
+    mic_input = gr.Audio(sources="microphone", type="filepath")
+    voice_label = gr.Label(num_top_classes=1)
+    voice_tab = gr.Interface(
+        fn=predict_voice,
+        inputs=mic_input,
+        outputs=voice_label,
+        title="語音情緒",
+        description="錄製語音或上傳音訊檔，模型會回傳「驚訝/生氣/開心/悲傷/害怕」五種情緒機率。",
+    )
+    # Text emotion tab.
+    text_input = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
+    text_label = gr.Label(num_top_classes=1)
+    text_tab = gr.Interface(
+        fn=predict_text,
+        inputs=text_input,
+        outputs=text_label,
+        title="文字情緒",
+        description="輸入中文文字，即時判斷文字情緒並回傳標籤與信心分數。",
+    )
+    # Combine the three interfaces into tabs.
+    return gr.TabbedInterface(
+        interface_list=[face_tab, voice_tab, text_tab],
+        tab_names=["臉部情緒", "語音情緒", "文字情緒"],
+    )
+if __name__ == "__main__":
+    demo = build_interface()
+    # Hugging Face Spaces routes traffic to port 7860 by default, so a
+    # hard-coded 7861 leaves the Space unreachable. Honour
+    # GRADIO_SERVER_PORT when it is set (e.g. to run several services
+    # locally) and fall back to 7860 otherwise.
+    port = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))
+    # share=True only creates a temporary public tunnel for local runs and
+    # is not supported on Spaces, so it is left at its default.
+    demo.launch(server_name="0.0.0.0", server_port=port)