Spaces:

GCLing
/

emotion

Runtime error

App Files Files Community

GCLing committed on Jun 16, 2025

Commit

7b9bebe

verified ·

1 Parent(s): b8ff14e

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -93

app.py CHANGED Viewed

@@ -1,28 +1,22 @@
 import gradio as gr
 print("Gradio version:", gr.__version__)
-import os
-import joblib
 import numpy as np
 import librosa
-import gradio as gr
-import time
-import re
-from transformers import pipeline
 from huggingface_hub import hf_hub_download
 from deepface import DeepFace
-from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
-# --- 1. Download and load the SVM model ---
-# repo_id is the path of your model repository, e.g. "GCLing/emotion-svm-model"
-# filename is the name of the file uploaded to that repository, e.g. "svm_emotion_model.joblib"
 print("Downloading SVM model from Hugging Face Hub...")
 model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
 print(f"SVM model downloaded to: {model_path}")
 svm_model = joblib.load(model_path)
 print("SVM model loaded.")
-# --- 2. Load the text emotion analysis model ---
 zero_shot = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
 candidate_labels = ["joy", "sadness", "anger", "fear", "surprise", "disgust"]
 label_map_en2cn = {
@@ -36,82 +30,38 @@ emo_keywords = {
     "surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
     "fear": ["怕","恐懼","緊張","懼","膽怯","畏"]
 }
-# Simple list of negation words
 negations = ["不","沒","沒有","別","勿","非"]
-# --- 3. Audio feature extraction function ---
-def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
-    """
-    Compute 13 MFCC features from an audio signal (numpy array) and its sampling rate sr,
-    and return a feature vector built from their means and variances (26 values in total).
-    """
-    # after loading with librosa, signal is a float numpy array
-    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
-    # axis=1: mean and variance over time for each MFCC coefficient
-    return np.concatenate([np.mean(mfcc, axis=1), np.var(mfcc, axis=1)])
-# --- 4. The three prediction functions ---
-def predict_face(img):
-    print("predict_face called, img is None?", img is None)
-    if img is None:
-        return {}
-    try:
-        res = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
-        # list/dict handling kept short...
-        # simply take the first face
-        if isinstance(res, list):
-            first = res[0] if res else {}
-            emo = first.get("emotion", {}) if isinstance(first, dict) else {}
-        else:
-            emo = res.get("emotion", {}) if isinstance(res, dict) else {}
-        print("predict_face result:", emo)
-        return emo
-    except Exception as e:
-        print("DeepFace.analyze error:", e)
-        return {}
-def predict_voice(audio_path: str):
-    # If there is no recording file path, return an empty dict right away
-    if not audio_path:
-        # Log a message to help with debugging
-        print("predict_voice: 收到 None 或空 audio_path，跳過分析")
-        return {}
-    try:
-        signal, sr = librosa.load(audio_path, sr=None)
-        # Extract features
-        feat = extract_feature(signal, sr)  # your feature extraction function
-        probs = svm_model.predict_proba([feat])[0]
-        labels = svm_model.classes_
-        return {labels[i]: float(probs[i]) for i in range(len(labels))}
-    except Exception as e:
-        print("predict_voice error:", e)
-        return {}
 def predict_text_mixed(text: str):
-    """
-    先用 keyword_emotion 规则；若未命中再用 zero-shot 分类，
-    返回 {中文标签: float_score} 的 dict，供 gr.Label 显示。
-    """
     if not text or text.strip() == "":
         return {}
-    # 规则优先
     res = keyword_emotion(text)
     if res:
-        # 只返回最高那一项及其比例，也可返回完整分布
         top_emo = max(res, key=res.get)
-        # 可将英文 key 转成中文，若需要
-        # mapping: happy->高兴, angry->愤怒, etc.
-        mapping = {
-            "happy": "高兴",
-            "angry": "愤怒",
-            "sad": "悲伤",
-            "surprise": "惊讶",
-            "fear": "恐惧"
-        }
         cn = mapping.get(top_emo, top_emo)
         return {cn: res[top_emo]}
-    # 规则未命中，zero-shot fallback
     try:
         out = zero_shot(text, candidate_labels=candidate_labels,
                         hypothesis_template="这句话表达了{}情绪")
@@ -124,29 +74,70 @@ def predict_text_mixed(text: str):
         print("zero-shot error:", e)
         return {"中性": 1.0}
-# --- 5. Build the Gradio interface ---
-with gr.Blocks() as demo:
-    with gr.TabItem("臉部情緒"):
-        with gr.Row():
-            webcam = gr.Image(source="webcam", streaming=True, type="numpy", label="攝像頭畫面")
-            face_out = gr.Label(label="情緒分布")
-        webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)
-        # The remaining tabs can be written as before, or in Blocks style
-        with gr.TabItem("語音情緒"):
-            audio = gr.Audio(sources="microphone", streaming=False, type="filepath", label="錄音")
-            audio_output = gr.Label(label="語音情緒結果")
-            # Triggered via change/submit: call predict_voice once recording finishes
-            audio.change(fn=predict_voice, inputs=audio, outputs=audio_output)
         with gr.TabItem("文字情緒"):
             gr.Markdown("### 文字情緒 分析 (规则+zero-shot)")
             with gr.Row():
                 text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
                 text_out = gr.Label(label="文字情緒結果")
             text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)
 if __name__ == "__main__":
     demo.launch()
-  # do not pass server_name or server_port

 import gradio as gr
 print("Gradio version:", gr.__version__)
+import os, time, re
 import numpy as np
+import joblib
 import librosa
 from huggingface_hub import hf_hub_download
 from deepface import DeepFace
+from transformers import pipeline
+# If AutoTokenizer/AutoModel are not used manually, there is no need to import AutoTokenizer, AutoModelForSequenceClassification
+# --- 1. Load the SVM speech model ---
 print("Downloading SVM model from Hugging Face Hub...")
 model_path = hf_hub_download(repo_id="GCLing/emotion-svm-model", filename="svm_emotion_model.joblib")
 print(f"SVM model downloaded to: {model_path}")
 svm_model = joblib.load(model_path)  # NOTE(review): joblib.load unpickles the downloaded file; only load artifacts from a trusted repo
 print("SVM model loaded.")
+# --- 2. Text emotion analysis: rules + zero-shot ---
 zero_shot = pipeline("zero-shot-classification", model="joeddav/xlm-roberta-large-xnli")
 candidate_labels = ["joy", "sadness", "anger", "fear", "surprise", "disgust"]
 label_map_en2cn = {
     "surprise": ["驚訝","意外","嚇","驚詫","詫異","訝異","好奇"],
     "fear": ["怕","恐懼","緊張","懼","膽怯","畏"]
 }
 negations = ["不","沒","沒有","別","勿","非"]  # negation words looked for directly before a keyword match
+def keyword_emotion(text: str):
+    """Score emotions in *text* by keyword matching with simple negation handling.
+
+    Every keyword listed in ``emo_keywords`` contributes at most one hit to its
+    emotion. An occurrence is ignored when it is immediately preceded by one of
+    the words in ``negations``; the keyword still counts if any other
+    occurrence in the text is not negated.
+
+    Args:
+        text: Input text to scan.
+
+    Returns:
+        A dict mapping every emotion in ``emo_keywords`` to its share of the
+        total hits, or ``None`` when no keyword matched.
+    """
+    neg_words = tuple(negations)
+    counts = {emo: 0 for emo in emo_keywords}
+    for emo, kws in emo_keywords.items():
+        for w in kws:
+            # Check every occurrence, not only the first: a negated first
+            # occurrence must not hide a later, non-negated one.
+            idx = text.find(w)
+            while idx != -1:
+                # endswith(suffixes, 0, idx) tests the text right before idx.
+                if not text.endswith(neg_words, 0, idx):
+                    counts[emo] += 1
+                    break
+                idx = text.find(w, idx + 1)
+    total = sum(counts.values())
+    if total > 0:
+        return {emo: count / total for emo, count in counts.items()}
+    return None
 def predict_text_mixed(text: str):
     if not text or text.strip() == "":
         return {}
     res = keyword_emotion(text)
     if res:
         top_emo = max(res, key=res.get)
+        mapping = {"happy":"高兴","angry":"愤怒","sad":"悲伤","surprise":"惊讶","fear":"恐惧"}
         cn = mapping.get(top_emo, top_emo)
         return {cn: res[top_emo]}
     try:
         out = zero_shot(text, candidate_labels=candidate_labels,
                         hypothesis_template="这句话表达了{}情绪")
         print("zero-shot error:", e)
         return {"中性": 1.0}
+# --- 3. Speech emotion prediction functions ---
+def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
+    """Return the per-coefficient means, then variances, of 13 MFCCs of *signal*."""
+    coeffs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
+    means = np.mean(coeffs, axis=1)
+    variances = np.var(coeffs, axis=1)
+    return np.concatenate([means, variances])
+def predict_voice(audio_path: str):
+    """Predict emotion probabilities for a recorded audio file.
+
+    Args:
+        audio_path: Path of the audio file; ``None`` or an empty string means
+            nothing was recorded.
+
+    Returns:
+        A dict mapping each class label of ``svm_model`` (as ``str``) to its
+        predicted probability, or an empty dict when there is no input or the
+        analysis fails.
+    """
+    if not audio_path:
+        print("predict_voice: 无 audio_path，跳过")
+        return {}
+    try:
+        signal, sr = librosa.load(audio_path, sr=None)
+        feat = extract_feature(signal, sr)
+        probs = svm_model.predict_proba([feat])[0]
+        # Plain str keys and float values keep the result JSON-serializable,
+        # in line with the float cast predict_face applies to its scores.
+        return {str(label): float(p) for label, p in zip(svm_model.classes_, probs)}
+    except Exception as e:
+        # Best-effort: log the failure and hand an empty result to the UI.
+        print("predict_voice error:", e)
+        return {}
+# --- 4. Facial emotion prediction function ---
+def predict_face(img: np.ndarray):
+    """Run DeepFace emotion analysis on one image and return its scores.
+
+    Args:
+        img: Image to analyze; ``None`` yields an empty result.
+
+    Returns:
+        A dict mapping emotion names to ``float`` scores, or an empty dict
+        when there is no image, no usable result, or the analysis raises.
+    """
+    print("predict_face called, img is None?", img is None)
+    if img is None:
+        return {}
+    try:
+        analysis = DeepFace.analyze(img, actions=["emotion"], detector_backend="opencv")
+        # A list result is reduced to its first entry (empty list -> empty dict).
+        if isinstance(analysis, list):
+            analysis = analysis[0] if analysis else {}
+        scores = analysis.get("emotion", {}) if isinstance(analysis, dict) else {}
+        # Cast to built-in float so the scores are JSON-serializable.
+        emo_fixed = {name: float(score) for name, score in scores.items()}
+        print("predict_face result:", emo_fixed)
+        return emo_fixed
+    except Exception as e:
+        print("DeepFace.analyze error:", e)
+        return {}
+# --- 5. Gradio interface ---
+with gr.Blocks() as demo:
+    gr.Markdown("## 多模態情緒分析示例")
+    with gr.Tabs():
+        # Facial emotion tab
+        with gr.TabItem("臉部情緒"):
+            gr.Markdown("### 臉部情緒 (即時 Webcam Streaming 分析)")
+            with gr.Row():
+                webcam = gr.Image(source="webcam", streaming=True, type="numpy", label="攝像頭畫面")  # NOTE(review): `source=` looks like the Gradio 3.x keyword; newer releases take `sources=[...]` — confirm against the installed version
+                face_out = gr.Label(label="情緒分布")
+            webcam.stream(fn=predict_face, inputs=webcam, outputs=face_out)
+        # Speech emotion tab
+        with gr.TabItem("語音情緒"):
+            gr.Markdown("### 語音情緒 分析")
+            with gr.Row():
+                audio = gr.Audio(source="microphone", streaming=False, type="filepath", label="錄音")  # NOTE(review): same `source=` vs `sources=` question as the webcam component — confirm
+                voice_out = gr.Label(label="語音情緒結果")
+            audio.change(fn=predict_voice, inputs=audio, outputs=voice_out)
+        # Text emotion tab
         with gr.TabItem("文字情緒"):
             gr.Markdown("### 文字情緒 分析 (规则+zero-shot)")
             with gr.Row():
                 text = gr.Textbox(lines=3, placeholder="請輸入中文文字…")
                 text_out = gr.Label(label="文字情緒結果")
             text.submit(fn=predict_text_mixed, inputs=text, outputs=text_out)
 if __name__ == "__main__":
     demo.launch()