Update app.py
Browse files
app.py
CHANGED
|
@@ -1,124 +1,124 @@
|
|
| 1 |
-
#!/usr/bin/env python3
|
| 2 |
-
# -*- coding: utf-8 -*-
|
| 3 |
-
|
| 4 |
-
import os
|
| 5 |
-
import joblib
|
| 6 |
-
import numpy as np
|
| 7 |
-
import librosa
|
| 8 |
-
import gradio as gr
|
| 9 |
-
from huggingface_hub import hf_hub_download
|
| 10 |
-
from deepface import DeepFace
|
| 11 |
-
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
|
| 12 |
-
|
| 13 |
-
# --- 1. Download and load the SVM model ---
# SVM_REPO_ID is the Hugging Face Hub repository that holds the model and
# SVM_FILENAME is the name of the file uploaded to that repository.
# NOTE(review): joblib.load unpickles the downloaded file, which can run
# arbitrary code; only point these constants at a repository you trust.
SVM_REPO_ID = "GCLing/emotion-svm-model"
SVM_FILENAME = "svm_emotion_model.joblib"

print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(repo_id=SVM_REPO_ID, filename=SVM_FILENAME)
print(f"SVM model downloaded to: {model_path}")

svm_model = joblib.load(model_path)
print("SVM model loaded.")
|
| 21 |
-
|
| 22 |
-
# --- 2. Load the text sentiment model ---
# The checkpoint used previously, uer/roberta-base-finetuned-chinanews-chinese,
# is a news-topic classifier, so the "text emotion" tab reported news
# categories instead of sentiment. The review-sentiment checkpoint from the
# same model family is used instead.
# NOTE(review): confirm the label names this checkpoint emits against its
# model card; any Chinese sentiment classifier can be substituted here.
TEXT_MODEL_ID = "uer/roberta-base-finetuned-dianping-chinese"

print("Loading text sentiment model...")
tokenizer = AutoTokenizer.from_pretrained(TEXT_MODEL_ID)
model_txt = AutoModelForSequenceClassification.from_pretrained(TEXT_MODEL_ID)
text_emotion = pipeline("sentiment-analysis", model=model_txt, tokenizer=tokenizer)
print("Text sentiment model loaded.")
|
| 29 |
-
|
| 30 |
-
# --- 3. 聲音特徵擷取函式 ---
|
| 31 |
-
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Summarise an audio signal as MFCC statistics.

    Args:
        signal: Audio samples, passed to librosa as ``y``.
        sr: Sampling rate of ``signal``.

    Returns:
        The mean of each of the 13 MFCC coefficients followed by the
        variance of each coefficient, joined into one vector (26 values).
    """
    coefficients = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    # Both statistics reduce along axis 1, leaving one value per coefficient.
    per_coeff_mean = np.mean(coefficients, axis=1)
    per_coeff_var = np.var(coefficients, axis=1)
    return np.concatenate([per_coeff_mean, per_coeff_var])
|
| 40 |
-
|
| 41 |
-
# --- 4. 三種預測函式 ---
|
| 42 |
-
|
| 43 |
-
def predict_face(img: np.ndarray):
    """Estimate facial emotion for a single image with DeepFace.

    The OpenCV detector backend is forced to avoid retinaface/TensorFlow
    version conflicts.

    Args:
        img: Image array from Gradio (H x W x channels, RGB order), or
            ``None`` when no frame is available yet.

    Returns:
        A dict mapping emotion name to a score normalised to sum to 1,
        e.g. ``{"happy": 0.80, "sad": 0.05, ...}``. An empty dict is
        returned when there is no image or no face could be detected.
    """
    # A webcam component delivers None until the first frame arrives.
    if img is None:
        return {}

    # Gradio hands over RGB arrays while DeepFace expects BGR for array input.
    # The 2::-1 slice reverses the first three channels (dropping any alpha),
    # and the copy gives OpenCV a contiguous buffer.
    bgr = np.ascontiguousarray(img[:, :, 2::-1]) if img.ndim == 3 else img

    try:
        # DeepFace.analyze can be slow; the Space needs adequate hardware.
        result = DeepFace.analyze(bgr, actions=["emotion"], detector_backend="opencv")
    except ValueError:
        # DeepFace raises ValueError when no face is found in the image.
        return {}

    # Newer DeepFace releases return one dict per detected face; older ones
    # return a single dict. Use the first face in either case.
    if isinstance(result, list):
        if not result:
            return {}
        result = result[0]

    # Scores come back as percentages (possibly numpy floats); convert to
    # plain floats and rescale so the label component receives fractions.
    scores = {str(name): float(value) for name, value in result["emotion"].items()}
    total = sum(scores.values())
    if total <= 0:
        return {}
    return {name: value / total for name, value in scores.items()}
|
| 53 |
-
|
| 54 |
-
def predict_voice(audio):
    """Estimate speech emotion from a recorded audio file.

    Args:
        audio: Path to the temporary audio file supplied by the Gradio
            Audio component, or ``None``/empty when nothing was recorded.

    Returns:
        A dict mapping each SVM class label to its predicted probability,
        e.g. ``{"angry": 0.1, "happy": 0.7, ...}``. An empty dict is
        returned when no audio was supplied or the file has no samples.
    """
    # Gradio passes None when the user submits without recording anything.
    if not audio:
        return {}

    # sr=None keeps the file's native sampling rate.
    signal, sr = librosa.load(audio, sr=None)
    if signal.size == 0:
        return {}

    feat = extract_feature(signal, sr)
    probs = svm_model.predict_proba([feat])[0]
    # Cast to built-in types so the result serialises cleanly.
    return {str(label): float(prob) for label, prob in zip(svm_model.classes_, probs)}
|
| 66 |
-
|
| 67 |
-
def predict_text(text: str):
    """Classify a piece of text with the transformers pipeline.

    Args:
        text: Input string; ``None``, empty, or whitespace-only input is
            treated as "nothing to classify".

    Returns:
        A single-entry dict ``{label: score}`` holding the top label the
        model produced and its confidence, or an empty dict for blank input.
    """
    if not text or not text.strip():
        return {}
    # truncation=True clips over-long input to the model's maximum length
    # instead of letting the forward pass fail on it.
    pred = text_emotion(text, truncation=True)[0]
    # pred looks like {"label": "...", "score": ...}
    return {pred["label"]: float(pred["score"])}
|
| 77 |
-
|
| 78 |
-
# --- 5. 建立 Gradio 介面 ---
|
| 79 |
-
def build_interface():
    """Build the tabbed Gradio app with one tab per modality.

    Tabs:
        - Face emotion: live webcam stream analysed by ``predict_face``.
        - Voice emotion: microphone recording or uploaded audio file,
          analysed by ``predict_voice``.
        - Text emotion: free-text input analysed by ``predict_text``.

    Returns:
        The ``gr.TabbedInterface`` combining the three interfaces.
    """
    # Face emotion. A streaming webcam input only calls the function when the
    # Interface is live, so live=True is required for the stream to work.
    face_interface = gr.Interface(
        fn=predict_face,
        inputs=gr.Image(sources=["webcam"], streaming=True, type="numpy"),
        outputs=gr.Label(num_top_classes=1),
        title="臉部情緒 (即時 Webcam)",
        description="允許攝影機拍照後自動分析當前表情的情緒分佈。",
        live=True,
    )

    # Voice emotion. The description offers recording or uploading, so both
    # sources are enabled.
    voice_interface = gr.Interface(
        fn=predict_voice,
        inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
        outputs=gr.Label(num_top_classes=1),
        title="語音情緒",
        description="錄製語音或上傳音訊檔,模型會回傳「驚訝/生氣/開心/悲傷/害怕」五種情緒機率。",
    )

    # Text emotion: Chinese text input.
    text_interface = gr.Interface(
        fn=predict_text,
        inputs=gr.Textbox(lines=3, placeholder="請輸入中文文字…"),
        outputs=gr.Label(num_top_classes=1),
        title="文字情緒",
        description="輸入中文文字,即時判斷文字情緒並回傳標籤與信心分數。",
    )

    # Combine the three interfaces into tabs.
    app = gr.TabbedInterface(
        interface_list=[face_interface, voice_interface, text_interface],
        tab_names=["臉部情緒", "語音情緒", "文字情緒"],
    )
    return app
|
| 119 |
-
|
| 120 |
-
if __name__ == "__main__":
    demo = build_interface()
    if os.environ.get("SPACE_ID"):
        # Running on Hugging Face Spaces: share links are not supported there
        # and the platform serves the app on Gradio's default port, so no
        # custom port or share flag is passed.
        demo.launch(server_name="0.0.0.0")
    else:
        # Local run: the port can be changed if several services run side by
        # side; share=True creates a temporary public link.
        demo.launch(server_name="0.0.0.0", server_port=7861, share=True)
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# -*- coding: utf-8 -*-
|
| 3 |
+
|
| 4 |
+
import os
|
| 5 |
+
import joblib
|
| 6 |
+
import numpy as np
|
| 7 |
+
import librosa
|
| 8 |
+
import gradio as gr
|
| 9 |
+
from huggingface_hub import hf_hub_download
|
| 10 |
+
from deepface import DeepFace
|
| 11 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
|
| 12 |
+
|
| 13 |
+
# --- 1. Download and load the SVM model ---
# SVM_REPO_ID is the Hugging Face Hub repository that holds the model and
# SVM_FILENAME is the name of the file uploaded to that repository.
# NOTE(review): joblib.load unpickles the downloaded file, which can run
# arbitrary code; only point these constants at a repository you trust.
SVM_REPO_ID = "GCLing/emotion-svm-model"
SVM_FILENAME = "svm_emotion_model.joblib"

print("Downloading SVM model from Hugging Face Hub...")
model_path = hf_hub_download(repo_id=SVM_REPO_ID, filename=SVM_FILENAME)
print(f"SVM model downloaded to: {model_path}")

svm_model = joblib.load(model_path)
print("SVM model loaded.")
|
| 21 |
+
|
| 22 |
+
# --- 2. Load the text sentiment model ---
# The checkpoint used previously, uer/roberta-base-finetuned-chinanews-chinese,
# is a news-topic classifier, so the "text emotion" tab reported news
# categories instead of sentiment. The review-sentiment checkpoint from the
# same model family is used instead.
# NOTE(review): confirm the label names this checkpoint emits against its
# model card; any Chinese sentiment classifier can be substituted here.
TEXT_MODEL_ID = "uer/roberta-base-finetuned-dianping-chinese"

print("Loading text sentiment model...")
tokenizer = AutoTokenizer.from_pretrained(TEXT_MODEL_ID)
model_txt = AutoModelForSequenceClassification.from_pretrained(TEXT_MODEL_ID)
text_emotion = pipeline("sentiment-analysis", model=model_txt, tokenizer=tokenizer)
print("Text sentiment model loaded.")
|
| 29 |
+
|
| 30 |
+
# --- 3. 聲音特徵擷取函式 ---
|
| 31 |
+
def extract_feature(signal: np.ndarray, sr: int) -> np.ndarray:
    """Summarise an audio signal as MFCC statistics.

    Args:
        signal: Audio samples, passed to librosa as ``y``.
        sr: Sampling rate of ``signal``.

    Returns:
        The mean of each of the 13 MFCC coefficients followed by the
        variance of each coefficient, joined into one vector (26 values).
    """
    coefficients = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
    # Both statistics reduce along axis 1, leaving one value per coefficient.
    per_coeff_mean = np.mean(coefficients, axis=1)
    per_coeff_var = np.var(coefficients, axis=1)
    return np.concatenate([per_coeff_mean, per_coeff_var])
|
| 40 |
+
|
| 41 |
+
# --- 4. 三種預測函式 ---
|
| 42 |
+
|
| 43 |
+
def predict_face(img: np.ndarray):
    """Estimate facial emotion for a single image with DeepFace.

    The OpenCV detector backend is forced to avoid retinaface/TensorFlow
    version conflicts.

    Args:
        img: Image array from Gradio (H x W x channels, RGB order), or
            ``None`` when no frame is available yet.

    Returns:
        A dict mapping emotion name to a score normalised to sum to 1,
        e.g. ``{"happy": 0.80, "sad": 0.05, ...}``. An empty dict is
        returned when there is no image or no face could be detected.
    """
    # A webcam component delivers None until the first frame arrives.
    if img is None:
        return {}

    # Gradio hands over RGB arrays while DeepFace expects BGR for array input.
    # The 2::-1 slice reverses the first three channels (dropping any alpha),
    # and the copy gives OpenCV a contiguous buffer.
    bgr = np.ascontiguousarray(img[:, :, 2::-1]) if img.ndim == 3 else img

    try:
        # DeepFace.analyze can be slow; the Space needs adequate hardware.
        result = DeepFace.analyze(bgr, actions=["emotion"], detector_backend="opencv")
    except ValueError:
        # DeepFace raises ValueError when no face is found in the image.
        return {}

    # Newer DeepFace releases return one dict per detected face; older ones
    # return a single dict. Use the first face in either case.
    if isinstance(result, list):
        if not result:
            return {}
        result = result[0]

    # Scores come back as percentages (possibly numpy floats); convert to
    # plain floats and rescale so the label component receives fractions.
    scores = {str(name): float(value) for name, value in result["emotion"].items()}
    total = sum(scores.values())
    if total <= 0:
        return {}
    return {name: value / total for name, value in scores.items()}
|
| 53 |
+
|
| 54 |
+
def predict_voice(audio):
    """Estimate speech emotion from a recorded audio file.

    Args:
        audio: Path to the temporary audio file supplied by the Gradio
            Audio component, or ``None``/empty when nothing was recorded.

    Returns:
        A dict mapping each SVM class label to its predicted probability,
        e.g. ``{"angry": 0.1, "happy": 0.7, ...}``. An empty dict is
        returned when no audio was supplied or the file has no samples.
    """
    # Gradio passes None when the user submits without recording anything.
    if not audio:
        return {}

    # sr=None keeps the file's native sampling rate.
    signal, sr = librosa.load(audio, sr=None)
    if signal.size == 0:
        return {}

    feat = extract_feature(signal, sr)
    probs = svm_model.predict_proba([feat])[0]
    # Cast to built-in types so the result serialises cleanly.
    return {str(label): float(prob) for label, prob in zip(svm_model.classes_, probs)}
|
| 66 |
+
|
| 67 |
+
def predict_text(text: str):
    """Classify a piece of text with the transformers pipeline.

    Args:
        text: Input string; ``None``, empty, or whitespace-only input is
            treated as "nothing to classify".

    Returns:
        A single-entry dict ``{label: score}`` holding the top label the
        model produced and its confidence, or an empty dict for blank input.
    """
    if not text or not text.strip():
        return {}
    # truncation=True clips over-long input to the model's maximum length
    # instead of letting the forward pass fail on it.
    pred = text_emotion(text, truncation=True)[0]
    # pred looks like {"label": "...", "score": ...}
    return {pred["label"]: float(pred["score"])}
|
| 77 |
+
|
| 78 |
+
# --- 5. 建立 Gradio 介面 ---
|
| 79 |
+
def build_interface():
    """Build the tabbed Gradio app with one tab per modality.

    Tabs:
        - Face emotion: live webcam stream analysed by ``predict_face``.
        - Voice emotion: microphone recording or uploaded audio file,
          analysed by ``predict_voice``.
        - Text emotion: free-text input analysed by ``predict_text``.

    Returns:
        The ``gr.TabbedInterface`` combining the three interfaces.
    """
    # Face emotion. A streaming webcam input only calls the function when the
    # Interface is live, so live=True is required for the stream to work.
    face_interface = gr.Interface(
        fn=predict_face,
        inputs=gr.Image(sources=["webcam"], streaming=True, type="numpy"),
        outputs=gr.Label(num_top_classes=1),
        title="臉部情緒 (即時 Webcam)",
        description="允許攝影機拍照後自動分析當前表情的情緒分佈。",
        live=True,
    )

    # Voice emotion. The description offers recording or uploading, so both
    # sources are enabled.
    voice_interface = gr.Interface(
        fn=predict_voice,
        inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
        outputs=gr.Label(num_top_classes=1),
        title="語音情緒",
        description="錄製語音或上傳音訊檔,模型會回傳「驚訝/生氣/開心/悲傷/害怕」五種情緒機率。",
    )

    # Text emotion: Chinese text input.
    text_interface = gr.Interface(
        fn=predict_text,
        inputs=gr.Textbox(lines=3, placeholder="請輸入中文文字…"),
        outputs=gr.Label(num_top_classes=1),
        title="文字情緒",
        description="輸入中文文字,即時判斷文字情緒並回傳標籤與信心分數。",
    )

    # Combine the three interfaces into tabs.
    app = gr.TabbedInterface(
        interface_list=[face_interface, voice_interface, text_interface],
        tab_names=["臉部情緒", "語音情緒", "文字情緒"],
    )
    return app
|
| 119 |
+
|
| 120 |
+
if __name__ == "__main__":
    demo = build_interface()
    if os.environ.get("SPACE_ID"):
        # Running on Hugging Face Spaces: share links are not supported there
        # and the platform serves the app on Gradio's default port, so no
        # custom port or share flag is passed.
        demo.launch(server_name="0.0.0.0")
    else:
        # Local run: the port can be changed if several services run side by
        # side; share=True creates a temporary public link.
        demo.launch(server_name="0.0.0.0", server_port=7861, share=True)
|