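"""Chinese speech emotion recognition from the microphone.

Records a short clip with PyAudio and classifies it into one of six emotions
(angry, fear, happy, neutral, sad, surprise) using a HuBERT checkpoint
fine-tuned for Chinese speech emotion recognition.
"""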
import os
import wave
import numpy as np
import pyaudio
import torch
import torch.nn as nn
import torch.nn.functional as F
import librosa
from transformers import AutoConfig, Wav2Vec2FeatureExtractor, HubertPreTrainedModel, HubertModel
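# Dependencies (assumed): torch, transformers, librosa, pyaudio, numpy.
# Note: PyAudio typically requires the PortAudio system library to be installed.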
# Model settings
model_name_or_path = "xmj2002/hubert-base-ch-speech-emotion-recognition"
sample_rate = 16000
duration = 6  # recording duration (seconds)
# Globals for lazy loading (so the model isn't downloaded at import time)
_model = None
_processor = None
_config = None
# Emotion labels
def id2class(class_id):
    emotions = {
        0: "angry",
        1: "fear",
        2: "happy",
        3: "neutral",
        4: "sad",
        5: "surprise",
    }
    return emotions.get(class_id, "unknown emotion")

# Model architecture
class HubertClassificationHead(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.dense = nn.Linear(config.hidden_size, config.hidden_size)
        self.dropout = nn.Dropout(config.classifier_dropout)
        self.out_proj = nn.Linear(config.hidden_size, config.num_class)

    def forward(self, x):
        x = self.dense(x)
        x = torch.tanh(x)
        x = self.dropout(x)
        x = self.out_proj(x)
        return x

class HubertForSpeechClassification(HubertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.hubert = HubertModel(config)
        self.classifier = HubertClassificationHead(config)
        self.init_weights()

    def forward(self, x):
        outputs = self.hubert(x)
        hidden_states = outputs[0]
        # Mean-pool the frame-level hidden states over time, then classify
        x = torch.mean(hidden_states, dim=1)
        x = self.classifier(x)
        return x

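# Note: these class definitions (classifier head + mean pooling) are assumed to
# match the custom architecture published with this checkpoint; loading it with
# a plain AutoModel would not pick up the classifier weights.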
def _load_model():
    """Lazily load the model so it isn't downloaded at module import time."""
    import logging
    logger = logging.getLogger("emotion_recognition")
    global _model, _processor, _config
    if _model is None:
        logger.debug("Lazily loading the emotion recognition model...")
        try:
            _config = AutoConfig.from_pretrained(model_name_or_path)
            _processor = Wav2Vec2FeatureExtractor.from_pretrained(model_name_or_path)
            _model = HubertForSpeechClassification.from_pretrained(model_name_or_path, config=_config)
            _model.eval()
            logger.debug("Emotion recognition model loaded!")
        except Exception as e:
            logger.error(f"Failed to load the emotion recognition model: {e}")
            _model = None
            _processor = None
            _config = None

def record_audio():
    """Record audio from the microphone and save it to a temporary WAV file."""
    p = pyaudio.PyAudio()
    print(f"Please start speaking; recording for {duration} seconds...")
    stream = p.open(
        format=pyaudio.paInt16,
        channels=1,
        rate=sample_rate,
        input=True,
        frames_per_buffer=1024
    )
    frames = []
    for _ in range(0, int(sample_rate / 1024 * duration)):
        data = stream.read(1024)
        frames.append(data)
    print("Recording finished!")
    stream.stop_stream()
    stream.close()
    sample_width = p.get_sample_size(pyaudio.paInt16)  # query before terminate()
    p.terminate()
    # Save the recording to a temporary WAV file
    temp_file = "temp_recording.wav"
    wf = wave.open(temp_file, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(sample_width)
    wf.setframerate(sample_rate)
    wf.writeframes(b''.join(frames))
    wf.close()
    return temp_file

def predict(audio_path):
    """Predict the emotion of the given audio file with the model."""
    global _model, _processor
    if _model is None or _processor is None:
        _load_model()
    if _model is None or _processor is None:
        return 3, 0.0, {"neutral": "1.0000"}  # fall back to a default result
    speech, sr = librosa.load(path=audio_path, sr=sample_rate)
    speech = _processor(speech, padding="max_length", truncation=True,
                        max_length=duration * sr, return_tensors="pt",
                        sampling_rate=sr).input_values
    with torch.no_grad():
        logit = _model(speech)
        scores = F.softmax(logit, dim=1).detach().cpu().numpy()[0]
        pred_id = torch.argmax(logit).cpu().item()
    # Collect the confidence score for every emotion
    all_emotions = {}
    for i in range(6):
        all_emotions[id2class(i)] = f"{scores[i]:.4f}"
    return pred_id, scores[pred_id], all_emotions

def main():
    print("Welcome to the Chinese speech emotion recognition system!")
    print("This program records your voice, then recognizes your emotion.")
    while True:
        input("Press Enter to start recording...")
        audio_path = record_audio()
        print("Analyzing emotion...")
        pred_id, confidence, all_emotions = predict(audio_path)
        print("\n==========================================")
        print(f"Prediction: {id2class(pred_id)}")
        print(f"Confidence: {confidence:.4f}")
        print("\nConfidence for all emotions:")
        for emotion, score in all_emotions.items():
            print(f"{emotion}: {score}")
        print("==========================================\n")
        choice = input("Try again? (y/n): ")
        if choice.lower() != 'y':
            # Clean up the temporary file
            if os.path.exists(audio_path):
                os.remove(audio_path)
            break
    print("Thanks for using this program. Bye!")

if __name__ == "__main__":
    main()