Spaces:

vyluong
/

PoC_ASR_v6_dev

Sleeping

App Files Files Community

PoC_ASR_v6_dev / app /services /emo.py

vyluong

PoC deployment

857b1b2 verified 2 months ago

raw

history blame contribute delete

3.84 kB

	import os
	import sys
	import time
	import logging
	import librosa
	import numpy as np
	import torch

	from torch.nn import functional as F
	from huggingface_hub import hf_hub_download

	logger = logging.getLogger(__name__)

	# HuggingFace repo
	AVAILABLE_MODELS = {
	"dual_emotion": "vyluong/emo_dual_classi"
	}

	emotion_labels = ['Angry', 'Anxiety', 'Happy', 'Sad', 'Neutral']

	EMOTION_META = {
	"Angry": {"emoji": "😡", "color": "#ff4d4f"},
	"Anxiety": {"emoji": "😰", "color": "#faad14"},
	"Happy": {"emoji": "😊", "color": "#52c41a"},
	"Sad": {"emoji": "😢", "color": "#1890ff"},
	"Neutral": {"emoji": "😐", "color": "#d9d9d9"},
	}



	class EmotionService:

	_models = {}

	emotion_labels = emotion_labels
	meta = EMOTION_META

	@classmethod
	def load_dual_model(cls, repo_id, device):

	logger.info(f"Downloading model from HF: {repo_id}")

	model_file = hf_hub_download(
	repo_id=repo_id,
	filename="pytorch_model.bin"
	)

	model_code = hf_hub_download(
	repo_id=repo_id,
	filename="model.py"
	)

	# add model folder to python path
	import importlib.util

	spec = importlib.util.spec_from_file_location("hf_model", model_code)
	module = importlib.util.module_from_spec(spec)
	spec.loader.exec_module(module)

	Dual = module.Dual

	model = Dual()

	state_dict = torch.load(model_file, map_location=device)

	model.load_state_dict(state_dict)

	model.to(device)
	model.eval()

	logger.info("Emotion model loaded successfully")

	return model



	@classmethod
	def get_model(cls, model_name="dual_emotion"):

	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	if model_name in cls._models:
	return cls._models[model_name]

	repo_id = AVAILABLE_MODELS[model_name]

	model = cls.load_dual_model(repo_id, device)

	cls._models[model_name] = model

	return model


	@classmethod
	def preload_model(cls):

	logger.info("Preloading emotion model...")

	cls.get_model()

	logger.info("Emotion model ready")


	# extract mfcc from segments
	@staticmethod
	def extract_mfcc_segment(
	audio: np.ndarray,
	sr: int,
	start: float,
	end: float,
	duration: float = 5.0,
	n_mfcc: int = 128,
	n_fft: int = 2048,
	hop_length: int = 512
	):

	start_sample = int(start * sr)
	end_sample = int(end * sr)

	segment = audio[start_sample:end_sample]

	if len(segment) == 0:
	return None

	target_len = int(sr * duration)

	if len(segment) < target_len:
	segment = np.pad(segment,(0,target_len-len(segment)),mode="symmetric")
	else:
	segment = segment[:target_len]

	mfcc = librosa.feature.mfcc(
	y=segment,
	sr=sr,
	n_mfcc=n_mfcc,
	n_fft=n_fft,
	hop_length=hop_length
	)

	return mfcc


	@classmethod
	def predict_from_mfcc(cls, mfcc):

	model = cls.get_model()

	tensor = torch.from_numpy(mfcc).unsqueeze(0).unsqueeze(0).float()

	device = next(model.parameters()).device
	tensor = tensor.to(device)

	with torch.no_grad():

	output = model(tensor)

	probs = F.softmax(output.squeeze(), dim=0).cpu().numpy()

	label = cls.emotion_labels[np.argmax(probs)]

	return label


	# predict from segments
	@classmethod
	def predict_segment(cls, audio, sr, start, end):

	mfcc = cls.extract_mfcc_segment(audio, sr, start, end)

	if mfcc is None:
	return "Neutral"

	return cls.predict_from_mfcc(mfcc)