Spaces:
Sleeping
Sleeping
| import os | |
| import sys | |
| import time | |
| import logging | |
| import librosa | |
| import numpy as np | |
| import torch | |
| from torch.nn import functional as F | |
| from huggingface_hub import hf_hub_download | |
logger = logging.getLogger(__name__)

# Hugging Face Hub repositories, keyed by the short model name callers pass in.
AVAILABLE_MODELS = {
    "dual_emotion": "vyluong/emo_dual_classi",
}

# Class labels. EmotionService.predict_from_mfcc indexes this list with the
# argmax of the model output, so the order here is significant.
emotion_labels = ['Angry', 'Anxiety', 'Happy', 'Sad', 'Neutral']

# Presentation metadata (emoji + hex colour) for each label.
#
# The emoji are written as \U escapes so the file stays correct even if it is
# re-saved or served with the wrong encoding. The previous literals were
# mojibake (UTF-8 bytes decoded as Windows-874/TIS-620).
# NOTE(review): the Angry/Anxiety/Sad code points were recovered from the
# surviving bytes; the last byte of the Happy and Neutral emoji was lost, so
# U+1F60A and U+1F610 are best guesses -- confirm against the UI.
EMOTION_META = {
    "Angry": {"emoji": "\U0001F621", "color": "#ff4d4f"},    # pouting face
    "Anxiety": {"emoji": "\U0001F630", "color": "#faad14"},  # anxious face with sweat
    "Happy": {"emoji": "\U0001F60A", "color": "#52c41a"},    # smiling face with smiling eyes
    "Sad": {"emoji": "\U0001F622", "color": "#1890ff"},      # crying face
    "Neutral": {"emoji": "\U0001F610", "color": "#d9d9d9"},  # neutral face
}
class EmotionService:
    """Speech-emotion classification backed by a model from the Hugging Face Hub.

    The service is stateless apart from a class-level model cache, so every
    method is a class or static method and can be called directly on the
    class, e.g. ``EmotionService.predict_segment(audio, sr, 0.0, 5.0)``.
    """

    # Loaded models keyed by model name; shared so each model is loaded once.
    _models = {}
    # Label list and display metadata re-exported from the module constants.
    emotion_labels = emotion_labels
    meta = EMOTION_META

    @classmethod
    def load_dual_model(cls, repo_id, device):
        """Download, build and return the ``Dual`` model from *repo_id*.

        Args:
            repo_id: Hugging Face Hub repository holding ``pytorch_model.bin``
                and ``model.py`` (which must define a class named ``Dual``).
            device: ``torch.device`` the weights are mapped to and the model
                is moved onto.

        Returns:
            The model in eval mode, on *device*.

        Raises:
            ImportError: if ``model.py`` cannot be turned into a module spec.
        """
        import importlib.util

        logger.info("Downloading model from HF: %s", repo_id)
        weights_path = hf_hub_download(
            repo_id=repo_id,
            filename="pytorch_model.bin",
        )
        code_path = hf_hub_download(
            repo_id=repo_id,
            filename="model.py",
        )

        # SECURITY NOTE: this executes Python code downloaded from the Hub.
        # Only point AVAILABLE_MODELS at repositories you trust.
        spec = importlib.util.spec_from_file_location("hf_model", code_path)
        if spec is None or spec.loader is None:
            raise ImportError(f"Cannot load model definition from {code_path!r}")
        module = importlib.util.module_from_spec(spec)
        spec.loader.exec_module(module)

        model = module.Dual()

        # SECURITY NOTE: torch.load unpickles the file. If the checkpoint is a
        # plain state dict, consider passing weights_only=True (review first).
        state_dict = torch.load(weights_path, map_location=device)
        model.load_state_dict(state_dict)
        model.to(device)
        model.eval()

        logger.info("Emotion model loaded successfully")
        return model

    @classmethod
    def get_model(cls, model_name="dual_emotion"):
        """Return the model registered as *model_name*, loading it on first use.

        Raises:
            KeyError: if *model_name* is not a key of ``AVAILABLE_MODELS``.
        """
        if model_name in cls._models:
            return cls._models[model_name]

        try:
            repo_id = AVAILABLE_MODELS[model_name]
        except KeyError:
            raise KeyError(
                f"Unknown emotion model {model_name!r}; "
                f"available: {sorted(AVAILABLE_MODELS)}"
            ) from None

        # Pick the device only when a load is actually needed.
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = cls.load_dual_model(repo_id, device)
        cls._models[model_name] = model
        return model

    @classmethod
    def preload_model(cls):
        """Load the default model eagerly so the first prediction is not slow."""
        logger.info("Preloading emotion model...")
        cls.get_model()
        logger.info("Emotion model ready")

    @staticmethod
    def extract_mfcc_segment(
        audio: np.ndarray,
        sr: int,
        start: float,
        end: float,
        duration: float = 5.0,
        n_mfcc: int = 128,
        n_fft: int = 2048,
        hop_length: int = 512,
    ):
        """Compute MFCCs for the ``[start, end)`` slice of *audio*.

        The slice is padded (symmetric reflection) or truncated to exactly
        ``duration`` seconds so every segment has the same number of samples.

        Args:
            audio: Audio samples.
            sr: Sample rate of *audio* in Hz.
            start: Segment start time in seconds.
            end: Segment end time in seconds.
            duration: Fixed segment length, in seconds, fed to the MFCC.
            n_mfcc: Number of MFCC coefficients.
            n_fft: FFT window size.
            hop_length: Hop between successive frames, in samples.

        Returns:
            The array produced by ``librosa.feature.mfcc``, or ``None`` when
            the requested slice contains no samples.
        """
        # Clamp at 0: a negative index would silently slice from the end of
        # the array instead of the start.
        start_sample = max(0, int(start * sr))
        end_sample = max(0, int(end * sr))

        segment = audio[start_sample:end_sample]
        if len(segment) == 0:
            return None

        target_len = int(sr * duration)
        if len(segment) < target_len:
            segment = np.pad(
                segment,
                (0, target_len - len(segment)),
                mode="symmetric",
            )
        else:
            segment = segment[:target_len]

        return librosa.feature.mfcc(
            y=segment,
            sr=sr,
            n_mfcc=n_mfcc,
            n_fft=n_fft,
            hop_length=hop_length,
        )

    @classmethod
    def predict_from_mfcc(cls, mfcc):
        """Classify one MFCC array and return its emotion label.

        Args:
            mfcc: NumPy array of MFCC features; two leading singleton
                dimensions are added before it is passed to the model.

        Returns:
            The entry of ``emotion_labels`` with the highest probability.
        """
        model = cls.get_model()
        # Run on whichever device the model's parameters live on.
        device = next(model.parameters()).device
        batch = torch.from_numpy(mfcc).unsqueeze(0).unsqueeze(0).float().to(device)

        with torch.no_grad():
            logits = model(batch)
            probs = F.softmax(logits.squeeze(), dim=0).cpu().numpy()

        return cls.emotion_labels[int(np.argmax(probs))]

    @classmethod
    def predict_segment(cls, audio, sr, start, end):
        """Predict the emotion label for the ``[start, end)`` slice of *audio*.

        Returns ``"Neutral"`` when the slice is empty.
        """
        mfcc = cls.extract_mfcc_segment(audio, sr, start, end)
        if mfcc is None:
            return "Neutral"
        return cls.predict_from_mfcc(mfcc)