| """ |
| Emotion Detection using fine-tuned DistilRoBERTa model. |
| |
| This module provides emotion prediction from text input using a |
| multi-label classification model trained on the Cirimus/Super-Emotion dataset. |
| """ |
|
|
| from typing import Any, Dict |
|
|
| import torch |
| from transformers import AutoModelForSequenceClassification, AutoTokenizer |
|
|
|
|
| class EmotionDetector: |
| """Emotion detection using a fine-tuned DistilRoBERTa model.""" |
|
|
| LABELS = ["anger", "fear", "joy", "love", "neutral", "sadness", "surprise"] |
| THRESHOLD = 0.56 |
|
|
| def __init__(self, use_mock: bool = False) -> None: |
| """ |
| Initialize the emotion detector. Requires HF_REPO_ID environment variable. |
| |
| Args: |
| use_mock: If True, use a mock implementation (no model loading). |
| """ |
| self.use_mock = use_mock |
| if use_mock: |
| print(" ⚠️ EmotionDetector initialized in MOCK mode.") |
| return |
|
|
| import os |
|
|
| repo_id = os.getenv("HF_REPO_ID") |
|
|
| if not repo_id: |
| raise ValueError("HF_REPO_ID environment variable must be set") |
|
|
| print( |
| f" Loading tokenizer from HF Hub: {repo_id} (subfolder=emotion_model)..." |
| ) |
| self.tokenizer = AutoTokenizer.from_pretrained( |
| repo_id, subfolder="emotion_model" |
| ) |
|
|
| print(f" Loading model from HF Hub: {repo_id} (subfolder=emotion_model)...") |
| self.model = AutoModelForSequenceClassification.from_pretrained( |
| repo_id, subfolder="emotion_model" |
| ) |
|
|
| self.model.eval() |
|
|
| |
| self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") |
| self.model.to(self.device) |
| print(f" Model loaded on device: {self.device}") |
|
|
| def predict(self, text: str) -> Dict[str, Any]: |
| """ |
| Predict emotion from text. |
| |
| Args: |
| text: Input text to analyze. |
| |
| Returns: |
| Dictionary containing: |
| - emotion: The dominant emotion label (str) |
| - confidence: Score of the dominant emotion (float) |
| - probabilities: Dictionary of all emotion scores (dict[str, float]) |
| """ |
| if self.use_mock: |
| return { |
| "emotion": "neutral", |
| "confidence": 0.5, |
| "probabilities": {label: 0.1 for label in self.LABELS}, |
| } |
|
|
| |
| inputs = self.tokenizer( |
| text, |
| return_tensors="pt", |
| truncation=True, |
| max_length=512, |
| padding=True, |
| ) |
|
|
| |
| inputs = {k: v.to(self.device) for k, v in inputs.items()} |
|
|
| |
| with torch.no_grad(): |
| outputs = self.model(**inputs) |
| logits = outputs.logits |
|
|
| |
| |
| probabilities = torch.sigmoid(logits).cpu().numpy()[0] |
|
|
| |
| prob_dict = { |
| label: float(score) for label, score in zip(self.LABELS, probabilities) |
| } |
|
|
| |
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| valid_emotions = {k: v for k, v in prob_dict.items() if v >= self.THRESHOLD} |
|
|
| if valid_emotions: |
| dominant_emotion, confidence = max( |
| valid_emotions.items(), key=lambda x: x[1] |
| ) |
| else: |
| dominant_emotion, confidence = max(prob_dict.items(), key=lambda x: x[1]) |
|
|
| return { |
| "emotion": dominant_emotion, |
| "confidence": confidence, |
| "probabilities": prob_dict, |
| } |
|
|
| def close(self) -> None: |
| """Clean up resources.""" |
| if self.use_mock: |
| return |
|
|
| |
| del self.model |
| del self.tokenizer |
| if torch.cuda.is_available(): |
| torch.cuda.empty_cache() |
|
|