othsueh
/

CombineCorpus_ORG

Audio Classification

wav2vec2-emodualhead

emotion-recognition

Model card Files Files and versions

othsueh committed on Jun 3, 2025

Commit

cdc1bf3

·

verified ·

1 Parent(s): c22fa18

Create handler.py

Files changed (1) hide show

handler.py +55 -0

handler.py ADDED Viewed

	@@ -0,0 +1,55 @@

+import base64
+import io
+import os
+from typing import Any, Dict, List
+
+import torch
+import torchaudio
+from transformers import AutoConfig, AutoProcessor
+
+from modeling_upstream_finetune import UpstreamFinetune
+class EndpointHandler():
+    """Custom inference handler for a dual-head speech emotion model.
+
+    Each call returns one score per categorical emotion in ``self.emotions``
+    followed by ``arousal`` and ``valence`` regression scores.
+    """
+
+    def __init__(self, model_dir: str, **kwargs: Any) -> None:
+        """Load the fine-tuned model from ``model_dir`` in evaluation mode.
+
+        Args:
+            model_dir: Directory passed to ``UpstreamFinetune.from_pretrained``.
+            **kwargs: Accepted for interface compatibility; not used here.
+        """
+        # Prefer the GPU, but fall back to CPU so the handler can still start
+        # on hosts without CUDA instead of failing during model load.
+        # NOTE(review): assumes from_pretrained accepts device='cpu' - confirm.
+        device = 'cuda' if torch.cuda.is_available() else 'cpu'
+        # NOTE(review): label order is presumed to match the order of the
+        # classifier head's outputs - verify against the training config.
+        self.emotions = ['neutral','happy','sad','angry','surprise','contempt']
+        self.model = UpstreamFinetune.from_pretrained(
+            model_dir,
+            device=device,
+        )
+        self.model.eval()
+
+    def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
+        """Run emotion inference on one encoded audio clip.
+
+        Args:
+            data: Request payload. ``data["inputs"]`` holds the encoded audio
+                file, either as raw bytes or as a base64-encoded string.
+                The optional ``data["sampling_rate"]`` (default 16000) is the
+                rate the decoded waveform is resampled to before inference.
+
+        Returns:
+            A list of ``{"label": str, "score": float}`` dicts: one softmax
+            probability per entry of ``self.emotions``, then the ``arousal``
+            and ``valence`` regression outputs.
+
+        Raises:
+            KeyError: If ``data`` has no ``"inputs"`` entry.
+            binascii.Error: If a string payload is not valid base64.
+        """
+        audio = data["inputs"]
+        # JSON requests cannot carry raw bytes, so a string payload is treated
+        # as base64 and decoded before being handed to the audio loader.
+        if isinstance(audio, str):
+            audio = base64.b64decode(audio)
+        sampling_rate = data.get("sampling_rate", 16000)
+        # Decode MP3/WAV bytes → waveform tensor
+        waveform, sr = torchaudio.load(io.BytesIO(audio))
+        if sr != sampling_rate:
+            waveform = torchaudio.functional.resample(waveform, sr, sampling_rate)
+        # Forward pass without gradient tracking (inference only).
+        with torch.no_grad():
+            cat_logits, reg_outputs = self.model(
+                waveform,
+                sampling_rate
+            )
+        # Convert logits to probabilities using softmax
+        emotion_probs = torch.nn.functional.softmax(cat_logits, dim=1)
+        # NOTE(review): only row 0 of the model outputs is reported; for
+        # multi-channel audio that is presumably the first channel - confirm.
+        # Indexing (rather than zip) keeps the IndexError if the model emits
+        # fewer classes than there are labels.
+        emotion_predictions = [
+            {"label": emotion, "score": float(emotion_probs[0, i])}
+            for i, emotion in enumerate(self.emotions)
+        ]
+        # Add arousal and valence predictions
+        result = emotion_predictions + [
+            {"label": "arousal", "score": float(reg_outputs[0, 0])},
+            {"label": "valence", "score": float(reg_outputs[0, 1])}
+        ]
+        return result