othsueh
/

clean-jazz-186

Audio Classification

wav2vec2-emodualhead

emotion-recognition

Model card Files Files and versions

othsueh committed on May 13, 2025

Commit

f704679

·

verified ·

1 Parent(s): 6ce85cd

Update handler.py

Files changed (1) hide show

handler.py +3 -15

handler.py CHANGED Viewed

@@ -8,35 +8,23 @@ from modeling_upstream_finetune import UpstreamFinetune
 class EndpointHandler:
     def __init__(self, model_dir: str, **kwargs: Any) -> None:
         # Load config and model with trust_remote_code
-        self.config = AutoConfig.from_pretrained(
-            model_dir, trust_remote_code=True
-        )
         self.model = UpstreamFinetune.from_pretrained(
             model_dir,
             trust_remote_code=True,
             # pass any kwargs like device mapping
         )
         self.model.eval()
-        # Load processor (feature extractor + tokenizer)
-        self.processor = AutoProcessor.from_pretrained(
-            model_dir, trust_remote_code=True
-        )
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         # Expect raw audio bytes or a base64 string in `data["inputs"]`
         audio = data["inputs"]
         sr = data.get("sampling_rate", 16000)
-        # Preprocess
-        inputs = self.processor(
-            audio,
-            sampling_rate=sr,
-            return_tensors="pt",
-            padding=True
-        )
         # Forward pass
         with torch.no_grad():
             cat_logits, reg_outputs = self.model(
-                inputs.input_values.squeeze(0),
                 sr
             )
         # Postprocess to Python types

 class EndpointHandler:
     def __init__(self, model_dir: str, **kwargs: Any) -> None:
         # Load config and model with trust_remote_code
+        device = 'cuda'
         self.model = UpstreamFinetune.from_pretrained(
             model_dir,
+            device=device,
             trust_remote_code=True,
             # pass any kwargs like device mapping
         )
         self.model.eval()
     def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
         # Expect raw audio bytes or a base64 string in `data["inputs"]`
         audio = data["inputs"]
         sr = data.get("sampling_rate", 16000)
         # Forward pass
         with torch.no_grad():
             cat_logits, reg_outputs = self.model(
+                audio,
                 sr
             )
         # Postprocess to Python types