from transformers import pipeline
import torch


class EndpointHandler:
    """HF Inference Endpoints entry point.

    Loads an ``audio-text-to-text`` pipeline once at startup and serves
    generation requests through ``__call__``.
    """

    def __init__(self, model_path: str = "."):
        """Build the inference pipeline.

        Args:
            model_path: Model directory or hub id; defaults to the current
                directory (the layout HF Inference Endpoints deploys into).
        """
        # Hoist the invariant: decide device/dtype once instead of calling
        # torch.cuda.is_available() twice. GPU gets bfloat16; CPU falls back
        # to full-precision float32.
        use_cuda = torch.cuda.is_available()
        self.pipe = pipeline(
            task="audio-text-to-text",
            model=model_path,
            device=0 if use_cuda else -1,
            torch_dtype=torch.bfloat16 if use_cuda else torch.float32,
            trust_remote_code=True,
        )

    def __call__(self, data):
        """Run one inference request.

        Expected JSON payload (exact audio format is whatever the underlying
        pipeline accepts — TODO confirm against the deployed model card)::

            {
                "audio": <audio input accepted by the pipeline>,
                "turns": [{"role": "system", "content": "..."}, ...]
            }

        Returns:
            ``{"generated_text": ...}`` on success, or
            ``{"error": "...", "status_code": 500}`` on failure.
        """
        try:
            result = self.pipe(data, max_new_tokens=256)
        except Exception as e:
            # Top-level serving boundary: report the failure as a JSON error
            # payload rather than letting the worker crash on a bad request.
            return {
                "error": str(e),
                "status_code": 500,
            }
        return {"generated_text": result}