import base64

import torch
from transformers import pipeline
class EndpointHandler:
    """HF Inference Endpoints entry point for an audio-text-to-text model.

    Loads the model once at container start (``__init__``) and serves one
    request per ``__call__``.
    """

    def __init__(self, model_path: str = "."):
        """Build the inference pipeline.

        Args:
            model_path: Path or repo id of the model; Inference Endpoints
                mount the repository at ``"."``.
        """
        # bfloat16 only makes sense on GPU; fall back to fp32 on CPU.
        use_cuda = torch.cuda.is_available()
        self.pipe = pipeline(
            task="audio-text-to-text",
            model=model_path,
            device=0 if use_cuda else -1,
            torch_dtype=torch.bfloat16 if use_cuda else torch.float32,
            trust_remote_code=True,
        )

    def __call__(self, data):
        """Handle one inference request.

        Expected JSON (possibly wrapped under ``"inputs"`` by the
        Endpoints runtime): {
            "audio": <base64-encoded 16-kHz WAV bytes>,
            "turns": [{"role": "system", "content": "..."}, ...]
        }
        Returns: {"generated_text": "..."} on success, or
        {"error": "...", "status_code": 500} on failure.
        """
        try:
            # HF Inference Endpoints deliver the request body under "inputs";
            # accept both wrapped and bare payloads.
            payload = data.get("inputs", data) if isinstance(data, dict) else data

            # The documented contract sends audio as a base64 string; decode it
            # so the pipeline receives raw WAV bytes. Copy first so we never
            # mutate the caller's dict.
            if isinstance(payload, dict) and isinstance(payload.get("audio"), str):
                payload = dict(payload)
                payload["audio"] = base64.b64decode(payload["audio"])

            result = self.pipe(payload, max_new_tokens=256)

            # Pipelines typically return [{"generated_text": ...}]; normalize
            # to the plain string promised by the docstring when possible.
            if isinstance(result, list) and result and isinstance(result[0], dict):
                result = result[0].get("generated_text", result)
            return {"generated_text": result}
        except Exception as e:  # serving boundary: report the error, don't crash the worker
            return {
                "error": str(e),
                "status_code": 500,
            }