from transformers import pipeline
import torch


class EndpointHandler:
    """HF Inference Endpoints entry point.

    Loads an ``audio-text-to-text`` pipeline once at startup and serves
    generation requests through ``__call__``.
    """

    def __init__(self, model_path: str = "."):
        """Build the inference pipeline.

        Args:
            model_path: Model directory or hub id; defaults to the current
                directory (the layout HF Inference Endpoints deploys into).
        """
        # Hoist the invariant: decide device/dtype once instead of calling
        # torch.cuda.is_available() twice. GPU gets bfloat16; CPU falls back
        # to full-precision float32.
        use_cuda = torch.cuda.is_available()
        self.pipe = pipeline(
            task="audio-text-to-text",
            model=model_path,
            device=0 if use_cuda else -1,
            torch_dtype=torch.bfloat16 if use_cuda else torch.float32,
            trust_remote_code=True,
        )

    def __call__(self, data):
        """Run one inference request.

        Expected JSON payload (exact audio format is whatever the underlying
        pipeline accepts — TODO confirm against the deployed model card)::

            {
                "audio": <audio input accepted by the pipeline>,
                "turns": [{"role": "system", "content": "..."}, ...]
            }

        Returns:
            ``{"generated_text": ...}`` on success, or
            ``{"error": "...", "status_code": 500}`` on failure.
        """
        try:
            result = self.pipe(data, max_new_tokens=256)
        except Exception as e:
            # Top-level serving boundary: report the failure as a JSON error
            # payload rather than letting the worker crash on a bad request.
            return {
                "error": str(e),
                "status_code": 500,
            }
        return {"generated_text": result}