# handler.py
class EndpointHandler:
    """Inference-endpoint handler that transcribes audio with omniASR.

    The ASR pipeline is built once in ``__init__`` and reused for every
    request handled by ``__call__``.
    """

    def __init__(self, path=""):
        """Load the omniASR inference pipeline.

        Args:
            path: Accepted for interface compatibility; not used by this
                handler.
        """
        # Lazy import: keep the heavy ASR dependency out of module import.
        from omnilingual_asr.models.inference.pipeline import ASRInferencePipeline

        # NOTE(review): this string looks like a Hugging Face repo ID; the
        # omnilingual-asr docs use card names such as "omniASR_LLM_7B".
        # Confirm that this value resolves before deploying.
        self.pipeline = ASRInferencePipeline(model_card="facebook/omniASR-LLM-7B")

    def __call__(self, data):
        """Transcribe the audio carried in a request payload.

        Args:
            data: Mapping whose ``"inputs"`` entry holds the encoded audio
                file as raw bytes.

        Returns:
            ``{"text": result}`` where ``result`` is whatever
            ``pipeline.transcribe`` returns for the one-item batch, or
            ``{"error": message}`` when the payload is missing, is not
            bytes, or cannot be decoded.
        """
        audio_bytes = data.get("inputs")
        if not audio_bytes:
            return {"error": "no audio provided"}
        if not isinstance(audio_bytes, (bytes, bytearray, memoryview)):
            return {"error": "inputs must be raw audio bytes"}

        # Imported only after validation so rejected requests stay cheap and
        # nothing heavy is pulled in at module load.
        import io

        import soundfile as sf

        try:
            with io.BytesIO(audio_bytes) as buffer:
                audio, sr = sf.read(buffer, dtype="float32")
        except RuntimeError as exc:
            # soundfile's decode errors derive from RuntimeError.
            return {"error": f"could not decode audio: {exc}"}

        # soundfile returns (frames, channels) for multi-channel files;
        # average the channels so the model receives a 1-D mono waveform.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # Pass the sample rate along with the samples; a bare array would
        # lose it. NOTE(review): the dict form follows the omnilingual-asr
        # README example for pre-decoded audio -- confirm against the
        # installed version.
        sample = {"waveform": audio, "sample_rate": sr}
        result = self.pipeline.transcribe([sample], batch_size=1)
        return {"text": result}