Spaces:
Sleeping
Sleeping
| import subprocess | |
| import tempfile | |
| import os | |
| import logging | |
| from transformers import pipeline | |
| logger = logging.getLogger(__name__) | |
class TranscriptionService:
    """Speech-to-text service wrapping a Hugging Face ASR pipeline.

    The model is not loaded at construction time; call ``load_model`` once
    before calling ``transcribe``.
    """

    def __init__(self):
        # Set by load_model(); None means the model has not been loaded yet.
        self._pipeline = None

    def load_model(self, model_name: str = "openai/whisper-small"):
        """Create the automatic-speech-recognition pipeline on CPU.

        Args:
            model_name: Model identifier passed to ``transformers.pipeline``.
        """
        logger.info("Loading Whisper model: %s", model_name)
        self._pipeline = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            device="cpu",
        )
        logger.info("Whisper model loaded successfully")

    def is_loaded(self) -> bool:
        """Return True once ``load_model`` has created the pipeline."""
        return self._pipeline is not None

    async def transcribe(self, audio_bytes: bytes) -> str:
        """Transcribe encoded audio and return the recognized text.

        The bytes are written to a temporary ``.webm`` file, converted by
        ffmpeg to a 16 kHz mono WAV file, and that file is passed to the
        pipeline. Both temporary files are removed afterwards.

        Args:
            audio_bytes: Raw contents of the audio file to transcribe.

        Returns:
            The pipeline's ``"text"`` result with surrounding whitespace
            stripped.

        Raises:
            RuntimeError: If the model has not been loaded, or if ffmpeg
                exits with a non-zero status.
        """
        # is_loaded is a method: it must be *called*. Testing the bound
        # method object itself is always truthy and would skip this guard.
        if not self.is_loaded():
            raise RuntimeError("Whisper model not loaded")

        tmp_webm = None
        tmp_wav = None
        try:
            with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as f:
                # Record the name before writing so the file is still cleaned
                # up if the write itself fails.
                tmp_webm = f.name
                f.write(audio_bytes)

            # Swap only the extension; str.replace would also rewrite any
            # ".webm" that happens to appear in a directory name.
            tmp_wav = os.path.splitext(tmp_webm)[0] + ".wav"

            # NOTE: subprocess.run and the pipeline call below are synchronous,
            # so this coroutine blocks the event loop while they run.
            process = subprocess.run(
                [
                    "ffmpeg", "-i", tmp_webm,
                    "-ar", "16000", "-ac", "1",
                    "-f", "wav", "-y", tmp_wav,
                ],
                capture_output=True,
            )
            if process.returncode != 0:
                # errors="replace" keeps a non-UTF-8 byte in ffmpeg's output
                # from masking the real failure with a UnicodeDecodeError.
                stderr_text = process.stderr.decode(errors="replace")
                raise RuntimeError(f"FFmpeg conversion failed: {stderr_text}")

            result = self._pipeline(tmp_wav)
            return result["text"].strip()
        finally:
            # Best-effort cleanup: a file that is missing or cannot be removed
            # must not hide the transcription result or the original error.
            for path in (tmp_webm, tmp_wav):
                if path is None:
                    continue
                try:
                    os.unlink(path)
                except OSError:
                    pass
| # Module-level singleton: one TranscriptionService instance created at import time (its model stays unloaded until load_model() is called) | |
| transcription_service = TranscriptionService() | |