checklist-agent / app /services /transcription.py
alexorlov's picture
Upload app/services/transcription.py with huggingface_hub
966a3a9 verified
import asyncio
import logging
import os
import subprocess
import tempfile
import threading

from transformers import pipeline
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
class TranscriptionService:
    """Speech-to-text service built on a Hugging Face ASR pipeline.

    The model is not loaded at construction time: call :meth:`load_model`
    once before calling :meth:`transcribe`.
    """

    def __init__(self):
        # ASR pipeline; stays None until load_model() has been called.
        self._pipeline = None
        # transcribe() runs its work in executor threads. The pipeline is not
        # assumed to be safe for concurrent calls, so inference is serialized.
        self._inference_lock = threading.Lock()

    def load_model(self, model_name: str = "openai/whisper-small", device: str = "cpu"):
        """Create the automatic-speech-recognition pipeline.

        Args:
            model_name: Model identifier handed to ``transformers.pipeline``.
            device: Device handed to ``transformers.pipeline``. Defaults to
                ``"cpu"``.
        """
        logger.info("Loading Whisper model: %s", model_name)
        self._pipeline = pipeline(
            "automatic-speech-recognition",
            model=model_name,
            device=device,
        )
        logger.info("Whisper model loaded successfully")

    @property
    def is_loaded(self) -> bool:
        """Return True once :meth:`load_model` has set up a pipeline."""
        return self._pipeline is not None

    async def transcribe(self, audio_bytes: bytes) -> str:
        """Transcribe an audio recording to text.

        The bytes are written to a temporary ``.webm`` file, converted by
        ffmpeg to 16 kHz mono WAV, and passed to the loaded pipeline. Both
        temporary files are removed afterwards.

        The blocking work (ffmpeg subprocess and model inference) runs in the
        event loop's default executor so the loop stays responsive.

        Args:
            audio_bytes: Raw bytes of the recording.

        Returns:
            The pipeline's ``"text"`` output with surrounding whitespace
            stripped.

        Raises:
            RuntimeError: If no model has been loaded, or if ffmpeg exits
                with a non-zero status.
        """
        if not self.is_loaded:
            raise RuntimeError("Whisper model not loaded")

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, self._transcribe_blocking, audio_bytes)

    @staticmethod
    def _wav_path_for(webm_path: str) -> str:
        """Return *webm_path* with its final extension swapped for ``.wav``."""
        # splitext only touches the trailing extension; str.replace would also
        # rewrite any ".webm" that appears in a directory name.
        root, _ext = os.path.splitext(webm_path)
        return root + ".wav"

    def _transcribe_blocking(self, audio_bytes: bytes) -> str:
        """Synchronous implementation of :meth:`transcribe`."""
        tmp_webm = None
        tmp_wav = None
        try:
            with tempfile.NamedTemporaryFile(suffix=".webm", delete=False) as f:
                # Record the path before writing so a failed write is still
                # cleaned up in the finally block.
                tmp_webm = f.name
                f.write(audio_bytes)

            tmp_wav = self._wav_path_for(tmp_webm)

            process = subprocess.run(
                ["ffmpeg", "-i", tmp_webm, "-ar", "16000", "-ac", "1", "-f", "wav", "-y", tmp_wav],
                capture_output=True,
                # Keep ffmpeg from reading the server process's stdin.
                stdin=subprocess.DEVNULL,
            )
            if process.returncode != 0:
                # errors="replace": undecodable stderr bytes must not mask the
                # real failure with a UnicodeDecodeError.
                stderr_text = process.stderr.decode(errors="replace")
                raise RuntimeError(f"FFmpeg conversion failed: {stderr_text}")

            with self._inference_lock:
                result = self._pipeline(tmp_wav)
            return result["text"].strip()
        finally:
            # Best-effort cleanup: never let a cleanup problem replace the
            # result or the original exception.
            for path in (tmp_webm, tmp_wav):
                if path is None:
                    continue
                try:
                    os.unlink(path)
                except FileNotFoundError:
                    pass
                except OSError as exc:
                    logger.warning("Could not remove temporary file %s: %s", path, exc)
# Module-level singleton: one shared TranscriptionService instance.
# Constructing it does not load a model; load_model() must be called before
# transcribe() can be used.
transcription_service = TranscriptionService()