malek-messaoudii
fix errors
a8c8142
raw
history blame
2.23 kB
import torch
from transformers import pipeline
import logging
import tempfile
import os
logger = logging.getLogger(__name__)
# Global STT pipeline
stt_pipeline = None
def load_stt_model():
    """Load the free Whisper model for speech-to-text.

    Populates the module-level ``stt_pipeline`` with a transformers ASR
    pipeline on success; leaves it as ``None`` when loading fails (callers
    check ``stt_pipeline is None`` to detect failure).
    """
    global stt_pipeline
    # Idempotence guard: the original reloaded the (large) model on every
    # call; skip the expensive load when the pipeline is already present.
    if stt_pipeline is not None:
        return
    try:
        logger.info("Loading Whisper-medium STT model...")
        # NOTE(review): pinned to CPU; `torch` is imported at module level but
        # unused — consider device=0 when torch.cuda.is_available(). Confirm
        # the deployment target before changing.
        stt_pipeline = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-medium",
            device="cpu",
        )
        logger.info("βœ“ Whisper-medium STT model loaded successfully")
    except Exception as e:
        # Deliberate best-effort: record the failure and leave the pipeline
        # unset rather than crashing the importing application.
        logger.error(f"βœ— Failed to load Whisper-medium model: {str(e)}")
        stt_pipeline = None
async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
    """
    Convert audio bytes to text using free Whisper model.

    Args:
        audio_bytes: Raw audio file bytes
        filename: Name of the audio file; its extension is used for the
            temporary file so the decoder can detect the container format

    Returns:
        Transcribed text (a fallback sentence when transcription is empty)

    Raises:
        Exception: When the STT model cannot be loaded or transcription fails.
    """
    global stt_pipeline
    try:
        # Lazy-load the model on first use.
        if stt_pipeline is None:
            load_stt_model()
        if stt_pipeline is None:
            raise Exception("STT model failed to load")
        logger.info(f"Converting audio to text using Whisper-medium")
        # Fix: the original ignored `filename` and hard-coded a '.wav' suffix,
        # mislabeling mp3/ogg/m4a uploads. Honour the caller's extension and
        # fall back to '.wav' when there is none.
        suffix = os.path.splitext(filename)[1] or '.wav'
        # Save audio bytes to a temporary file (the pipeline expects a path).
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_audio:
            temp_audio.write(audio_bytes)
            temp_audio_path = temp_audio.name
        try:
            # Transcribe using Whisper
            result = stt_pipeline(temp_audio_path)
            transcribed_text = result.get("text", "").strip()
            if not transcribed_text:
                transcribed_text = "Sorry, I couldn't understand the audio."
            logger.info(f"βœ“ STT successful: '{transcribed_text}'")
            return transcribed_text
        finally:
            # Clean up temporary file even when transcription raises.
            if os.path.exists(temp_audio_path):
                os.unlink(temp_audio_path)
    except Exception as e:
        logger.error(f"βœ— STT failed: {str(e)}")
        # Chain the original cause (`from e`) so upstream handlers see the
        # full traceback instead of a bare wrapper exception.
        raise Exception(f"Speech-to-text conversion failed: {str(e)}") from e