|
|
import logging |
|
|
import tempfile |
|
|
import os |
|
|
from transformers import pipeline |
|
|
import librosa |
|
|
import numpy as np |
|
|
|
|
|
# Module-level logger, named after this module per logging convention.
logger = logging.getLogger(__name__)


# Lazily-initialized ASR pipeline cache; populated by load_stt_model()
# on first use and reset to None if loading fails.
stt_pipeline = None
|
|
|
|
|
def load_stt_model():
    """Load the Whisper-base ASR pipeline into the module-level cache.

    On success, ``stt_pipeline`` holds a ready-to-use transformers
    automatic-speech-recognition pipeline; on any failure it is reset to
    ``None`` so callers (see ``speech_to_text``) can detect that the
    model is unavailable. Never raises.
    """
    global stt_pipeline
    try:
        logger.info("Loading Whisper-base STT model...")
        stt_pipeline = pipeline(
            "automatic-speech-recognition",
            model="openai/whisper-base",
            device="cpu",  # CPU keeps the service deployable without a GPU
            chunk_length_s=30,  # split long audio into 30-second chunks
        )
        # BUG FIX: original log messages contained mis-encoded emoji
        # characters ("β"); replaced with plain ASCII text.
        logger.info("Whisper STT model loaded successfully")
    except Exception as e:
        # Best-effort: log and leave the pipeline unset rather than crash
        # at startup; lazy %-formatting avoids building the message eagerly.
        logger.error("Failed to load STT model: %s", e)
        stt_pipeline = None
|
|
|
|
|
async def speech_to_text(audio_bytes: bytes, filename: str) -> str:
    """Convert audio bytes to text using Whisper.

    Handles WAV, MP3, M4A formats automatically — librosa decodes by
    content, so the ``.wav`` temp-file suffix does not restrict formats.

    Args:
        audio_bytes: Raw contents of the uploaded audio file.
        filename: Original upload filename, used only for logging.

    Returns:
        The transcribed text, or a placeholder string when the audio is
        silent or unrecognizable.

    Raises:
        Exception: If the model cannot be loaded or transcription fails.

    NOTE(review): the decode + inference below is blocking CPU work inside
    an async function; consider offloading to a thread pool if this runs
    on an event loop serving other requests.
    """
    global stt_pipeline

    try:
        # Lazy-load the model on first call.
        if stt_pipeline is None:
            load_stt_model()
            if stt_pipeline is None:
                raise Exception("STT model not loaded")

        # BUG FIX: original logged the literal text "(unknown)" and never
        # used the filename parameter at all.
        logger.info("STT: Converting audio file '%s'", filename)

        # librosa wants a file path, so spill the bytes to a temp file.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.wav') as tmp:
            tmp.write(audio_bytes)
            tmp_path = tmp.name

        try:
            # Whisper expects 16 kHz audio; sample rate return is unused.
            audio, _ = librosa.load(tmp_path, sr=16000)

            result = stt_pipeline(audio, generate_kwargs={"language": "english"})
            text = result["text"].strip()

            if not text:
                text = "[Silent audio or unrecognizable speech]"

            logger.info("STT Success: '%s'", text)
            return text

        finally:
            # Always delete the temp file, even when transcription fails.
            if os.path.exists(tmp_path):
                os.unlink(tmp_path)

    except Exception as e:
        logger.error("STT Error: %s", e)
        # BUG FIX: chain the cause so the original traceback is preserved
        # for upstream error handlers.
        raise Exception(f"STT failed: {str(e)}") from e