Spaces:

NLP-Debater-Project
/

FastAPI-Backend-Models

Sleeping

FastAPI-Backend-Models / services /stt_service.py

malek-messaoudii

Refactor audio processing and chatbot services; enhance STT and TTS functionalities with base64 support and session management

56dc677 about 2 months ago

raw

history blame

3.59 kB

	import base64
	import io
	import tempfile
	import os
	import wave
	import audioop

	class STTService:
	    """Placeholder speech-to-text service that only describes the audio it receives.

	    No STT model is wired in. ``transcribe_audio_base64`` decodes the payload
	    and returns a bracketed notice containing basic audio metadata instead of
	    a real transcription.
	    """

	    def __init__(self):
	        # Set to True by ``initialize``.
	        self.initialized = False

	    async def initialize(self):
	        """Mark the service as initialized; basic mode has nothing to load."""
	        self.initialized = True
	        print("✓ STT Service initialized (basic mode)")

	    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en-US") -> str:
	        """Decode base64 audio and return a placeholder message describing it.

	        Args:
	            audio_base64: Base64-encoded audio payload.
	            language: Accepted for interface compatibility; not used by this
	                placeholder implementation.

	        Returns:
	            A bracketed notice embedding the audio description, or a fixed
	            apology string if decoding or inspection raises.
	        """
	        try:
	            audio_data = base64.b64decode(audio_base64)

	            # Placeholder: a real implementation would run Whisper, Vosk or
	            # another STT model on ``audio_data`` here.
	            audio_info = await self._get_audio_info(audio_data)
	            return f"[Audio received: {audio_info}. STT service needs model configuration.]"
	        except Exception as e:
	            print(f"Transcription error: {e}")
	            return "Sorry, I couldn't process the audio message."

	    async def _get_audio_info(self, audio_data: bytes) -> str:
	        """Describe raw audio bytes.

	        Args:
	            audio_data: Raw audio bytes, parsed as a WAV file when possible.

	        Returns:
	            ``"Duration: <s>s, Sample Rate: <hz>Hz"`` when the bytes parse as
	            WAV, otherwise ``"Size: <n> bytes"``.
	        """
	        try:
	            # Parse in memory: no temp file to create, flush or clean up.
	            with wave.open(io.BytesIO(audio_data), 'rb') as wav_file:
	                frames = wav_file.getnframes()
	                rate = wav_file.getframerate()
	            duration = frames / float(rate)
	            return f"Duration: {duration:.2f}s, Sample Rate: {rate}Hz"
	        except Exception:
	            # Best-effort fallback for anything that is not a readable WAV
	            # (including a zero sample rate). Catching ``Exception`` rather
	            # than using a bare ``except`` lets task cancellation and
	            # KeyboardInterrupt propagate.
	            return f"Size: {len(audio_data)} bytes"

	# Alternative STT service using Whisper if available
	class WhisperSTTService:
	    """Speech-to-text service backed by the optional ``whisper`` package.

	    Args:
	        model_name: Name passed to ``whisper.load_model`` during
	            ``initialize``. Defaults to ``"medium"``.
	    """

	    def __init__(self, model_name: str = "medium"):
	        self.model_name = model_name
	        # Populated by ``initialize`` when Whisper loads successfully.
	        self.model = None
	        self.initialized = False

	    async def initialize(self):
	        """Load the Whisper model, leaving ``initialized`` False on failure.

	        A missing ``whisper`` package or any error while loading the model is
	        reported with ``print`` and does not raise.
	        """
	        try:
	            # Imported lazily so the module still imports without Whisper.
	            import whisper
	            self.model = whisper.load_model(self.model_name)
	            self.initialized = True
	            print("✓ Whisper STT Service initialized")
	        except ImportError:
	            print("⚠️ Whisper not available. Install with: pip install openai-whisper")
	            self.initialized = False
	        except Exception as e:
	            print(f"⚠️ Whisper initialization failed: {e}")
	            self.initialized = False

	    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en") -> str:
	        """Transcribe base64-encoded audio with the loaded Whisper model.

	        Args:
	            audio_base64: Base64-encoded audio payload.
	            language: Language value forwarded to ``model.transcribe``.

	        Returns:
	            The ``"text"`` field of the model result, a fixed "not available"
	            message when the service is not initialized, or a fixed apology
	            string when decoding or transcription raises.
	        """
	        if not self.initialized:
	            return "STT service not available. Please install Whisper."

	        temp_path = None
	        try:
	            audio_data = base64.b64decode(audio_base64)

	            # The model is given a file path, so spill the bytes to disk.
	            # Leaving the ``with`` block closes the file and flushes the
	            # data before the model reads it.
	            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
	                temp_path = temp_file.name
	                temp_file.write(audio_data)

	            result = self.model.transcribe(temp_path, language=language)
	            return result["text"]
	        except Exception as e:
	            print(f"Whisper transcription error: {e}")
	            return "Sorry, I couldn't transcribe the audio."
	        finally:
	            # Always remove the temp file, including when transcription
	            # fails; otherwise each failed request leaks a file.
	            if temp_path is not None:
	                try:
	                    os.unlink(temp_path)
	                except OSError:
	                    # Cleanup is best-effort and must not mask the result.
	                    pass