malek-messaoudii
Refactor audio processing and chatbot services; enhance STT and TTS functionalities with base64 support and session management
56dc677
raw
history blame
3.59 kB
import base64
import io
import tempfile
import os
import wave
import audioop
class STTService:
    """Placeholder speech-to-text service that works without any STT model.

    No transcription backend is wired in: ``transcribe_audio_base64`` decodes
    the payload and returns a bracketed notice describing the audio rather
    than an actual transcript.
    """

    def __init__(self):
        # Set to True by initialize().
        self.initialized = False

    async def initialize(self):
        """Mark the service as ready; basic mode has nothing to load."""
        self.initialized = True
        print("✓ STT Service initialized (basic mode)")

    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en-US") -> str:
        """Decode base64 audio and return a placeholder "transcription".

        Args:
            audio_base64: Base64-encoded audio bytes.
            language: Accepted for interface parity with other STT services;
                it is not used by this placeholder implementation.

        Returns:
            A bracketed notice containing the audio summary, or a generic
            apology string if decoding or inspection fails. Never raises for
            ordinary errors.
        """
        try:
            audio_data = base64.b64decode(audio_base64)
            # No STT model is configured, so only describe what was received.
            audio_info = await self._get_audio_info(audio_data)
            return f"[Audio received: {audio_info}. STT service needs model configuration.]"
        except Exception as e:
            print(f"Transcription error: {e}")
            return "Sorry, I couldn't process the audio message."

    async def _get_audio_info(self, audio_data: bytes) -> str:
        """Summarise the audio payload.

        Returns duration and sample rate when the bytes parse as a WAV file,
        otherwise falls back to the payload size in bytes.
        """
        size_summary = f"Size: {len(audio_data)} bytes"
        try:
            # wave.open accepts a file-like object, so the payload is parsed
            # in memory; no temporary file has to be created or cleaned up.
            with wave.open(io.BytesIO(audio_data), "rb") as wav_file:
                frames = wav_file.getnframes()
                rate = wav_file.getframerate()
        except Exception:
            # Best-effort probe: anything that is not a readable WAV file is
            # reported by size only.
            return size_summary
        if rate <= 0:
            # A malformed header can declare a zero rate; avoid dividing by it.
            return size_summary
        duration = frames / float(rate)
        return f"Duration: {duration:.2f}s, Sample Rate: {rate}Hz"
# Alternative STT service using Whisper if available
class WhisperSTTService:
    """Speech-to-text service backed by the optional ``whisper`` package.

    The model is loaded lazily by ``initialize``; until that succeeds,
    ``transcribe_audio_base64`` returns a "not available" message.
    """

    def __init__(self, model_name: str = "medium"):
        """Create the service.

        Args:
            model_name: Name passed to ``whisper.load_model`` during
                ``initialize``. Defaults to ``"medium"``.
        """
        self.model_name = model_name
        self.model = None
        self.initialized = False

    async def initialize(self):
        """Load the Whisper model and set ``initialized`` accordingly.

        Import and load failures are reported on stdout and leave the service
        uninitialized; they are not raised.
        """
        try:
            # Imported here so the module still loads when whisper is absent.
            import whisper
            # NOTE(review): load_model is a synchronous call inside an async
            # method; it presumably blocks the event loop while loading.
            self.model = whisper.load_model(self.model_name)
            self.initialized = True
            print("✓ Whisper STT Service initialized")
        except ImportError:
            print("⚠️ Whisper not available. Install with: pip install openai-whisper")
            self.initialized = False
        except Exception as e:
            print(f"⚠️ Whisper initialization failed: {e}")
            self.initialized = False

    @staticmethod
    def _normalize_language(language):
        """Reduce a region-tagged code such as ``en-US`` to ``en``; empty input becomes None."""
        if not language:
            return None
        primary = language.replace("_", "-").split("-", 1)[0].strip().lower()
        return primary or None

    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en") -> str:
        """Transcribe base64-encoded audio with the loaded Whisper model.

        Args:
            audio_base64: Base64-encoded audio bytes.
            language: Language code; region-tagged values (``"en-US"``) are
                reduced to their primary subtag, and an empty value is passed
                to the model as ``None``.

        Returns:
            The ``"text"`` field of the model result, a "not available"
            message when the service is uninitialized, or a generic apology
            string if decoding or transcription fails.
        """
        if not self.initialized:
            return "STT service not available. Please install Whisper."
        temp_path = None
        try:
            audio_data = base64.b64decode(audio_base64)
            # The model is given a filesystem path, so spill the bytes to a
            # temp file and close it before the model reads it.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                temp_path = temp_file.name
                temp_file.write(audio_data)
            result = self.model.transcribe(
                temp_path, language=self._normalize_language(language)
            )
            return result["text"]
        except Exception as e:
            print(f"Whisper transcription error: {e}")
            return "Sorry, I couldn't transcribe the audio."
        finally:
            # Remove the temp file on every path, including when transcribe raises.
            if temp_path is not None and os.path.exists(temp_path):
                os.unlink(temp_path)