malek-messaoudii
Refactor audio processing and chatbot services; enhance STT and TTS functionalities with base64 support and session management
56dc677
| import base64 | |
| import io | |
| import tempfile | |
| import os | |
| import wave | |
| import audioop | |
class STTService:
    """Placeholder speech-to-text service.

    No STT model is wired in: instead of a transcription, the service
    returns a short description of the audio it received (duration and
    sample rate for WAV data, byte size otherwise).
    """

    def __init__(self):
        # Flipped to True by initialize().
        self.initialized = False

    async def initialize(self):
        """Mark the service as ready; no external resources are loaded."""
        self.initialized = True
        print("✓ STT Service initialized (basic mode)")

    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en-US") -> str:
        """Describe base64-encoded audio in place of a real transcription.

        Args:
            audio_base64: Base64-encoded audio bytes.
            language: Accepted for interface compatibility; unused because
                no STT model is configured.

        Returns:
            A bracketed placeholder message containing basic audio info, or
            a generic apology string when the payload is empty or cannot be
            decoded. This method never raises.
        """
        try:
            audio_data = base64.b64decode(audio_base64)
            if not audio_data:
                # Nothing was actually sent (or nothing survived decoding);
                # claiming "audio received" would be misleading.
                print("Transcription error: empty audio payload")
                return "Sorry, I couldn't process the audio message."

            # Placeholder: a real implementation would run Whisper, Vosk or
            # another STT model on audio_data here.
            audio_info = await self._get_audio_info(audio_data)
            return f"[Audio received: {audio_info}. STT service needs model configuration.]"
        except Exception as e:
            print(f"Transcription error: {e}")
            return "Sorry, I couldn't process the audio message."

    async def _get_audio_info(self, audio_data: bytes) -> str:
        """Return a short human-readable description of the audio bytes.

        WAV data is described by duration and sample rate; anything the
        ``wave`` module cannot parse is described by its size in bytes.
        """
        # Parse the header in memory: no temporary file to create, leak or
        # clean up, and no disk I/O inside a coroutine.
        try:
            with wave.open(io.BytesIO(audio_data), "rb") as wav_file:
                frames = wav_file.getnframes()
                rate = wav_file.getframerate()
            duration = frames / float(rate)
        except Exception:
            # Deliberate best-effort fallback for non-WAV, truncated or
            # otherwise malformed input (including a zero sample rate).
            return f"Size: {len(audio_data)} bytes"
        return f"Duration: {duration:.2f}s, Sample Rate: {rate}Hz"
| # Alternative STT service using Whisper if available | |
class WhisperSTTService:
    """Speech-to-text service backed by the optional ``whisper`` package.

    The model is loaded lazily by ``initialize()``; until that succeeds,
    transcription requests return a "not available" message.
    """

    def __init__(self, model_name: str = "medium"):
        """Create an uninitialized service.

        Args:
            model_name: Name passed to ``whisper.load_model``. Defaults to
                ``"medium"``, the value that was previously hard-coded.
        """
        self.model = None
        self.model_name = model_name
        self.initialized = False

    async def initialize(self):
        """Load the Whisper model, recording success in ``self.initialized``.

        Failures (package missing or model load error) are reported on
        stdout and leave the service uninitialized; nothing is raised.
        """
        try:
            # Imported here so the module stays importable without whisper.
            import whisper

            self.model = whisper.load_model(self.model_name)
            self.initialized = True
            print("✓ Whisper STT Service initialized")
        except ImportError:
            print("⚠️ Whisper not available. Install with: pip install openai-whisper")
            self.initialized = False
        except Exception as e:
            print(f"⚠️ Whisper initialization failed: {e}")
            self.initialized = False

    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en") -> str:
        """Transcribe base64-encoded audio with the loaded Whisper model.

        Args:
            audio_base64: Base64-encoded audio bytes.
            language: Language code forwarded to ``model.transcribe``.

        Returns:
            The ``"text"`` field of the model result, or a fixed fallback
            message when the service is not initialized or transcription
            fails. This method never raises.
        """
        if not self.initialized:
            return "STT service not available. Please install Whisper."

        temp_path = None
        try:
            audio_data = base64.b64decode(audio_base64)
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                temp_path = temp_file.name
                temp_file.write(audio_data)
            # The file is closed (and therefore flushed) before the model
            # reads it by path.
            # NOTE(review): transcribe() is a blocking call inside a
            # coroutine; consider offloading it to a worker thread.
            result = self.model.transcribe(temp_path, language=language)
            return result["text"]
        except Exception as e:
            print(f"Whisper transcription error: {e}")
            return "Sorry, I couldn't transcribe the audio."
        finally:
            # Always remove the temp file, including when decoding or
            # transcription fails; previously it leaked on every error.
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError:
                    pass