File size: 3,591 Bytes
56dc677 95cb26e 56dc677 4a13628 56dc677 3b2b211 56dc677 4a13628 56dc677 4a13628 3b2b211 56dc677 95cb26e 56dc677 674469e 56dc677 674469e 56dc677 95cb26e 56dc677 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import base64
import io
import tempfile
import os
import wave
import audioop
class STTService:
    """Placeholder speech-to-text service that only inspects incoming audio.

    No STT model is wired in. ``transcribe_audio_base64`` therefore returns a
    bracketed status message describing the audio rather than a transcription.
    """

    def __init__(self):
        # Flipped to True by ``initialize``.
        self.initialized = False

    async def initialize(self):
        """Mark the service as ready; basic mode has no model to load."""
        # For now, we use a simple approach without external dependencies.
        self.initialized = True
        print("✓ STT Service initialized (basic mode)")

    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en-US") -> str:
        """Decode base64 audio and return a placeholder describing it.

        Args:
            audio_base64: Base64-encoded audio payload.
            language: Accepted for interface compatibility; not used by this
                placeholder implementation.

        Returns:
            A bracketed status message containing the audio info, or a fixed
            apology string if decoding or inspection raises.
        """
        try:
            audio_data = base64.b64decode(audio_base64)
            # In a real implementation, Whisper, Vosk, or another STT model
            # would be invoked here instead of returning a placeholder.
            audio_info = await self._get_audio_info(audio_data)
            return f"[Audio received: {audio_info}. STT service needs model configuration.]"
        except Exception as e:
            print(f"Transcription error: {e}")
            return "Sorry, I couldn't process the audio message."

    async def _get_audio_info(self, audio_data: bytes) -> str:
        """Describe ``audio_data`` as WAV duration/sample rate, else byte size.

        The bytes are parsed in memory with ``wave.open`` on a ``BytesIO``, so
        no temporary file is created and nothing needs cleaning up.

        Args:
            audio_data: Raw audio bytes (expected, but not required, to be WAV).

        Returns:
            ``"Duration: <s>s, Sample Rate: <hz>Hz"`` when the bytes parse as a
            WAV file, otherwise ``"Size: <n> bytes"``.
        """
        try:
            with wave.open(io.BytesIO(audio_data), "rb") as wav_file:
                frames = wav_file.getnframes()
                rate = wav_file.getframerate()
            # A zero sample rate raises ZeroDivisionError and falls through to
            # the size-only description below.
            duration = frames / float(rate)
        except Exception:
            # Best-effort probe: any parse failure just means "not a WAV we
            # can describe". ``Exception`` (rather than a bare ``except``)
            # lets KeyboardInterrupt and task cancellation propagate.
            return f"Size: {len(audio_data)} bytes"
        return f"Duration: {duration:.2f}s, Sample Rate: {rate}Hz"
# Alternative STT service using Whisper if available
class WhisperSTTService:
    """Speech-to-text service backed by the optional ``whisper`` package."""

    def __init__(self, model_name: str = "medium"):
        """Create an uninitialized service.

        Args:
            model_name: Name passed to ``whisper.load_model`` by
                ``initialize``. Defaults to ``"medium"``.
        """
        self.model_name = model_name
        self.model = None
        self.initialized = False

    async def initialize(self):
        """Load the Whisper model and set ``initialized`` accordingly.

        Failures are reported with ``print`` and leave ``initialized`` False;
        nothing is raised to the caller.
        """
        try:
            # Imported lazily so the module still loads without whisper.
            import whisper

            self.model = whisper.load_model(self.model_name)
            self.initialized = True
            print("✓ Whisper STT Service initialized")
        except ImportError:
            print("⚠️ Whisper not available. Install with: pip install openai-whisper")
            self.initialized = False
        except Exception as e:
            print(f"⚠️ Whisper initialization failed: {e}")
            self.initialized = False

    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en") -> str:
        """Transcribe base64-encoded audio with the loaded Whisper model.

        The decoded bytes are written to a temporary ``.wav`` file because
        ``model.transcribe`` is given a file path. That file is removed
        whether transcription succeeds or fails.

        Args:
            audio_base64: Base64-encoded audio payload.
            language: Language code forwarded to ``model.transcribe``.

        Returns:
            The ``"text"`` entry of the transcription result, or a fixed
            fallback message when the service is uninitialized or any step
            raises.
        """
        if not self.initialized:
            return "STT service not available. Please install Whisper."

        temp_path = None
        try:
            audio_data = base64.b64decode(audio_base64)
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                temp_path = temp_file.name
                temp_file.write(audio_data)
            # The file is closed (and flushed) before the model reads it.
            # NOTE(review): transcribe is a synchronous call made inside this
            # coroutine; consider offloading it to an executor if it stalls
            # the event loop.
            result = self.model.transcribe(temp_path, language=language)
            return result["text"]
        except Exception as e:
            print(f"Whisper transcription error: {e}")
            return "Sorry, I couldn't transcribe the audio."
        finally:
            # Runs on success and on failure, so no temp file is left behind.
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError:
                    # Cleanup is best-effort and must not replace the value
                    # already being returned.
                    pass