File size: 3,591 Bytes
56dc677
 
95cb26e
 
56dc677
 
4a13628
56dc677
 
 
3b2b211
56dc677
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a13628
56dc677
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4a13628
3b2b211
56dc677
95cb26e
56dc677
 
 
674469e
56dc677
 
674469e
56dc677
 
95cb26e
56dc677
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import base64
import io
import tempfile
import os
import wave
import audioop

class STTService:
    """Placeholder speech-to-text service.

    No STT model is wired in: instead of a transcript, the service returns a
    bracketed notice describing the audio it received (duration and sample
    rate for WAV input, byte size otherwise).
    """

    def __init__(self):
        # Flipped to True by initialize(); nothing in this class reads it.
        self.initialized = False

    async def initialize(self):
        """Mark the service as initialized and print a confirmation line."""
        # Basic mode has no external dependencies, so there is nothing to load.
        self.initialized = True
        print("✓ STT Service initialized (basic mode)")

    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en-US") -> str:
        """Decode base64 audio and return a placeholder message describing it.

        Args:
            audio_base64: Base64-encoded audio payload.
            language: Accepted for interface compatibility; currently unused.

        Returns:
            ``"[Audio received: <info>. STT service needs model configuration.]"``
            on success, or a fixed apology string if decoding/inspection
            raises. This method does not raise.
        """
        try:
            audio_data = base64.b64decode(audio_base64)

            # A real implementation would hand audio_data to an STT model
            # (Whisper, Vosk, ...) here; for now only describe the payload.
            audio_info = await self._get_audio_info(audio_data)
            return f"[Audio received: {audio_info}. STT service needs model configuration.]"

        except Exception as e:
            print(f"Transcription error: {e}")
            return "Sorry, I couldn't process the audio message."

    async def _get_audio_info(self, audio_data: bytes) -> str:
        """Return a short human-readable description of ``audio_data``.

        Args:
            audio_data: Raw audio bytes.

        Returns:
            ``"Duration: <s>s, Sample Rate: <rate>Hz"`` when the bytes parse
            as a WAV file, otherwise ``"Size: <n> bytes"``.
        """
        try:
            # Parse straight from memory: no temp file to create, leak, or
            # clean up, and no disk round-trip.
            with wave.open(io.BytesIO(audio_data), 'rb') as wav_file:
                frames = wav_file.getnframes()
                rate = wav_file.getframerate()
                duration = frames / float(rate)
                return f"Duration: {duration:.2f}s, Sample Rate: {rate}Hz"
        except Exception:
            # Not a readable WAV (bad header, truncated data, zero sample
            # rate, ...): fall back to reporting the payload size.
            return f"Size: {len(audio_data)} bytes"

# Alternative STT service using Whisper if available
class WhisperSTTService:
    """Speech-to-text service backed by the optional ``whisper`` package."""

    def __init__(self):
        # Whisper model object; stays None until initialize() succeeds.
        self.model = None
        # True only after the model has been loaded successfully.
        self.initialized = False

    async def initialize(self):
        """Import ``whisper`` and load the ``"medium"`` model.

        Never raises: on a missing package or any load failure a warning is
        printed and ``initialized`` stays False.
        """
        # NOTE(review): load_model is called synchronously inside a coroutine;
        # presumably it blocks the event loop while loading - confirm.
        try:
            import whisper
            self.model = whisper.load_model("medium")
            self.initialized = True
            print("✓ Whisper STT Service initialized")
        except ImportError:
            print("⚠️ Whisper not available. Install with: pip install openai-whisper")
            self.initialized = False
        except Exception as e:
            print(f"⚠️ Whisper initialization failed: {e}")
            self.initialized = False

    async def transcribe_audio_base64(self, audio_base64: str, language: str = "en") -> str:
        """Transcribe base64-encoded audio with the loaded Whisper model.

        Args:
            audio_base64: Base64-encoded audio payload.
            language: Language value forwarded to ``model.transcribe``.

        Returns:
            The ``"text"`` field of the transcription result; a fixed
            "not available" message if the service is not initialized; or a
            fixed apology string if decoding or transcription raises. This
            method does not raise.
        """
        if not self.initialized or self.model is None:
            return "STT service not available. Please install Whisper."

        temp_path = None
        try:
            audio_data = base64.b64decode(audio_base64)

            # The model is handed a file path, so spill the bytes to disk.
            # delete=False keeps the file after the handle is closed.
            with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
                temp_path = temp_file.name
                temp_file.write(audio_data)

            result = self.model.transcribe(temp_path, language=language)
            return result["text"]

        except Exception as e:
            print(f"Whisper transcription error: {e}")
            return "Sorry, I couldn't transcribe the audio."

        finally:
            # Remove the temp file on every path, including failures, so
            # repeated errors do not accumulate files on disk.
            if temp_path is not None:
                try:
                    os.unlink(temp_path)
                except OSError:
                    pass