File size: 4,986 Bytes
becb41b
 
 
69793f3
becb41b
 
f7ae3f9
becb41b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69793f3
727a543
69793f3
 
 
becb41b
 
69793f3
 
 
 
 
 
 
becb41b
69793f3
 
 
becb41b
69793f3
 
 
 
 
 
 
 
 
 
 
 
 
 
becb41b
8bed01a
 
 
 
69793f3
 
 
 
becb41b
69793f3
 
 
 
becb41b
69793f3
 
 
 
 
 
 
 
 
 
 
 
 
 
becb41b
69793f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
becb41b
5fbf1ba
becb41b
69793f3
becb41b
 
f7ae3f9
5fbf1ba
 
 
 
 
 
 
f7ae3f9
183aa1f
 
becb41b
 
 
 
 
69793f3
becb41b
4220e34
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
"""

SIMBOTI Live - Real-Time WebRTC Translation using FastRTC

This app provides live audio translation using the FastRTC library.

Uses StreamHandlerBase for proper send-receive mode handling.

"""

from fastrtc import Stream, StreamHandler
import numpy as np
import tempfile
import wave
import os

# Import the existing translator
from carebridge_client import CareBridgeTranslator

# --- Languages ---
# Display name -> ISO 639-1 code for every supported language.
LANGUAGES = {
    "English": "en",
    "Polish": "pl",
    "Romanian": "ro",
    "Punjabi": "pa",
    "Urdu": "ur",
    "Portuguese": "pt",
    "Spanish": "es",
    "Arabic": "ar",
    "Bengali": "bn",
    "Gujarati": "gu",
    "Italian": "it",
}

# --- Lazy Load Translator ---
# Module-level singleton; populated on first use by get_translator().
translator = None

def get_translator():
    """Return the shared CareBridgeTranslator, constructing it lazily."""
    global translator
    if translator is not None:
        return translator
    translator = CareBridgeTranslator()
    return translator

# --- StreamHandler Class for Real-Time Translation ---
class LiveTranslationHandler(StreamHandler):
    """StreamHandler for real-time audio translation.

    Receives audio frames, accumulates roughly one second of audio,
    translates the buffered speech via CareBridgeTranslator, and returns
    the first frame of the synthesized TTS audio.
    """

    def __init__(self, expected_layout="mono", output_sample_rate=24000, output_frame_size=480):
        super().__init__(expected_layout, output_sample_rate, output_frame_size)
        self.audio_buffer = []   # incoming frames since the last flush
        self.frame_count = 0     # number of buffered frames
        # 50 frames * 480 samples = 24,000 samples -> ~1 second at 24 kHz.
        self.BUFFER_THRESHOLD = 50
        self.source_lang = "English"
        self.target_lang = "Polish"

    def copy(self):
        """Required: create a copy for new connections.

        Propagates the configured language pair so that per-connection
        copies do not silently reset to the English -> Polish defaults.
        """
        clone = LiveTranslationHandler()
        clone.source_lang = self.source_lang
        clone.target_lang = self.target_lang
        return clone

    def receive(self, frame: np.ndarray) -> np.ndarray:
        """Handle one incoming audio frame.

        Accumulates frames and triggers translation once BUFFER_THRESHOLD
        frames have been collected; returns silence while still buffering.
        NOTE(review): assumes *frame* is a float array in [-1, 1] — confirm
        against the FastRTC frame format actually delivered.
        """
        self.audio_buffer.append(frame)
        self.frame_count += 1

        # Process after accumulating enough audio.
        if self.frame_count >= self.BUFFER_THRESHOLD:
            return self._process_and_respond()

        # Return silence while buffering.
        return np.zeros(self.output_frame_size, dtype=np.float32)

    def emit(self, msg: str):
        """No-op implementation of the required abstract method."""
        pass

    def _process_and_respond(self):
        """Translate the buffered audio and return the first TTS frame."""
        if not self.audio_buffer:
            return np.zeros(self.output_frame_size, dtype=np.float32)

        # Combine all buffered frames and reset the buffer.
        combined = np.concatenate(self.audio_buffer)
        self.audio_buffer = []
        self.frame_count = 0

        # Save to a temp WAV. Close our handle immediately: wave.open()
        # reopens the path, which fails on Windows while the original
        # handle is still open (and leaks an fd elsewhere).
        temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        temp_wav.close()
        try:
            with wave.open(temp_wav.name, 'wb') as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)
                wf.setframerate(self.output_sample_rate)
                # Clip before scaling so out-of-range floats cannot wrap
                # around when cast to int16.
                int_audio = (np.clip(combined, -1.0, 1.0) * 32767).astype(np.int16)
                wf.writeframes(int_audio.tobytes())

            # Translate the captured speech.
            t = get_translator()
            translated_text = t.translate_audio(temp_wav.name, self.source_lang, self.target_lang)
            print(f"[SIMBOTI] Translated: {translated_text}")

            # Generate TTS for the translated text.
            tts_path = t.speak_text(translated_text, self.target_lang)
            if tts_path:
                import librosa  # local import: only needed when TTS succeeds
                tts_audio, _ = librosa.load(tts_path, sr=self.output_sample_rate)
                os.unlink(tts_path)
                # NOTE(review): only the first output_frame_size samples of
                # the TTS audio are returned; the remainder is discarded.
                # Streaming the rest would need an output queue — confirm
                # whether this truncation is intentional.
                if len(tts_audio) > self.output_frame_size:
                    return tts_audio[:self.output_frame_size].astype(np.float32)
                return tts_audio.astype(np.float32)
        except Exception as e:
            # Best-effort: log and fall through to silence so the stream
            # keeps running after a translation/TTS failure.
            print(f"[SIMBOTI] Error: {e}")
        finally:
            if os.path.exists(temp_wav.name):
                os.unlink(temp_wav.name)

        return np.zeros(self.output_frame_size, dtype=np.float32)

# --- FastRTC Stream with Robust Public STUN Servers ---
# Public Google STUN servers for NAT traversal (stun, stun1..stun4).
_ICE_SERVERS = [
    {"urls": [f"stun:stun{suffix}.l.google.com:19302"]}
    for suffix in ("", "1", "2", "3", "4")
]

stream = Stream(
    handler=LiveTranslationHandler(),
    modality="audio",
    mode="send-receive",
    rtc_configuration={"iceServers": _ICE_SERVERS},
    concurrency_limit=5,
    time_limit=60,
)

# Launch with Gradio UI
if __name__ == "__main__":
    print("[SIMBOTI] Starting Live Translation...")
    print("[SIMBOTI] Languages: English -> Polish")
    print("[SIMBOTI] Open your browser to the URL below:")
    # Launch with Gradio UI
    stream.ui.launch()