""" SIMBOTI Live - Real-Time WebRTC Translation using FastRTC This app provides live audio translation using the FastRTC library. Uses StreamHandlerBase for proper send-receive mode handling. """ from fastrtc import Stream, StreamHandler import numpy as np import tempfile import wave import os # Import the existing translator from carebridge_client import CareBridgeTranslator # --- Languages --- LANGUAGES = { "English": "en", "Polish": "pl", "Romanian": "ro", "Punjabi": "pa", "Urdu": "ur", "Portuguese": "pt", "Spanish": "es", "Arabic": "ar", "Bengali": "bn", "Gujarati": "gu", "Italian": "it" } # --- Lazy Load Translator --- translator = None def get_translator(): global translator if translator is None: translator = CareBridgeTranslator() return translator # --- StreamHandler Class for Real-Time Translation --- class LiveTranslationHandler(StreamHandler): """ StreamHandler for real-time audio translation. Receives audio chunks, accumulates, translates, and returns TTS audio. """ def __init__(self, expected_layout="mono", output_sample_rate=24000, output_frame_size=480): super().__init__(expected_layout, output_sample_rate, output_frame_size) self.audio_buffer = [] self.frame_count = 0 self.BUFFER_THRESHOLD = 50 # ~2 seconds of audio at 24kHz self.source_lang = "English" self.target_lang = "Polish" def copy(self): """Required: create a copy for new connections.""" return LiveTranslationHandler() def receive(self, frame: np.ndarray) -> np.ndarray: """ Called for each incoming audio frame. Accumulates frames and processes when threshold is reached. """ self.audio_buffer.append(frame) self.frame_count += 1 # Process after accumulating enough audio if self.frame_count >= self.BUFFER_THRESHOLD: return self._process_and_respond() # Return silence while buffering return np.zeros(self.output_frame_size, dtype=np.float32) def emit(self, msg: str): """Emit a message to the UI (required abstract method).""" pass def _process_and_respond(self): """Process accumulated audio and return translated TTS.""" if not self.audio_buffer: return np.zeros(self.output_frame_size, dtype=np.float32) # Combine all buffered frames combined = np.concatenate(self.audio_buffer) self.audio_buffer = [] self.frame_count = 0 # Save to temp WAV temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav") try: with wave.open(temp_wav.name, 'wb') as wf: wf.setnchannels(1) wf.setsampwidth(2) wf.setframerate(self.output_sample_rate) int_audio = (combined * 32767).astype(np.int16) wf.writeframes(int_audio.tobytes()) # Translate t = get_translator() translated_text = t.translate_audio(temp_wav.name, self.source_lang, self.target_lang) print(f"[SIMBOTI] Translated: {translated_text}") # Generate TTS tts_path = t.speak_text(translated_text, self.target_lang) if tts_path: import librosa tts_audio, _ = librosa.load(tts_path, sr=self.output_sample_rate) os.unlink(tts_path) # Return first chunk of TTS audio if len(tts_audio) > self.output_frame_size: return tts_audio[:self.output_frame_size].astype(np.float32) return tts_audio.astype(np.float32) except Exception as e: print(f"[SIMBOTI] Error: {e}") finally: if os.path.exists(temp_wav.name): os.unlink(temp_wav.name) return np.zeros(self.output_frame_size, dtype=np.float32) # --- FastRTC Stream with Robust Public STUN Servers --- stream = Stream( handler=LiveTranslationHandler(), modality="audio", mode="send-receive", rtc_configuration={ "iceServers": [ {"urls": ["stun:stun.l.google.com:19302"]}, {"urls": ["stun:stun1.l.google.com:19302"]}, {"urls": ["stun:stun2.l.google.com:19302"]}, {"urls": ["stun:stun3.l.google.com:19302"]}, {"urls": ["stun:stun4.l.google.com:19302"]}, ] }, concurrency_limit=5, time_limit=60, ) # Launch with Gradio UI if __name__ == "__main__": print("[SIMBOTI] Starting Live Translation...") print("[SIMBOTI] Languages: English -> Polish") print("[SIMBOTI] Open your browser to the URL below:") # Launch with Gradio UI stream.ui.launch()