|
|
"""
|
|
|
SIMBOTI Live - Real-Time WebRTC Translation using FastRTC
|
|
|
This app provides live audio translation using the FastRTC library.
|
|
|
Uses a StreamHandler subclass for proper send-receive mode handling.
|
|
|
"""
|
|
|
|
|
|
from fastrtc import Stream, StreamHandler
|
|
|
import numpy as np
|
|
|
import tempfile
|
|
|
import wave
|
|
|
import os
|
|
|
|
|
|
|
|
|
from carebridge_client import CareBridgeTranslator
|
|
|
|
|
|
|
|
|
# Display name -> two-letter language code for every supported language.
LANGUAGES = {
    "English": "en",
    "Polish": "pl",
    "Romanian": "ro",
    "Punjabi": "pa",
    "Urdu": "ur",
    "Portuguese": "pt",
    "Spanish": "es",
    "Arabic": "ar",
    "Bengali": "bn",
    "Gujarati": "gu",
    "Italian": "it",
}
|
|
|
|
|
|
|
|
|
# Shared translator instance; stays None until get_translator() is first called.
translator = None


def get_translator():
    """Return the shared CareBridgeTranslator, constructing it on first use."""
    global translator
    if translator is not None:
        return translator
    translator = CareBridgeTranslator()
    return translator
|
|
|
|
|
|
|
|
|
class LiveTranslationHandler(StreamHandler):
    """
    StreamHandler for real-time audio translation.

    Incoming frames are buffered by ``receive``. Once ``BUFFER_THRESHOLD``
    frames have arrived, the buffered audio is written to a temporary WAV
    file, translated, and converted to speech. The complete speech clip is
    queued and handed back to FastRTC through ``emit``.
    """

    def __init__(self, expected_layout="mono", output_sample_rate=24000, output_frame_size=480):
        """
        Create a handler with empty buffers and the default language pair.

        Args:
            expected_layout: Channel layout forwarded to ``StreamHandler``.
            output_sample_rate: Sample rate (Hz) of the audio sent back.
            output_frame_size: Length (in samples) of the silent frame that is
                returned when there is nothing to play.
        """
        # Local import, in line with the lazy-import style already used here.
        from collections import deque

        super().__init__(expected_layout, output_sample_rate, output_frame_size)
        # Remembered so copy() can build an identically configured handler.
        self._init_args = (expected_layout, output_sample_rate, output_frame_size)
        self.audio_buffer = []
        self.frame_count = 0
        self.BUFFER_THRESHOLD = 50
        self.source_lang = "English"
        self.target_lang = "Polish"
        # Sample rate of the captured audio. Updated from incoming
        # (sample_rate, samples) frames; bare arrays keep this fallback.
        self._input_rate = output_sample_rate
        # Finished (sample_rate, int16 samples) clips waiting for emit().
        # deque append/popleft are atomic, so receive() and emit() may be
        # called from different threads.
        self._pending = deque()

    def copy(self):
        """Required: create a copy for new connections.

        The copy keeps this handler's constructor arguments, buffer threshold
        and language pair, but starts with empty buffers.
        """
        clone = LiveTranslationHandler(*self._init_args)
        clone.BUFFER_THRESHOLD = self.BUFFER_THRESHOLD
        clone.source_lang = self.source_lang
        clone.target_lang = self.target_lang
        return clone

    def receive(self, frame) -> np.ndarray:
        """
        Called for each incoming audio frame.

        Accumulates frames and processes them when the threshold is reached.
        Translation runs synchronously inside this call.

        Args:
            frame: Either a ``(sample_rate, samples)`` tuple, as delivered by
                FastRTC, or a bare sample array.

        Returns:
            One float32 frame (translated speech or silence). Kept for
            callers that use the return value directly; the complete clip is
            delivered through ``emit``.
        """
        if isinstance(frame, tuple):
            self._input_rate, samples = frame
        else:
            samples = frame
        # Flatten so (1, N) mono frames concatenate into one 1-D signal.
        self.audio_buffer.append(np.asarray(samples).reshape(-1))
        self.frame_count += 1

        if self.frame_count >= self.BUFFER_THRESHOLD:
            return self._process_and_respond()

        return np.zeros(self.output_frame_size, dtype=np.float32)

    def emit(self, msg=None):
        """
        Return the next queued output clip (required abstract method).

        Args:
            msg: Ignored; accepted only so existing ``emit(msg)`` calls keep
                working. FastRTC calls ``emit()`` without arguments.

        Returns:
            A ``(sample_rate, samples)`` tuple with int16 samples of shape
            ``(1, N)``, or ``None`` when no translated audio is waiting.
        """
        try:
            return self._pending.popleft()
        except IndexError:
            return None

    @staticmethod
    def _to_int16(samples):
        """Convert integer or [-1, 1] float samples to int16 without wrap-around."""
        samples = np.asarray(samples)
        if np.issubdtype(samples.dtype, np.integer):
            return np.clip(samples, -32768, 32767).astype(np.int16)
        return (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)

    def _process_and_respond(self):
        """
        Process accumulated audio and return translated TTS.

        The buffered audio is written to a temporary mono 16-bit WAV at the
        capture sample rate and passed to the translator. The resulting speech
        is loaded at ``output_sample_rate`` and queued in full for ``emit``.

        Returns:
            The first ``output_frame_size`` samples of the speech as float32,
            or a silent frame when the buffer is empty, no speech was
            produced, or an error occurred (errors are printed, not raised).
        """
        silence = np.zeros(self.output_frame_size, dtype=np.float32)
        if not self.audio_buffer:
            return silence

        combined = np.concatenate(
            [np.asarray(part).reshape(-1) for part in self.audio_buffer]
        )
        self.audio_buffer = []
        self.frame_count = 0

        # mkstemp + close: no open handle is left behind, and the path can be
        # reopened by wave on every platform.
        fd, wav_path = tempfile.mkstemp(suffix=".wav")
        os.close(fd)
        try:
            with wave.open(wav_path, 'wb') as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)
                # Header must match the rate the audio was captured at.
                wf.setframerate(self._input_rate)
                wf.writeframes(self._to_int16(combined).tobytes())

            t = get_translator()
            translated_text = t.translate_audio(wav_path, self.source_lang, self.target_lang)
            print(f"[SIMBOTI] Translated: {translated_text}")

            tts_path = t.speak_text(translated_text, self.target_lang)
            if tts_path:
                import librosa
                try:
                    tts_audio, _ = librosa.load(tts_path, sr=self.output_sample_rate)
                finally:
                    # Remove the TTS file even when loading fails.
                    if os.path.exists(tts_path):
                        os.unlink(tts_path)

                tts_audio = tts_audio.astype(np.float32)
                if tts_audio.size:
                    # Queue the whole clip so none of the speech is dropped.
                    self._pending.append(
                        (self.output_sample_rate, self._to_int16(tts_audio).reshape(1, -1))
                    )
                return tts_audio[:self.output_frame_size]
        except Exception as e:
            # Best effort: a failed translation must not break the stream.
            print(f"[SIMBOTI] Error: {e}")
        finally:
            if os.path.exists(wav_path):
                os.unlink(wav_path)

        return silence
|
|
|
|
|
|
|
|
|
# Audio send-receive stream served by LiveTranslationHandler; ICE discovery
# uses the Google STUN endpoints listed below.
stream = Stream(
    handler=LiveTranslationHandler(),
    modality="audio",
    mode="send-receive",
    rtc_configuration={
        "iceServers": [
            {"urls": [stun_url]}
            for stun_url in (
                "stun:stun.l.google.com:19302",
                "stun:stun1.l.google.com:19302",
                "stun:stun2.l.google.com:19302",
                "stun:stun3.l.google.com:19302",
                "stun:stun4.l.google.com:19302",
            )
        ],
    },
    concurrency_limit=5,
    time_limit=60,
)
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Print the startup banner, then launch the stream's UI.
    print(
        "[SIMBOTI] Starting Live Translation...",
        "[SIMBOTI] Languages: English -> Polish",
        "[SIMBOTI] Open your browser to the URL below:",
        sep="\n",
    )
    stream.ui.launch()
|
|
|
|