File size: 4,986 Bytes
becb41b 69793f3 becb41b f7ae3f9 becb41b 69793f3 727a543 69793f3 becb41b 69793f3 becb41b 69793f3 becb41b 69793f3 becb41b 8bed01a 69793f3 becb41b 69793f3 becb41b 69793f3 becb41b 69793f3 becb41b 5fbf1ba becb41b 69793f3 becb41b f7ae3f9 5fbf1ba f7ae3f9 183aa1f becb41b 69793f3 becb41b 4220e34 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
"""
SIMBOTI Live - Real-Time WebRTC Translation using FastRTC
This app provides live audio translation using the FastRTC library.
Uses a StreamHandler subclass for proper send-receive mode handling.
"""
import os
import tempfile
import wave
from collections import deque

import numpy as np
from fastrtc import Stream, StreamHandler

# Import the existing translator
from carebridge_client import CareBridgeTranslator
# --- Languages ---
# Display name -> two-letter language code for every supported language.
LANGUAGES = {
    "English": "en",
    "Polish": "pl",
    "Romanian": "ro",
    "Punjabi": "pa",
    "Urdu": "ur",
    "Portuguese": "pt",
    "Spanish": "es",
    "Arabic": "ar",
    "Bengali": "bn",
    "Gujarati": "gu",
    "Italian": "it",
}
# --- Lazy Load Translator ---
# Module-level cache for the shared translator; stays None until first use.
translator = None


def get_translator():
    """Return the shared CareBridgeTranslator, building it on the first call."""
    global translator
    if translator is not None:
        return translator
    translator = CareBridgeTranslator()
    return translator
# --- StreamHandler Class for Real-Time Translation ---
class LiveTranslationHandler(StreamHandler):
    """
    StreamHandler for real-time audio translation.

    ``receive`` buffers incoming frames. Once ``BUFFER_THRESHOLD`` frames have
    arrived, the buffered audio is written to a temporary WAV file, translated
    and synthesised through the shared translator, and the resulting speech is
    queued so that ``emit`` can hand it back one frame at a time.

    NOTE(review): the ``receive``/``emit`` split follows the FastRTC
    ``StreamHandler`` contract as documented upstream (``receive`` is given a
    ``(sample_rate, samples)`` tuple and its return value is ignored; ``emit``
    is polled without arguments and returns ``(sample_rate, samples)`` or
    ``None``) -- confirm against the installed FastRTC version.
    """

    def __init__(self, expected_layout="mono", output_sample_rate=24000, output_frame_size=480):
        """
        Set up an empty capture buffer and an empty output queue.

        Args:
            expected_layout: Channel layout forwarded to ``StreamHandler``.
            output_sample_rate: Sample rate of the audio handed back by
                ``emit``; also forwarded to ``StreamHandler``.
            output_frame_size: Number of samples per emitted frame; also
                forwarded to ``StreamHandler``.
        """
        super().__init__(expected_layout, output_sample_rate, output_frame_size)
        # Remember the constructor arguments so copy() can reproduce them.
        self._init_args = (expected_layout, output_sample_rate, output_frame_size)
        self.audio_buffer = []
        self.frame_count = 0
        # Number of incoming frames to accumulate before translating. The
        # duration this represents depends on the frame size delivered by the
        # caller (presumably ~20 ms per frame, i.e. ~1 s -- TODO confirm).
        self.BUFFER_THRESHOLD = 50
        self.source_lang = "English"
        self.target_lang = "Polish"
        # Sample rate reported with the most recent incoming frame, if any.
        self._capture_rate = None
        # Synthesised speech waiting to be emitted, one frame per entry.
        self._pending_frames = deque()

    def copy(self):
        """Required: create a copy for new connections.

        The copy keeps this handler's configuration (constructor arguments,
        languages and buffer threshold) but starts with empty buffers.
        """
        clone = LiveTranslationHandler(*self._init_args)
        clone.BUFFER_THRESHOLD = self.BUFFER_THRESHOLD
        clone.source_lang = self.source_lang
        clone.target_lang = self.target_lang
        return clone

    def receive(self, frame: "tuple[int, np.ndarray] | np.ndarray") -> np.ndarray:
        """
        Called for each incoming audio frame.

        Accumulates frames and runs the translation once ``BUFFER_THRESHOLD``
        frames have been collected. The translation runs synchronously inside
        this call.

        Args:
            frame: Either a ``(sample_rate, samples)`` tuple or a bare sample
                array (the form the original implementation accepted).

        Returns:
            The first output frame of freshly synthesised speech, or silence.
            Kept for backward compatibility; the complete speech is delivered
            through ``emit``.
        """
        if isinstance(frame, tuple):
            sample_rate, samples = frame[0], frame[1]
            if sample_rate:
                self._capture_rate = int(sample_rate)
        else:
            samples = frame

        # Flatten so frames of shape (1, n) and (n,) concatenate the same way.
        self.audio_buffer.append(np.asarray(samples).reshape(-1))
        self.frame_count += 1

        # Process after accumulating enough audio
        if self.frame_count >= self.BUFFER_THRESHOLD:
            return self._process_and_respond()

        # Return silence while buffering
        return self._silence()

    def emit(self, msg=None):
        """
        Return the next queued frame of translated speech.

        Args:
            msg: Unused; accepted only so existing ``emit(msg)`` calls keep
                working.

        Returns:
            ``(output_sample_rate, samples)`` with ``samples`` shaped
            ``(1, n)`` float32, or ``None`` when nothing is queued.
        """
        try:
            chunk = self._pending_frames.popleft()
        except IndexError:
            return None
        return (self.output_sample_rate, chunk.reshape(1, -1))

    def _silence(self):
        """Return one output frame of float32 zeros."""
        return np.zeros(self.output_frame_size, dtype=np.float32)

    @staticmethod
    def _to_pcm16(samples):
        """Convert samples to int16 PCM without overflowing.

        Float input is treated as normalised to [-1, 1] and clipped before
        scaling; int16 input is passed through unchanged.
        """
        if samples.dtype == np.int16:
            return samples
        if np.issubdtype(samples.dtype, np.floating):
            return (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)
        return samples.astype(np.int16)

    def _write_wav(self, path, samples):
        """Write ``samples`` to ``path`` as a mono 16-bit WAV file."""
        # Use the rate reported with the incoming frames; fall back to the
        # output rate (the original behaviour) when no rate was supplied.
        wav_rate = self._capture_rate or self.output_sample_rate
        with wave.open(path, 'wb') as wf:
            wf.setnchannels(1)
            wf.setsampwidth(2)
            wf.setframerate(wav_rate)
            wf.writeframes(self._to_pcm16(samples).tobytes())

    def _queue_speech(self, speech):
        """Split ``speech`` into output-sized frames and queue them for emit."""
        step = self.output_frame_size or len(speech)
        for start in range(0, len(speech), step):
            self._pending_frames.append(speech[start:start + step])

    def _process_and_respond(self):
        """Process accumulated audio and return translated TTS.

        Queues the whole synthesised clip for ``emit`` and returns its first
        frame. Returns silence when there is nothing to translate, nothing to
        speak, or any step fails (failures are printed, not raised).
        """
        if not self.audio_buffer:
            return self._silence()

        # Combine all buffered frames
        combined = np.concatenate(self.audio_buffer)
        self.audio_buffer = []
        self.frame_count = 0

        wav_path = None
        tts_path = None
        try:
            # Reserve a temp file name and close the handle straight away so
            # that wave.open can reopen the path by name.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
                wav_path = tmp.name
            self._write_wav(wav_path, combined)

            # Translate
            t = get_translator()
            translated_text = t.translate_audio(wav_path, self.source_lang, self.target_lang)
            print(f"[SIMBOTI] Translated: {translated_text}")
            if not translated_text:
                return self._silence()

            # Generate TTS
            tts_path = t.speak_text(translated_text, self.target_lang)
            if tts_path:
                import librosa
                tts_audio, _ = librosa.load(tts_path, sr=self.output_sample_rate)
                tts_audio = np.asarray(tts_audio, dtype=np.float32).reshape(-1)
                if tts_audio.size:
                    self._queue_speech(tts_audio)
                    return tts_audio[:self.output_frame_size]
        except Exception as e:
            # Best effort: one failed translation must not end the stream.
            print(f"[SIMBOTI] Error: {e}")
        finally:
            # Remove both temporary files on success and on failure.
            for path in (wav_path, tts_path):
                if path and os.path.exists(path):
                    os.unlink(path)

        return self._silence()
# --- FastRTC Stream with Robust Public STUN Servers ---
# One ICE server entry is generated per public Google STUN endpoint.
stream = Stream(
    handler=LiveTranslationHandler(),
    modality="audio",
    mode="send-receive",
    rtc_configuration={
        "iceServers": [
            {"urls": [stun_url]}
            for stun_url in (
                "stun:stun.l.google.com:19302",
                "stun:stun1.l.google.com:19302",
                "stun:stun2.l.google.com:19302",
                "stun:stun3.l.google.com:19302",
                "stun:stun4.l.google.com:19302",
            )
        ]
    },
    concurrency_limit=5,
    time_limit=60,
)
# Launch with Gradio UI
if __name__ == "__main__":
    # Print the startup banner in one call, then launch the Gradio UI.
    print(
        "[SIMBOTI] Starting Live Translation...",
        "[SIMBOTI] Languages: English -> Polish",
        "[SIMBOTI] Open your browser to the URL below:",
        sep="\n",
    )
    stream.ui.launch()
|