# SIMBOTI-Live / live_app.py
# Uploaded by NurseCitizenDeveloper via huggingface_hub (commit 4220e34, verified)
"""
SIMBOTI Live - Real-Time WebRTC Translation using FastRTC
This app provides live audio translation using the FastRTC library.
Uses StreamHandlerBase for proper send-receive mode handling.
"""
from fastrtc import Stream, StreamHandler
import numpy as np
import tempfile
import wave
import os
# Import the existing translator
from carebridge_client import CareBridgeTranslator
# --- Languages ---
# Display name -> ISO 639-1 code, as used by CareBridgeTranslator.
LANGUAGES = dict(
    English="en",
    Polish="pl",
    Romanian="ro",
    Punjabi="pa",
    Urdu="ur",
    Portuguese="pt",
    Spanish="es",
    Arabic="ar",
    Bengali="bn",
    Gujarati="gu",
    Italian="it",
)
# --- Lazy Load Translator ---
# Module-level singleton; constructed on first use so importing this file
# stays cheap (CareBridgeTranslator may load models at init time).
translator = None


def get_translator():
    """Return the shared CareBridgeTranslator, creating it on first call."""
    global translator
    if translator is not None:
        return translator
    translator = CareBridgeTranslator()
    return translator
# --- StreamHandler Class for Real-Time Translation ---
class LiveTranslationHandler(StreamHandler):
    """
    StreamHandler for real-time audio translation.

    Accumulates incoming audio frames; once BUFFER_THRESHOLD frames have been
    collected, the buffered audio is written to a temporary WAV file,
    translated via CareBridgeTranslator, synthesized back to speech with TTS,
    and the first frame of the TTS audio is returned. Silence frames are
    returned while buffering.
    """

    def __init__(self, expected_layout="mono", output_sample_rate=24000,
                 output_frame_size=480, source_lang="English", target_lang="Polish"):
        """
        Args:
            expected_layout: channel layout expected from the client ("mono").
            output_sample_rate: sample rate (Hz) of emitted audio.
            output_frame_size: samples per emitted frame.
            source_lang: display name of the spoken language (key of LANGUAGES).
            target_lang: display name of the translation target (key of LANGUAGES).
        """
        super().__init__(expected_layout, output_sample_rate, output_frame_size)
        self.audio_buffer = []      # pending incoming frames (np.ndarray)
        self.frame_count = 0        # number of frames currently buffered
        self.BUFFER_THRESHOLD = 50  # frames to accumulate before translating (~2s at 24kHz)
        self.source_lang = source_lang
        self.target_lang = target_lang

    def copy(self):
        """Required: create a copy for new connections.

        Fix: propagate the current language selection — previously every new
        connection was silently reset to the English -> Polish defaults.
        """
        return LiveTranslationHandler(
            source_lang=self.source_lang,
            target_lang=self.target_lang,
        )

    def receive(self, frame: np.ndarray) -> np.ndarray:
        """
        Called for each incoming audio frame.

        Accumulates frames and triggers translation once BUFFER_THRESHOLD
        frames have been collected; returns silence while still buffering.
        NOTE(review): assumes `frame` is a float ndarray in [-1, 1] — some
        FastRTC versions deliver `(sample_rate, ndarray)` tuples; confirm
        against the installed fastrtc API.
        """
        self.audio_buffer.append(frame)
        self.frame_count += 1
        if self.frame_count >= self.BUFFER_THRESHOLD:
            return self._process_and_respond()
        # Return silence while buffering.
        return np.zeros(self.output_frame_size, dtype=np.float32)

    def emit(self, msg: str):
        """Emit a message to the UI (required abstract method); intentionally a no-op."""
        pass

    def _process_and_respond(self):
        """Process accumulated audio and return translated TTS audio.

        Returns a float32 frame of at most `output_frame_size` samples; on any
        failure (or while no TTS audio is available) returns silence.
        """
        if not self.audio_buffer:
            return np.zeros(self.output_frame_size, dtype=np.float32)
        # Combine all buffered frames and reset the buffer.
        combined = np.concatenate(self.audio_buffer)
        self.audio_buffer = []
        self.frame_count = 0
        # Save to a temp WAV. Close the handle immediately: we only need the
        # path — keeping it open leaks a descriptor and blocks wave.open on
        # Windows.
        temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        temp_wav.close()
        try:
            with wave.open(temp_wav.name, 'wb') as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)  # 16-bit PCM
                wf.setframerate(self.output_sample_rate)
                # Clip before converting: samples outside [-1, 1] would wrap
                # around when cast to int16.
                int_audio = (np.clip(combined, -1.0, 1.0) * 32767).astype(np.int16)
                wf.writeframes(int_audio.tobytes())
            # Translate the captured speech.
            t = get_translator()
            translated_text = t.translate_audio(temp_wav.name, self.source_lang, self.target_lang)
            print(f"[SIMBOTI] Translated: {translated_text}")
            # Generate TTS for the translation.
            tts_path = t.speak_text(translated_text, self.target_lang)
            if tts_path:
                import librosa
                tts_audio, _ = librosa.load(tts_path, sr=self.output_sample_rate)
                os.unlink(tts_path)
                # Return the first frame of TTS audio.
                # TODO(review): the remainder of the TTS audio is discarded —
                # it should be queued and emitted over subsequent frames.
                if len(tts_audio) > self.output_frame_size:
                    return tts_audio[:self.output_frame_size].astype(np.float32)
                return tts_audio.astype(np.float32)
        except Exception as e:
            # Best-effort: never crash the WebRTC stream on a translation error.
            print(f"[SIMBOTI] Error: {e}")
        finally:
            if os.path.exists(temp_wav.name):
                os.unlink(temp_wav.name)
        return np.zeros(self.output_frame_size, dtype=np.float32)
# --- FastRTC Stream with Robust Public STUN Servers ---
# Google's public STUN pool: the base host plus four numbered mirrors.
_STUN_URLS = ["stun:stun.l.google.com:19302"] + [
    f"stun:stun{i}.l.google.com:19302" for i in range(1, 5)
]

stream = Stream(
    handler=LiveTranslationHandler(),
    modality="audio",
    mode="send-receive",
    rtc_configuration={"iceServers": [{"urls": [url]} for url in _STUN_URLS]},
    concurrency_limit=5,
    time_limit=60,
)
# Launch with Gradio UI
if __name__ == "__main__":
    # Print the startup banner, then block in the Gradio UI server.
    banner = (
        "[SIMBOTI] Starting Live Translation...",
        "[SIMBOTI] Languages: English -> Polish",
        "[SIMBOTI] Open your browser to the URL below:",
    )
    for line in banner:
        print(line)
    stream.ui.launch()