# SIMBOTI-Live / live_app.py
# Uploaded by NurseCitizenDeveloper via huggingface_hub (commit 4220e34, verified)
"""
SIMBOTI Live - Real-Time WebRTC Translation using FastRTC
This app provides live audio translation using the FastRTC library.
Uses StreamHandlerBase for proper send-receive mode handling.
"""
from fastrtc import Stream, StreamHandler
import numpy as np
import tempfile
import wave
import os
# Import the existing translator
from carebridge_client import CareBridgeTranslator
# --- Languages ---
# Display name -> ISO 639-1 code, as used by CareBridgeTranslator.
LANGUAGES = dict(
    English="en",
    Polish="pl",
    Romanian="ro",
    Punjabi="pa",
    Urdu="ur",
    Portuguese="pt",
    Spanish="es",
    Arabic="ar",
    Bengali="bn",
    Gujarati="gu",
    Italian="it",
)
# --- Lazy Load Translator ---
# Module-level singleton; constructed on first use so importing this file
# stays cheap (CareBridgeTranslator may load models at init time).
translator = None


def get_translator():
    """Return the shared CareBridgeTranslator, creating it on first call."""
    global translator
    if translator is not None:
        return translator
    translator = CareBridgeTranslator()
    return translator
# --- StreamHandler Class for Real-Time Translation ---
class LiveTranslationHandler(StreamHandler):
    """
    StreamHandler for real-time audio translation.

    Accumulates incoming audio frames; once BUFFER_THRESHOLD frames have been
    collected, the buffered audio is written to a temporary WAV file,
    translated via CareBridgeTranslator, synthesized back to speech with TTS,
    and the first frame of the TTS audio is returned. Silence frames are
    returned while buffering.
    """

    def __init__(self, expected_layout="mono", output_sample_rate=24000,
                 output_frame_size=480, source_lang="English", target_lang="Polish"):
        """
        Args:
            expected_layout: channel layout expected from the client ("mono").
            output_sample_rate: sample rate (Hz) of emitted audio.
            output_frame_size: samples per emitted frame.
            source_lang: display name of the spoken language (key of LANGUAGES).
            target_lang: display name of the translation target (key of LANGUAGES).
        """
        super().__init__(expected_layout, output_sample_rate, output_frame_size)
        self.audio_buffer = []      # pending incoming frames (np.ndarray)
        self.frame_count = 0        # number of frames currently buffered
        self.BUFFER_THRESHOLD = 50  # frames to accumulate before translating (~2s at 24kHz)
        self.source_lang = source_lang
        self.target_lang = target_lang

    def copy(self):
        """Required: create a copy for new connections.

        Fix: propagate the current language selection — previously every new
        connection was silently reset to the English -> Polish defaults.
        """
        return LiveTranslationHandler(
            source_lang=self.source_lang,
            target_lang=self.target_lang,
        )

    def receive(self, frame: np.ndarray) -> np.ndarray:
        """
        Called for each incoming audio frame.

        Accumulates frames and triggers translation once BUFFER_THRESHOLD
        frames have been collected; returns silence while still buffering.
        NOTE(review): assumes `frame` is a float ndarray in [-1, 1] — some
        FastRTC versions deliver `(sample_rate, ndarray)` tuples; confirm
        against the installed fastrtc API.
        """
        self.audio_buffer.append(frame)
        self.frame_count += 1
        if self.frame_count >= self.BUFFER_THRESHOLD:
            return self._process_and_respond()
        # Return silence while buffering.
        return np.zeros(self.output_frame_size, dtype=np.float32)

    def emit(self, msg: str):
        """Emit a message to the UI (required abstract method); intentionally a no-op."""
        pass

    def _process_and_respond(self):
        """Process accumulated audio and return translated TTS audio.

        Returns a float32 frame of at most `output_frame_size` samples; on any
        failure (or while no TTS audio is available) returns silence.
        """
        if not self.audio_buffer:
            return np.zeros(self.output_frame_size, dtype=np.float32)
        # Combine all buffered frames and reset the buffer.
        combined = np.concatenate(self.audio_buffer)
        self.audio_buffer = []
        self.frame_count = 0
        # Save to a temp WAV. Close the handle immediately: we only need the
        # path — keeping it open leaks a descriptor and blocks wave.open on
        # Windows.
        temp_wav = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        temp_wav.close()
        try:
            with wave.open(temp_wav.name, 'wb') as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)  # 16-bit PCM
                wf.setframerate(self.output_sample_rate)
                # Clip before converting: samples outside [-1, 1] would wrap
                # around when cast to int16.
                int_audio = (np.clip(combined, -1.0, 1.0) * 32767).astype(np.int16)
                wf.writeframes(int_audio.tobytes())
            # Translate the captured speech.
            t = get_translator()
            translated_text = t.translate_audio(temp_wav.name, self.source_lang, self.target_lang)
            print(f"[SIMBOTI] Translated: {translated_text}")
            # Generate TTS for the translation.
            tts_path = t.speak_text(translated_text, self.target_lang)
            if tts_path:
                import librosa
                tts_audio, _ = librosa.load(tts_path, sr=self.output_sample_rate)
                os.unlink(tts_path)
                # Return the first frame of TTS audio.
                # TODO(review): the remainder of the TTS audio is discarded —
                # it should be queued and emitted over subsequent frames.
                if len(tts_audio) > self.output_frame_size:
                    return tts_audio[:self.output_frame_size].astype(np.float32)
                return tts_audio.astype(np.float32)
        except Exception as e:
            # Best-effort: never crash the WebRTC stream on a translation error.
            print(f"[SIMBOTI] Error: {e}")
        finally:
            if os.path.exists(temp_wav.name):
                os.unlink(temp_wav.name)
        return np.zeros(self.output_frame_size, dtype=np.float32)
# --- FastRTC Stream with Robust Public STUN Servers ---
# Google's public STUN pool: the base host plus four numbered mirrors.
_STUN_URLS = ["stun:stun.l.google.com:19302"] + [
    f"stun:stun{i}.l.google.com:19302" for i in range(1, 5)
]

stream = Stream(
    handler=LiveTranslationHandler(),
    modality="audio",
    mode="send-receive",
    rtc_configuration={"iceServers": [{"urls": [url]} for url in _STUN_URLS]},
    concurrency_limit=5,
    time_limit=60,
)
# Launch with Gradio UI
if __name__ == "__main__":
    # Print the startup banner, then block in the Gradio UI server.
    banner = (
        "[SIMBOTI] Starting Live Translation...",
        "[SIMBOTI] Languages: English -> Polish",
        "[SIMBOTI] Open your browser to the URL below:",
    )
    for line in banner:
        print(line)
    stream.ui.launch()