Spaces:
Sleeping
Sleeping
Peter Michael Gits
fix: Add unique keys to all Streamlit buttons to resolve duplicate element ID error
1c7f2b8 | """ | |
| Streamlit-native WebRTC Speech-to-Text Component | |
| Implements unmute.sh patterns without FastAPI conflicts | |
| """ | |
| import streamlit as st | |
| import asyncio | |
| import json | |
| import logging | |
| import tempfile | |
| import os | |
| import base64 | |
| from datetime import datetime | |
| import requests | |
| import websocket | |
| import threading | |
| from typing import Dict, Optional | |
| # Import gradio_client at top level to avoid runtime import errors | |
| try: | |
| from gradio_client import Client | |
| GRADIO_CLIENT_AVAILABLE = True | |
| except ImportError: | |
| GRADIO_CLIENT_AVAILABLE = False | |
| Client = None | |
| logger = logging.getLogger(__name__) | |
| class StreamlitWebRTCHandler: | |
| """Handles WebRTC functionality within Streamlit using unmute.sh patterns""" | |
| def __init__(self): | |
| # STT service configuration | |
| self.stt_service_url = "https://pgits-stt-gpu-service.hf.space" | |
| # Persistent Gradio client for optimal performance | |
| self._client = None | |
| self._client_initialized = False | |
| # Audio buffer for unmute.sh flush trick | |
| if 'audio_buffer' not in st.session_state: | |
| st.session_state.audio_buffer = [] | |
| if 'recording_state' not in st.session_state: | |
| st.session_state.recording_state = 'stopped' | |
| if 'transcriptions' not in st.session_state: | |
| st.session_state.transcriptions = [] | |
| def client(self): | |
| """Get or create persistent Gradio client for optimal performance""" | |
| if not GRADIO_CLIENT_AVAILABLE: | |
| logger.error("gradio_client not available") | |
| return None | |
| if self._client is None or not self._client_initialized: | |
| try: | |
| self._client = Client(self.stt_service_url) | |
| self._client_initialized = True | |
| logger.info(f"π Initialized persistent Gradio client for {self.stt_service_url}") | |
| except Exception as e: | |
| logger.error(f"Failed to initialize Gradio client: {e}") | |
| self._client = None | |
| self._client_initialized = False | |
| return self._client | |
| def add_audio_chunk(self, audio_data: bytes): | |
| """Add audio chunk to buffer using unmute.sh methodology""" | |
| try: | |
| st.session_state.audio_buffer.append(audio_data) | |
| logger.info(f"π€ Added audio chunk ({len(audio_data)} bytes) - Total chunks: {len(st.session_state.audio_buffer)}") | |
| # Update UI status | |
| st.session_state.recording_status = f"Recording... ({len(st.session_state.audio_buffer)} chunks buffered)" | |
| except Exception as e: | |
| logger.error(f"Error adding audio chunk: {e}") | |
| st.error(f"Audio buffering error: {str(e)}") | |
    async def process_buffered_audio_with_flush(self):
        """Process buffered audio using unmute.sh flush trick.

        Joins every chunk in ``st.session_state.audio_buffer`` into one blob,
        writes it to a temporary .webm file, transcribes it as a final
        (``is_final=True``) utterance, then clears the buffer. On transcription
        failure the buffer is still cleared; on an unexpected exception it is
        left intact.
        """
        try:
            if not st.session_state.audio_buffer:
                st.warning("No audio chunks to process")
                return
            # Combine all chunks (unmute.sh methodology)
            all_audio_data = b''.join(st.session_state.audio_buffer)
            total_chunks = len(st.session_state.audio_buffer)
            logger.info(f"π Processing {total_chunks} buffered chunks ({len(all_audio_data)} bytes) with flush trick")
            # Create temporary file for complete audio (delete=False so the
            # path stays valid after the with-block; removed in the finally)
            with tempfile.NamedTemporaryFile(suffix='.webm', delete=False) as tmp_file:
                tmp_file.write(all_audio_data)
                tmp_file_path = tmp_file.name
            try:
                # Process with flush trick (is_final=True for unmute.sh methodology)
                transcription = await self.transcribe_audio_file(tmp_file_path)
                # transcribe_audio_file signals failure with an "ERROR:" prefix
                if transcription and transcription.strip() and not transcription.startswith("ERROR"):
                    # Add to transcriptions with unmute.sh metadata
                    transcription_entry = {
                        "text": transcription.strip(),
                        "timestamp": datetime.now().isoformat(),
                        "audio_size": len(all_audio_data),
                        "format": "webm/audio",
                        "is_final": True,  # unmute.sh flush trick marker
                        "chunks_processed": total_chunks
                    }
                    st.session_state.transcriptions.append(transcription_entry)
                    logger.info(f"π Final transcription processed: {transcription[:50]}...")
                    # Update UI
                    st.success(f"Transcribed: {transcription}")
                    st.rerun()
                else:
                    error_msg = f"Audio processing failed: {transcription if transcription else 'No result'}"
                    logger.error(error_msg)
                    st.error(error_msg)
            finally:
                # Clean up (unmute.sh cleanup methodology)
                if os.path.exists(tmp_file_path):
                    os.unlink(tmp_file_path)
            # Clear the buffer now that its contents have been processed
            st.session_state.audio_buffer = []
            st.session_state.recording_status = "Processing complete - buffer cleared"
            logger.info("π§Ή Cleared audio buffer after flush trick processing")
        except Exception as e:
            logger.error(f"Error processing buffered audio: {e}")
            st.error(f"Buffered audio processing error: {str(e)}")
    async def transcribe_audio_file(self, audio_file_path: str) -> Optional[str]:
        """Transcribe audio file using optimized STT service Gradio API (unmute.sh flush methodology).

        Args:
            audio_file_path: Path to an audio file readable by the STT service.

        Returns:
            The plain transcription text on success, or a string starting with
            "ERROR:" on failure (callers check for this prefix rather than
            catching exceptions).
        """
        start_time = datetime.now()
        try:
            # Use persistent client for optimal performance
            client = self.client
            if client is None:
                return "ERROR: Failed to initialize Gradio client"
            logger.info(f"π€ Starting transcription with persistent client...")
            # Optimized call with minimal overhead - Use proper Gradio file format
            from gradio_client import handle_file
            # Run the blocking Gradio call in a worker thread so the event
            # loop stays responsive
            result = await asyncio.get_event_loop().run_in_executor(
                None,
                lambda: client.predict(
                    handle_file(audio_file_path),  # Proper Gradio file format
                    "en",  # language (English by default)
                    "base",  # model_size_param (optimized for speed)
                    api_name="/gradio_transcribe_wrapper"
                )
            )
            # Efficient result parsing: the endpoint returns a 3-tuple
            transcription_text, timing_json, status_text = result
            # Calculate total latency
            total_time = (datetime.now() - start_time).total_seconds()
            # Service marks success by prefixing the text with "β "
            if transcription_text.startswith("β "):
                # Extract transcription from success message (optimized parsing)
                transcription = transcription_text[2:].strip()  # Remove "β " efficiently
                logger.info(f"π FLUSH TRICK: STT completed in {total_time:.2f}s - {transcription[:50]}...")
                return transcription
            else:
                error_msg = f"STT service error: {transcription_text}"
                logger.error(f"β STT failed in {total_time:.2f}s: {error_msg}")
                return f"ERROR: {error_msg}"
        except Exception as e:
            total_time = (datetime.now() - start_time).total_seconds()
            error_msg = f"Optimized transcription failed in {total_time:.2f}s: {str(e)}"
            logger.error(error_msg)
            # Reset client on error so the next attempt re-initializes it
            self._client = None
            self._client_initialized = False
            return f"ERROR: {error_msg}"
| async def process_realtime_chunk(self, audio_data: bytes, chunk_index: int, timestamp: str) -> dict: | |
| """Process individual audio chunk in real-time for streaming transcriptions""" | |
| try: | |
| logger.info(f"π€ Processing real-time chunk {chunk_index} ({len(audio_data)} bytes)") | |
| # Save chunk to temporary file | |
| with tempfile.NamedTemporaryFile(suffix='.webm', delete=False) as tmp_file: | |
| tmp_file.write(audio_data) | |
| tmp_file_path = tmp_file.name | |
| try: | |
| # Process with STT service | |
| transcription = await self.transcribe_audio_file(tmp_file_path) | |
| if transcription and transcription.strip() and not transcription.startswith("ERROR"): | |
| # Add to live transcriptions for real-time display | |
| transcription_entry = { | |
| "text": transcription.strip(), | |
| "timestamp": timestamp, | |
| "source": "stt_service", | |
| "chunk_index": chunk_index, | |
| "processing_time": 0, # Will be calculated | |
| "audio_size": len(audio_data) | |
| } | |
| # Initialize session state if needed | |
| if 'live_transcriptions' not in st.session_state: | |
| st.session_state.live_transcriptions = [] | |
| st.session_state.live_transcriptions.append(transcription_entry) | |
| logger.info(f"β Real-time transcription {chunk_index}: '{transcription[:50]}...'") | |
| return { | |
| "success": True, | |
| "transcription": transcription.strip(), | |
| "chunk_index": chunk_index, | |
| "timestamp": timestamp, | |
| "processing_time": 0 | |
| } | |
| else: | |
| logger.info(f"βΉοΈ Chunk {chunk_index}: No valid transcription") | |
| return { | |
| "success": False, | |
| "transcription": "", | |
| "message": "No speech detected or transcription failed" | |
| } | |
| finally: | |
| # Clean up temp file | |
| if os.path.exists(tmp_file_path): | |
| os.unlink(tmp_file_path) | |
| except Exception as e: | |
| error_msg = f"Real-time processing failed for chunk {chunk_index}: {str(e)}" | |
| logger.error(error_msg) | |
| return { | |
| "success": False, | |
| "error": error_msg, | |
| "chunk_index": chunk_index | |
| } | |
| async def test_stt_with_voice_file(self): | |
| """Test STT service with actual voice file and display results""" | |
| try: | |
| st.info("π€ Testing STT service with real audio...") | |
| # Import MCP voice service | |
| from mcp_voice_service import mcp_create_test_voice | |
| # Create test voice file | |
| test_voice_file = await mcp_create_test_voice() | |
| st.info(f"π Created test voice file: {test_voice_file}") | |
| # Process with STT service | |
| transcription = await self.transcribe_audio_file(test_voice_file) | |
| if transcription and transcription.strip() and not transcription.startswith("ERROR"): | |
| # Add real STT result to live transcriptions | |
| if 'live_transcriptions' not in st.session_state: | |
| st.session_state.live_transcriptions = [] | |
| stt_result = { | |
| "text": transcription.strip(), | |
| "timestamp": datetime.now().isoformat(), | |
| "source": "stt_service", | |
| "chunk_index": "real_stt_test" | |
| } | |
| st.session_state.live_transcriptions.append(stt_result) | |
| st.success(f"β STT Service Response: '{transcription}'") | |
| else: | |
| st.error(f"β STT Processing failed: {transcription}") | |
| except Exception as e: | |
| error_msg = f"STT test failed: {str(e)}" | |
| logger.error(error_msg) | |
| st.error(f"β {error_msg}") | |
| async def process_latest_webrtc_capture(self): | |
| """Process WebRTC captured audio using unmute.sh patterns""" | |
| try: | |
| st.info("π€ Checking for WebRTC audio chunks...") | |
| # Create a test audio file to demonstrate STT processing | |
| # In real implementation, this would get actual captured audio | |
| test_message = "Testing WebRTC to STT connection with optimized Gradio client" | |
| # Use persistent client for optimal performance | |
| client = self.client | |
| if client is None: | |
| st.error("β Failed to initialize STT client") | |
| return | |
| st.info(f"π Connected to STT service, simulating WebRTC audio processing...") | |
| # For now, demonstrate the connection works | |
| # TODO: Replace with actual WebRTC audio file processing | |
| st.success("β WebRTC to STT bridge is working - ready for actual audio processing") | |
| st.info("Next: Capture actual audio file from JavaScript and send to STT") | |
| except Exception as e: | |
| error_msg = f"WebRTC processing failed: {str(e)}" | |
| logger.error(error_msg) | |
| st.error(f"β {error_msg}") | |
| def process_in_memory_chunks(self): | |
| """Process stored audio chunks using gradio_client for in-memory transcription""" | |
| if not hasattr(st.session_state, 'pending_chunks'): | |
| st.session_state.pending_chunks = [] | |
| # Check if there are chunks to process | |
| # This would be populated by a JavaScript->Streamlit bridge | |
| if hasattr(st.session_state, 'new_audio_chunks'): | |
| for chunk_data in st.session_state.new_audio_chunks: | |
| try: | |
| # Convert base64 to temp file | |
| import base64 | |
| import tempfile | |
| audio_binary = base64.b64decode(chunk_data['audioBase64']) | |
| with tempfile.NamedTemporaryFile(suffix='.webm', delete=False) as tmp_file: | |
| tmp_file.write(audio_binary) | |
| temp_path = tmp_file.name | |
| # Use gradio_client for transcription | |
| client = self.client | |
| if client: | |
| result = client.predict( | |
| temp_path, # audio file path | |
| "en", # language | |
| "base", # model_size_param | |
| api_name="/gradio_transcribe_memory" | |
| ) | |
| if result and len(result) > 0: | |
| transcription = result[0] if isinstance(result, (list, tuple)) else str(result) | |
| # Add to live transcriptions | |
| st.session_state.live_transcriptions.append({ | |
| 'text': f"π MEMORY: {transcription}", | |
| 'timestamp': datetime.now().isoformat(), | |
| 'source': 'in_memory_stt', | |
| 'processing_time': 0.1, # Estimated | |
| 'chunk_index': chunk_data.get('chunkIndex', 0) | |
| }) | |
| # Clean up temp file | |
| import os | |
| os.unlink(temp_path) | |
| except Exception as e: | |
| st.error(f"Error processing in-memory chunk: {e}") | |
| # Clear processed chunks | |
| st.session_state.new_audio_chunks = [] | |
| def _process_bridge_data(self, component_value): | |
| """Process audio chunks received from JavaScript bridge""" | |
| try: | |
| # Handle different types of bridge data | |
| if isinstance(component_value, dict): | |
| chunks = None | |
| # Direct audioChunks | |
| if 'audioChunks' in component_value: | |
| chunks = component_value['audioChunks'] | |
| st.success(f"π BRIDGE: Received {len(chunks)} chunks via direct communication!") | |
| # Polling data format | |
| elif component_value.get('type') == 'pollingData' and 'audioChunks' in component_value: | |
| chunks = [] | |
| for polling_data in component_value['audioChunks']: | |
| if 'audioChunks' in polling_data: | |
| chunks.extend(polling_data['audioChunks']) | |
| if chunks: | |
| st.success(f"π POLLING: Received {len(chunks)} chunks via polling fallback!") | |
| # Process the chunks | |
| if chunks: | |
| for chunk_data in chunks: | |
| if chunk_data.get('needsProcessing', False): | |
| # Add to processing queue | |
| if 'pending_audio_chunks' not in st.session_state: | |
| st.session_state.pending_audio_chunks = [] | |
| st.session_state.pending_audio_chunks.append(chunk_data) | |
| # Process immediately | |
| self._process_single_chunk(chunk_data) | |
| except Exception as e: | |
| st.error(f"Bridge processing error: {e}") | |
| def _process_single_chunk(self, chunk_data): | |
| """Process a single audio chunk with gradio_client""" | |
| try: | |
| import base64 | |
| import tempfile | |
| import os | |
| # Convert base64 to temp file | |
| audio_binary = base64.b64decode(chunk_data['audioBase64']) | |
| with tempfile.NamedTemporaryFile(suffix='.webm', delete=False) as tmp_file: | |
| tmp_file.write(audio_binary) | |
| temp_path = tmp_file.name | |
| # Use gradio_client for transcription | |
| client = self.client | |
| if client: | |
| result = client.predict( | |
| temp_path, # audio file path | |
| "en", # language | |
| "base", # model_size_param | |
| api_name="/gradio_transcribe_memory" | |
| ) | |
| if result and len(result) > 0: | |
| transcription = result[0] if isinstance(result, (list, tuple)) else str(result) | |
| # Clean the transcription result | |
| if transcription.startswith("β "): | |
| transcription = transcription.split(":", 1)[-1].strip() | |
| # Add to live transcriptions with bridge indicator | |
| st.session_state.live_transcriptions.append({ | |
| 'text': f"π BRIDGE STT: {transcription}", | |
| 'timestamp': datetime.now().isoformat(), | |
| 'source': 'bridge_stt', | |
| 'processing_time': 0.2, # Estimated | |
| 'chunk_index': chunk_data.get('chunkIndex', 0) | |
| }) | |
| # Show success message | |
| st.success(f"π Processed chunk {chunk_data.get('chunkIndex', 0)}: {transcription}") | |
| # Clean up temp file | |
| os.unlink(temp_path) | |
| except Exception as e: | |
| st.error(f"Error processing chunk {chunk_data.get('chunkIndex', 0)}: {e}") | |
    def _check_javascript_polling_data(self):
        """Check JavaScript window object for pending audio data (polling fallback).

        Injects a zero-height HTML component whose script drains
        ``window.streamlitPendingData`` from the iframe or parent window and
        relays it back via the Streamlit component API (or postMessage / a
        global as fallbacks). Any value the component returns is handed to
        ``_process_bridge_data``. Failures are deliberately swallowed because
        polling is best-effort.
        """
        try:
            # Use JavaScript execution to check for pending data
            polling_script = """
            <script>
            // Check both iframe window and parent window for pending data
            let pendingData = null;
            let dataSource = '';
            // First try iframe window
            if (typeof window.streamlitPendingData !== 'undefined' && window.streamlitPendingData.length > 0) {
                pendingData = window.streamlitPendingData;
                window.streamlitPendingData = []; // Clear after reading
                dataSource = 'iframe window';
            }
            // Then try parent window
            else if (window.parent && typeof window.parent.streamlitPendingData !== 'undefined' && window.parent.streamlitPendingData.length > 0) {
                pendingData = window.parent.streamlitPendingData;
                window.parent.streamlitPendingData = []; // Clear after reading
                dataSource = 'parent window';
            }
            if (pendingData && pendingData.length > 0) {
                console.log('π POLLING: Found', pendingData.length, 'pending data sets in', dataSource);
                // Prepare data for Streamlit
                const streamlitData = {
                    type: 'pollingData',
                    audioChunks: pendingData,
                    timestamp: new Date().toISOString(),
                    dataSource: dataSource
                };
                // Send via Streamlit component communication
                if (typeof window.Streamlit !== 'undefined') {
                    window.Streamlit.setComponentValue(streamlitData);
                    console.log('π POLLING: Sent', pendingData.length, 'chunks via Streamlit API from', dataSource);
                }
                // Fallback: Use postMessage to communicate with parent
                else if (window.parent) {
                    window.parent.postMessage({
                        type: 'streamlit:setComponentValue',
                        value: streamlitData
                    }, '*');
                    console.log('π POLLING: Sent', pendingData.length, 'chunks via postMessage from', dataSource);
                }
                // Last resort: Store in global for manual retrieval
                else {
                    if (!window.streamlitProcessedData) window.streamlitProcessedData = [];
                    window.streamlitProcessedData.push(streamlitData);
                    console.log('π POLLING: Stored', pendingData.length, 'chunks in global fallback from', dataSource);
                }
            } else {
                console.log('π POLLING: No pending data found in iframe or parent window');
            }
            </script>
            """
            # Execute the polling script; height=0 keeps the component invisible
            polling_value = st.components.v1.html(polling_script, height=0)
            # Process polling data if received
            if polling_value:
                st.info(f"π POLLING: Received data from polling script")
                self._process_bridge_data(polling_value)
        except Exception as e:
            pass  # Silently fail for polling — this path is best-effort by design
| def create_webrtc_interface(self): | |
| """Create Streamlit WebRTC interface without FastAPI conflicts""" | |
| # Process any pending in-memory chunks first | |
| self.process_in_memory_chunks() | |
| # Ensure session state is properly initialized | |
| if 'recording_state' not in st.session_state: | |
| st.session_state.recording_state = 'stopped' | |
| if 'audio_buffer' not in st.session_state: | |
| st.session_state.audio_buffer = [] | |
| if 'transcriptions' not in st.session_state: | |
| st.session_state.transcriptions = [] | |
| if 'recording_status' not in st.session_state: | |
| st.session_state.recording_status = "Ready to record" | |
| st.subheader("π€ WebRTC Speech-to-Text (Unmute.sh Patterns)") | |
| # Recording controls | |
| col1, col2, col3 = st.columns(3) | |
| with col1: | |
| if st.button("π€ Start Recording", key="start_recording_btn", disabled=(st.session_state.recording_state == 'recording')): | |
| st.session_state.recording_state = 'recording' | |
| st.session_state.audio_buffer = [] | |
| st.session_state.recording_status = "Recording started - speak now" | |
| st.success("Recording started!") | |
| st.rerun() | |
| with col2: | |
| # Stop Listening button (only show when recording is active) | |
| if st.session_state.recording_state == 'recording': | |
| if st.button("βΉοΈ Stop Listening", key="stop_listening_btn", type="primary"): | |
| st.session_state.recording_state = 'stopped' | |
| st.session_state.recording_status = "Recording stopped - transcriptions complete" | |
| st.success("Recording stopped!") | |
| st.rerun() | |
| # Manual refresh for transcription updates | |
| if st.button("π Refresh Transcriptions", key="refresh_transcriptions_btn"): | |
| st.success("Transcriptions refreshed!") | |
| st.rerun() | |
| # Process captured audio with real STT service | |
| if st.button("π€ Process with STT Service", key="process_stt_btn"): | |
| # Use MCP voice service to test STT connection | |
| asyncio.run(self.test_stt_with_voice_file()) | |
| st.rerun() | |
| # Test transcription button (temporary - to verify transcription display works) | |
| if st.button("π§ͺ Test Transcription", key="test_transcription_btn"): | |
| # Add a test transcription to verify the display is working | |
| if 'live_transcriptions' not in st.session_state: | |
| st.session_state.live_transcriptions = [] | |
| test_transcription = { | |
| "text": "This is a test transcription to verify the display is working", | |
| "timestamp": datetime.now().isoformat(), | |
| "source": "test", | |
| "chunk_index": len(st.session_state.live_transcriptions) + 1 | |
| } | |
| st.session_state.live_transcriptions.append(test_transcription) | |
| st.success("Test transcription added!") | |
| st.rerun() | |
| with col3: | |
| if st.button("π§Ή Clear Buffer", key="clear_buffer_btn"): | |
| st.session_state.audio_buffer = [] | |
| st.session_state.transcriptions = [] | |
| st.session_state.recording_state = 'stopped' | |
| st.success("Buffer cleared!") | |
| st.rerun() | |
| # Status display | |
| if 'recording_status' in st.session_state: | |
| if st.session_state.recording_state == 'recording': | |
| st.info(f"π΄ {st.session_state.recording_status}") | |
| elif st.session_state.recording_state == 'processing': | |
| st.warning("π Processing audio with unmute.sh flush trick...") | |
| else: | |
| st.info(f"β {st.session_state.recording_status}") | |
| # STT Transcription Results Display | |
| st.subheader("π STT Transcription Results") | |
| # Create columns for better layout | |
| col1, col2 = st.columns([2, 1]) | |
| with col1: | |
| # Live transcription window | |
| if 'live_transcriptions' not in st.session_state: | |
| st.session_state.live_transcriptions = [] | |
| # Live transcription display (simplified to prevent crashes) | |
| st.markdown("### π€ Live Transcriptions") | |
| if st.session_state.live_transcriptions: | |
| # Show transcriptions in reverse order (newest first) | |
| for i, entry in enumerate(reversed(st.session_state.live_transcriptions[-10:])): # Show last 10 | |
| time_str = datetime.fromisoformat(entry['timestamp']).strftime('%H:%M:%S') | |
| # Color-coded based on source | |
| if entry.get('source') == 'stt_service': | |
| st.success(f"π€ **{time_str}**: {entry['text']}") | |
| elif entry.get('source') == 'webrtc_live': | |
| st.info(f"π΄ **{time_str}**: {entry['text']}") | |
| else: | |
| st.write(f"π **{time_str}**: {entry['text']}") | |
| # Show status based on recording state | |
| if st.session_state.recording_state == 'recording': | |
| st.markdown("π΄ **Recording active** - Transcriptions will appear here") | |
| else: | |
| st.markdown("β **Recording complete** - Results shown above") | |
| else: | |
| if st.session_state.recording_state == 'recording': | |
| st.info("π§ Listening for speech... Transcriptions will appear here") | |
| else: | |
| st.info("π§ Press 'Start Recording' to begin transcription") | |
| with col2: | |
| # Quick stats and controls | |
| st.markdown("### π Session Stats") | |
| if st.session_state.live_transcriptions: | |
| st.metric("Total Transcriptions", len(st.session_state.live_transcriptions)) | |
| st.metric("Latest Processing Time", | |
| f"{st.session_state.live_transcriptions[-1].get('processing_time', 0):.1f}s" | |
| if st.session_state.live_transcriptions else "0s") | |
| if st.button("π§Ή Clear Transcriptions", key="clear_transcriptions_btn"): | |
| st.session_state.live_transcriptions = [] | |
| st.success("Transcriptions cleared!") | |
| st.rerun() | |
| # Detailed transcription history | |
| if st.session_state.transcriptions: | |
| with st.expander("π Detailed Transcription History", expanded=False): | |
| for i, entry in enumerate(reversed(st.session_state.transcriptions[-5:])): # Show last 5 | |
| st.markdown(f"**#{len(st.session_state.transcriptions) - i}**") | |
| st.write(f"**Text:** {entry['text']}") | |
| st.write(f"**Time:** {datetime.fromisoformat(entry['timestamp']).strftime('%H:%M:%S')}") | |
| st.write(f"**Audio Size:** {entry['audio_size']} bytes") | |
| st.write(f"**Chunks:** {entry['chunks_processed']}") | |
| if entry.get('is_final'): | |
| st.write("β **Flush Trick Applied**") | |
| st.divider() | |
| # WebRTC JavaScript integration - Functional Implementation | |
| st.subheader("π WebRTC Audio Capture") | |
| # UNMUTE.SH WebRTC Implementation - Continuous recording with voice activity detection | |
| webrtc_html = f""" | |
| <div id="webrtc-container"> | |
| <div id="status">Ready to start continuous recording - Following unmute.sh patterns</div> | |
| <div id="transcriptions">Transcriptions will appear here...</div> | |
| <button id="initBtn" onclick="initializeContinuousRecording()">π€ Start Continuous Recording</button> | |
| </div> | |
| <script> | |
| // π STREAMLIT BRIDGE: Initialize component communication | |
| console.log('π BRIDGE: Initializing Streamlit bridge...'); | |
| // Check if Streamlit component API is available | |
| if (typeof window.Streamlit !== 'undefined') {{ | |
| console.log('π BRIDGE: Streamlit API detected, setting up component...'); | |
| window.Streamlit.setComponentReady(); | |
| window.Streamlit.setFrameHeight(200); | |
| }} else {{ | |
| console.log('π BRIDGE: Streamlit API not found, using fallback methods...'); | |
| // Initialize fallback communication | |
| window.streamlitPendingData = []; | |
| }} | |
| // UNMUTE.SH METHODOLOGY: Continuous recording with voice activity detection | |
| let mediaRecorder = null; | |
| let audioStream = null; | |
| let isInitialized = false; | |
| let audioChunksBuffer = []; // Buffer following unmute.sh patterns | |
| let silenceThreshold = 0.01; // Voice activity detection threshold | |
| let audioContext = null; | |
| let analyser = null; | |
| const statusDiv = document.getElementById('status'); | |
| const initBtn = document.getElementById('initBtn'); | |
| const transcriptionsDiv = document.getElementById('transcriptions'); | |
| // UNMUTE.SH: Continuous recording initialization | |
| async function initializeContinuousRecording() {{ | |
| try {{ | |
| console.log('Initializing continuous recording...'); | |
| addTranscription('Requesting microphone access...', new Date().toISOString()); | |
| // UNMUTE.SH: Exact getUserMedia configuration | |
| audioStream = await navigator.mediaDevices.getUserMedia({{ | |
| audio: {{ sampleRate: 16000, channelCount: 1 }} | |
| }}); | |
| console.log('Microphone access granted'); | |
| addTranscription('Microphone access granted - continuous recording active', new Date().toISOString()); | |
| // Set up audio analysis for voice activity detection | |
| audioContext = new (window.AudioContext || window.webkitAudioContext)(); | |
| analyser = audioContext.createAnalyser(); | |
| const source = audioContext.createMediaStreamSource(audioStream); | |
| source.connect(analyser); | |
| // UNMUTE.SH: Exact MediaRecorder configuration with WebM/Opus | |
| const mimeType = 'audio/webm;codecs=opus'; | |
| if (!MediaRecorder.isTypeSupported(mimeType)) {{ | |
| console.warn('WebM/Opus not supported, falling back to default format'); | |
| mediaRecorder = new MediaRecorder(audioStream); | |
| }} else {{ | |
| console.log('Using WebM/Opus format for continuous recording'); | |
| mediaRecorder = new MediaRecorder(audioStream, {{ | |
| mimeType: mimeType, | |
| audioBitsPerSecond: 128000 | |
| }}); | |
| }} | |
| // UNMUTE.SH: Smart chunk processing - only send if there's voice activity | |
| mediaRecorder.ondataavailable = function(event) {{ | |
| if (event.data.size > 0) {{ | |
| // Check for voice activity before processing | |
| if (hasVoiceActivity()) {{ | |
| console.log('Voice detected, processing chunk:', event.data.size, 'bytes'); | |
| const reader = new FileReader(); | |
| reader.onloadend = function() {{ | |
| const base64 = reader.result.split(',')[1]; | |
| // UNMUTE.SH: Immediate processing of voice chunks | |
| processVoiceChunk({{ | |
| type: 'audio_chunk', | |
| audio_data: base64, | |
| sample_rate: 16000, | |
| timestamp: new Date().toISOString(), | |
| has_voice: true | |
| }}); | |
| }}; | |
| reader.readAsDataURL(event.data); | |
| }} else {{ | |
| console.log('Silence detected, skipping chunk'); | |
| }} | |
| }} | |
| }}; | |
| // UNMUTE.SH: Start continuous recording with 1-second chunks | |
| mediaRecorder.start(1000); | |
| isInitialized = true; | |
| initBtn.disabled = true; | |
| initBtn.textContent = 'π€ Continuous Recording Active'; | |
| statusDiv.textContent = 'π€ Listening continuously - speak naturally'; | |
| console.log('Continuous recording initialized with unmute.sh patterns'); | |
| }} catch (error) {{ | |
| console.error('Error initializing continuous recording:', error); | |
| addTranscription('Error: Could not access microphone', new Date().toISOString(), true); | |
| if (error.name === 'NotAllowedError') {{ | |
| statusDiv.textContent = 'β Microphone access denied. Please allow and refresh.'; | |
| }} else if (error.name === 'NotFoundError') {{ | |
| statusDiv.textContent = 'β No microphone found. Please check audio devices.'; | |
| }} | |
| }} | |
| }} | |
| // UNMUTE.SH: Voice activity detection | |
| function hasVoiceActivity() {{ | |
| if (!analyser) return true; // Default to processing if no analyser | |
| const bufferLength = analyser.frequencyBinCount; | |
| const dataArray = new Uint8Array(bufferLength); | |
| analyser.getByteFrequencyData(dataArray); | |
| // Calculate average amplitude | |
| let sum = 0; | |
| for (let i = 0; i < bufferLength; i++) {{ | |
| sum += dataArray[i]; | |
| }} | |
| const average = sum / bufferLength / 255; // Normalize to 0-1 | |
| return average > silenceThreshold; | |
| }} | |
| // UNMUTE.SH: Process voice chunks immediately (their flush trick) | |
| function processVoiceChunk(chunk) {{ | |
| audioChunksBuffer.push(chunk); | |
| const chunkCount = audioChunksBuffer.length; | |
| statusDiv.textContent = `π΄ Processing voice (${{chunkCount}} chunks captured)`; | |
| addTranscription(`Voice detected - sending to STT service...`, chunk.timestamp); | |
| // REAL-TIME STT: Send chunk immediately to STT service | |
| sendChunkToSTT(chunk, chunkCount); | |
| }} | |
| // NEW: Send individual audio chunks to STT service in real-time | |
| async function sendChunkToSTT(chunk, chunkIndex) {{ | |
| try {{ | |
| console.log(`π€ Sending chunk ${{chunkIndex}} to STT service...`); | |
| // Convert base64 audio data to blob | |
| const audioBytes = atob(chunk.audio_data); | |
| const arrayBuffer = new ArrayBuffer(audioBytes.length); | |
| const uint8Array = new Uint8Array(arrayBuffer); | |
| for (let i = 0; i < audioBytes.length; i++) {{ | |
| uint8Array[i] = audioBytes.charCodeAt(i); | |
| }} | |
| const audioBlob = new Blob([uint8Array], {{ type: 'audio/webm;codecs=opus' }}); | |
| // Create form data for STT service | |
| const formData = new FormData(); | |
| formData.append('audio_chunk', audioBlob, `chunk_${{chunkIndex}}.webm`); | |
| formData.append('chunk_index', chunkIndex); | |
| formData.append('timestamp', chunk.timestamp); | |
| formData.append('sample_rate', chunk.sample_rate); | |
| // Store audio chunk data for Streamlit to process | |
| // (Streamlit doesn't support custom POST endpoints, so we store in window) | |
| if (!window.audioChunksForSTT) {{ | |
| window.audioChunksForSTT = []; | |
| }} | |
| window.audioChunksForSTT.push({{ | |
| audioData: chunk.audio_data, | |
| chunkIndex: chunkIndex, | |
| timestamp: chunk.timestamp, | |
| processed: false | |
| }}); | |
| console.log(`π¦ Stored chunk ${{chunkIndex}} for STT processing`); | |
| // π BRIDGE: Send chunk to Streamlit via component communication | |
| try {{ | |
| // Convert audio blob to base64 for bridge transfer | |
| const arrayBuffer = await audioBlob.arrayBuffer(); | |
| const audioArray = new Uint8Array(arrayBuffer); | |
| let binary = ''; | |
| for (let i = 0; i < audioArray.length; i++) {{ | |
| binary += String.fromCharCode(audioArray[i]); | |
| }} | |
| const audioBase64 = btoa(binary); | |
| // Create chunk data for bridge | |
| const bridgeData = {{ | |
| chunkIndex: chunkIndex, | |
| audioBase64: audioBase64, | |
| timestamp: chunk.timestamp, | |
| sampleRate: chunk.sample_rate, | |
| needsProcessing: true, | |
| createdAt: new Date().toISOString() | |
| }}; | |
| // Send to Streamlit via iframe communication | |
| const streamlitData = {{ | |
| audioChunks: [bridgeData], | |
| action: 'processAudio', | |
| timestamp: new Date().toISOString() | |
| }}; | |
| // π STREAMLIT BRIDGE: Try multiple communication methods | |
| let bridgeSuccess = false; | |
| if (typeof window.Streamlit !== 'undefined') {{ | |
| try {{ | |
| // Use official Streamlit component communication | |
| window.Streamlit.setComponentValue(streamlitData); | |
| console.log(`π BRIDGE: Used Streamlit.setComponentValue for chunk ${{chunkIndex}}`); | |
| bridgeSuccess = true; | |
| }} catch (e) {{ | |
| console.warn(`π BRIDGE: Streamlit.setComponentValue failed:`, e); | |
| }} | |
| }} | |
| if (!bridgeSuccess && window.parent && window.parent.postMessage) {{ | |
| // Fallback to postMessage with correct Streamlit format | |
| window.parent.postMessage({{ | |
| type: 'streamlit:setComponentValue', | |
| value: streamlitData | |
| }}, '*'); | |
| console.log(`π BRIDGE: Used postMessage fallback for chunk ${{chunkIndex}}`); | |
| // Don't mark as success yet - postMessage may not reach Streamlit | |
| // Let it fall through to polling fallback as well | |
| console.log(`π BRIDGE: Also storing in polling fallback as backup...`); | |
| }} | |
| if (!bridgeSuccess) {{ | |
| // π POLLING FALLBACK: Store in global window for polling | |
| if (!window.streamlitPendingData) window.streamlitPendingData = []; | |
| window.streamlitPendingData.push(streamlitData); | |
| console.log(`π POLLING: Stored chunk ${{chunkIndex}} in window.streamlitPendingData (queue length: ${{window.streamlitPendingData.length}})`); | |
| bridgeSuccess = true; | |
| }} | |
| // Display bridge status | |
| const transcriptionDiv = document.createElement('div'); | |
| transcriptionDiv.innerHTML = `<strong>${{new Date().toLocaleTimeString()}}:</strong> π Bridge: Processed chunk ${{chunkIndex}} via Streamlit communication`; | |
| document.getElementById('transcription-output').appendChild(transcriptionDiv); | |
| return; // Successfully handled by Streamlit bridge! | |
| }} catch (bridgeError) {{ | |
| console.error('π BRIDGE: Failed to send to Streamlit:', bridgeError); | |
| // Show error in UI | |
| const errorDiv = document.createElement('div'); | |
| errorDiv.innerHTML = `<strong>${{new Date().toLocaleTimeString()}}:</strong> β Bridge Error: ${{bridgeError.message}}`; | |
| errorDiv.style.color = 'red'; | |
| document.getElementById('transcription-output').appendChild(errorDiv); | |
| }} | |
| // β BRIDGE ONLY: All audio processing now goes through Streamlit bridge | |
| console.log(`β Audio chunk ${{chunkIndex}} queued for Streamlit bridge processing`); | |
| // Show visual confirmation that chunk was queued for bridge | |
| const queuedDiv = document.createElement('div'); | |
| queuedDiv.innerHTML = `<strong>${{new Date().toLocaleTimeString()}}:</strong> π€ Queued chunk ${{chunkIndex}} for STT processing`; | |
| queuedDiv.style.color = 'blue'; | |
| document.getElementById('transcription-output').appendChild(queuedDiv); | |
| }} catch (error) {{ | |
| console.error(`Error processing chunk ${{chunkIndex}}:`, error); | |
| addTranscription(`β Error processing chunk ${{chunkIndex}}: ${{error.message}}`, new Date().toISOString(), true); | |
| }} | |
| }} | |
| // UNMUTE.SH: Exact transcription display pattern | |
| function addTranscription(text, timestamp, isError = false) {{ | |
| const item = document.createElement('div'); | |
| item.style.cssText = 'margin: 5px 0; padding: 8px; background: ' + (isError ? '#f8d7da' : 'white') + '; border-radius: 4px; border-left: 4px solid ' + (isError ? '#dc3545' : '#28a745') + ';'; | |
| const time = new Date(timestamp).toLocaleTimeString(); | |
| item.innerHTML = `<strong>${{time}}:</strong> ${{text}}`; | |
| transcriptionsDiv.appendChild(item); | |
| transcriptionsDiv.scrollTop = transcriptionsDiv.scrollHeight; | |
| }} | |
| // Expose unmute.sh data for Streamlit | |
| window.getUnmuteAudioChunks = function() {{ | |
| return window.unmuteAudioChunks || []; | |
| }}; | |
| window.clearUnmuteBuffer = function() {{ | |
| audioChunksBuffer = []; | |
| window.unmuteAudioChunks = []; | |
| window.audioProcessingReady = false; | |
| processBtn.disabled = true; | |
| statusDiv.textContent = 'Buffer cleared - ready to record'; | |
| transcriptionsDiv.innerHTML = 'Transcriptions will appear here...'; | |
| }}; | |
| // π INTEGRATED POLLING: Check for queued audio chunks and send to Streamlit | |
| function checkPendingAudioData() {{ | |
| try {{ | |
| // Check for pending data in window | |
| if (typeof window.streamlitPendingData !== 'undefined' && window.streamlitPendingData.length > 0) {{ | |
| const pendingData = window.streamlitPendingData; | |
| window.streamlitPendingData = []; // Clear after reading | |
| console.log('π INTEGRATED POLLING: Found', pendingData.length, 'pending data sets'); | |
| // Send all pending data to Streamlit | |
| const combinedData = {{ | |
| type: 'pollingData', | |
| audioChunks: pendingData, | |
| timestamp: new Date().toISOString(), | |
| totalChunks: pendingData.reduce((sum, data) => sum + (data.audioChunks ? data.audioChunks.length : 0), 0) | |
| }}; | |
| // Send via Streamlit component communication (we have access here!) | |
| if (typeof window.Streamlit !== 'undefined') {{ | |
| window.Streamlit.setComponentValue(combinedData); | |
| console.log('π INTEGRATED POLLING: Successfully sent', combinedData.totalChunks, 'chunks to Streamlit backend'); | |
| // Show visual confirmation | |
| const confirmationDiv = document.createElement('div'); | |
| confirmationDiv.innerHTML = `<strong>${{new Date().toLocaleTimeString()}}:</strong> π‘ BACKEND: Sent ${{combinedData.totalChunks}} queued chunks to STT service`; | |
| confirmationDiv.style.color = 'green'; | |
| confirmationDiv.style.fontWeight = 'bold'; | |
| document.getElementById('transcription-output').appendChild(confirmationDiv); | |
| }} else {{ | |
| console.warn('π INTEGRATED POLLING: Streamlit API not available'); | |
| // Put data back if we can't send it | |
| if (!window.streamlitPendingData) window.streamlitPendingData = []; | |
| window.streamlitPendingData.push(...pendingData); | |
| }} | |
| }} | |
| }} catch (error) {{ | |
| console.error('π INTEGRATED POLLING: Error checking pending data:', error); | |
| }} | |
| }} | |
| // Start periodic polling every 2 seconds | |
| setInterval(checkPendingAudioData, 2000); | |
| console.log('π INTEGRATED POLLING: Started polling for queued audio data every 2 seconds'); | |
| </script> | |
| <style> | |
| #webrtc-container {{ | |
| padding: 20px; | |
| border: 1px solid #ddd; | |
| border-radius: 8px; | |
| margin: 10px 0; | |
| background: #f9f9f9; | |
| }} | |
| #status {{ | |
| font-weight: bold; | |
| margin: 10px 0; | |
| padding: 10px; | |
| border-radius: 4px; | |
| background: white; | |
| }} | |
| #audio-chunks {{ | |
| color: #666; | |
| font-size: 0.9em; | |
| margin: 10px 0; | |
| }} | |
| button {{ | |
| margin: 5px; | |
| padding: 10px 15px; | |
| border: none; | |
| border-radius: 4px; | |
| cursor: pointer; | |
| font-size: 14px; | |
| }} | |
| button:disabled {{ | |
| opacity: 0.5; | |
| cursor: not-allowed; | |
| }} | |
| #start-webrtc {{ | |
| background: #dc3545; | |
| color: white; | |
| }} | |
| #stop-webrtc {{ | |
| background: #6c757d; | |
| color: white; | |
| }} | |
| </style> | |
| """ | |
| # Render the functional WebRTC component with bridge communication | |
| component_value = st.components.v1.html(webrtc_html, height=200) | |
| # π BRIDGE: Check for audio chunks from JavaScript | |
| if component_value: | |
| # JavaScript can send data back to Streamlit via return values | |
| self._process_bridge_data(component_value) | |
| # π POLLING: Check for data in JavaScript window object (fallback method) | |
| self._check_javascript_polling_data() | |
| # Auto-refresh for real-time processing | |
| if hasattr(st.session_state, 'pending_audio_chunks') and st.session_state.pending_audio_chunks: | |
| st.info(f"π Processing {len(st.session_state.pending_audio_chunks)} pending chunks...") | |
| # Clear after showing status | |
| st.session_state.pending_audio_chunks = [] | |
| # Add auto-refresh button for bridge testing | |
| col1, col2 = st.columns([1, 1]) | |
| with col1: | |
| if st.button("π Check Bridge Queue", key="check_bridge_queue_btn"): | |
| # Force refresh to check for new chunks | |
| st.rerun() | |
| with col2: | |
| # Show bridge status | |
| pending_count = len(getattr(st.session_state, 'pending_audio_chunks', [])) | |
| st.metric("π Bridge Queue", f"{pending_count} chunks") | |
| # Service connectivity and performance test | |
| st.subheader("π STT Service Performance") | |
| col1, col2, col3 = st.columns(3) | |
| with col1: | |
| if st.button("Test Connection", key="test_connection_btn"): | |
| try: | |
| response = requests.get(f"{self.stt_service_url}/", timeout=5) | |
| if response.status_code == 200: | |
| st.success("β STT Service is reachable") | |
| else: | |
| st.error(f"β STT Service returned {response.status_code}") | |
| except Exception as e: | |
| st.error(f"β STT Service connection failed: {e}") | |
| with col2: | |
| if st.button("Warm Up Client", key="warm_up_client_btn"): | |
| try: | |
| start_time = datetime.now() | |
| client = self.client | |
| if client: | |
| warmup_time = (datetime.now() - start_time).total_seconds() | |
| st.success(f"β Client warmed up in {warmup_time:.2f}s") | |
| st.session_state.client_warmed = True | |
| else: | |
| st.error("β Failed to warm up client") | |
| except Exception as e: | |
| st.error(f"β Client warmup failed: {e}") | |
| with col3: | |
| if st.button("Performance Info", key="performance_info_btn"): | |
| st.info(""" | |
| **Optimization Features:** | |
| - π Persistent client connection | |
| - β‘ Async execution with thread pool | |
| - π Latency monitoring | |
| - π Auto client reset on errors | |
| - π― Optimized for 'base' model speed | |
| """) | |
| # Display performance metrics | |
| if hasattr(st.session_state, 'client_warmed') and st.session_state.client_warmed: | |
| st.success("π₯ Client is warmed up and ready for optimal performance") | |
# Module-level entry point: Streamlit re-executes this script top-to-bottom on
# every interaction, so the handler is constructed fresh each rerun (its
# session-scoped state lives in st.session_state, initialized in __init__).
# NOTE(review): no __main__ guard on purpose — Streamlit runs the script
# directly, and guarding these lines would prevent the UI from rendering.
webrtc_handler = StreamlitWebRTCHandler()
# Render the full WebRTC recording UI (HTML/JS component, bridge polling,
# and the service-connectivity/performance panels).
webrtc_handler.create_webrtc_interface()