"""
VoiceCal Streamlit app with WebRTC voice integration (unmute.sh pattern).
"""
|
|
import asyncio
import json
import os
import sys
from datetime import datetime

import streamlit as st
|
|
def main():
    """Render the VoiceCal page: status metrics, the embedded WebRTC
    client (HTML/JS component), and the technical-details section.

    Takes no arguments and returns ``None``; all output goes through the
    Streamlit API. The voice pipeline itself runs client-side in the
    embedded JavaScript, which talks to this app's ``/ws/webrtc/{client_id}``
    WebSocket endpoint (served elsewhere — presumably by a FastAPI backend;
    TODO confirm).
    """
    st.set_page_config(
        page_title="VoiceCal - Voice Assistant",
        page_icon="🎤",
        layout="wide",
    )

    # NOTE(review): the original file's emoji were mojibake-garbled and had
    # split several string literals across lines; they are repaired here.
    st.title("🎤 VoiceCal - Voice-Enabled AI Assistant")
    st.markdown("**WebRTC Voice Integration Following unmute.sh Pattern**")

    # --- Service status overview -------------------------------------------
    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric("🎤 VoiceCal", "Online", "✅")
        st.metric("📡 WebRTC", "Ready", "🔄")

    with col2:
        st.metric("🧠 STT Service", "Available", "✅")
        st.metric("🔊 TTS Service", "Available", "✅")

    with col3:
        st.metric("🔗 WebSocket", "Initializing", "⏳")
        st.metric("📱 Client", "Pending", "🔌")

    # --- Embedded WebRTC client --------------------------------------------
    st.markdown("---")
    st.header("🎙 WebRTC Voice Integration")

    webrtc_html = """
    <div id="voice-interface" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 10px; margin: 20px 0;">
        <h3 style="color: white; margin-top: 0;">🎤 Voice Interface (unmute.sh Pattern)</h3>

        <div style="display: flex; gap: 10px; margin: 20px 0;">
            <button id="start-recording" style="background: #ff4757; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;">
                🎙️ Start Recording
            </button>
            <button id="stop-recording" style="background: #2f3542; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;" disabled>
                ⏹️ Stop Recording
            </button>
            <button id="test-tts" style="background: #5352ed; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;">
                🔊 Test TTS
            </button>
        </div>

        <div id="status" style="background: rgba(0,0,0,0.2); padding: 10px; border-radius: 5px; color: white; font-family: monospace;">
            Status: Initializing WebRTC connection...
        </div>

        <div id="transcription" style="background: rgba(255,255,255,0.9); padding: 15px; border-radius: 5px; margin-top: 10px; min-height: 50px;">
            <strong>Transcription:</strong> <span id="transcription-text">Ready for voice input...</span>
        </div>

        <div id="audio-controls" style="margin-top: 15px;">
            <audio id="tts-audio" controls style="width: 100%; display: none;"></audio>
        </div>
    </div>

    <script>
    // WebRTC implementation following the unmute.sh pattern: stream short
    // MediaRecorder chunks over a WebSocket, then "flush" on stop.
    class VoiceCalWebRTC {
        constructor() {
            this.websocket = null;
            this.mediaRecorder = null;
            this.audioChunks = [];
            this.isRecording = false;
            // Random per-session id; String.prototype.substr is deprecated,
            // so use slice.
            this.clientId = 'demo-' + Math.random().toString(36).slice(2, 11);
            this.sttWebSocketUrl = 'wss://pgits-stt-gpu-service.hf.space/ws/stt';
            // Use same host and port with a different endpoint path.
            const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
            const wsHost = window.location.host; // includes port
            this.voiceCalWebSocketUrl = `${wsProtocol}//${wsHost}/ws/webrtc/${this.clientId}`;

            this.init();
        }

        async init() {
            this.updateStatus('🔄 Connecting to WebSocket...');
            await this.connectWebSocket();
            this.setupEventListeners();
        }

        async connectWebSocket() {
            try {
                // Follow unmute.sh pattern: connect to the VoiceCal WebRTC handler.
                this.websocket = new WebSocket(this.voiceCalWebSocketUrl);

                this.websocket.onopen = () => {
                    this.updateStatus('✅ WebSocket connected - Ready for voice interaction');
                    console.log('WebSocket connected successfully');
                };

                this.websocket.onmessage = (event) => {
                    const data = JSON.parse(event.data);
                    this.handleWebSocketMessage(data);
                };

                this.websocket.onclose = () => {
                    this.updateStatus('❌ WebSocket disconnected - Attempting reconnection...');
                    setTimeout(() => this.connectWebSocket(), 3000);
                };

                this.websocket.onerror = (error) => {
                    console.error('WebSocket error:', error);
                    this.updateStatus('❌ WebSocket connection error');
                };

            } catch (error) {
                console.error('WebSocket connection failed:', error);
                this.updateStatus('❌ Failed to connect to WebSocket');
            }
        }

        handleWebSocketMessage(data) {
            console.log('Received:', data);

            switch(data.type) {
                case 'connection_confirmed':
                    this.updateStatus('✅ Connected - Ready for voice commands');
                    break;

                case 'transcription':
                    this.updateTranscription(data.text);
                    this.updateStatus('✅ Transcription completed');
                    break;

                case 'tts_playback':
                    this.playTTSAudio(data.audio_data, data.audio_format);
                    break;

                case 'recording_started':
                    this.updateStatus('🎙️ Recording in progress...');
                    break;

                case 'recording_stopped':
                    this.updateStatus('⏳ Processing audio (unmute.sh flush trick)...');
                    break;

                case 'chunk_buffered':
                    this.updateStatus(`📦 Buffering audio chunks (${data.buffer_chunks} chunks)`);
                    break;

                case 'error':
                case 'transcription_error':
                case 'tts_error':
                    this.updateStatus(`❌ Error: ${data.message}`);
                    break;
            }
        }

        setupEventListeners() {
            document.getElementById('start-recording').addEventListener('click', () => {
                this.startRecording();
            });

            document.getElementById('stop-recording').addEventListener('click', () => {
                this.stopRecording();
            });

            document.getElementById('test-tts').addEventListener('click', () => {
                this.testTTS();
            });
        }

        async startRecording() {
            try {
                const stream = await navigator.mediaDevices.getUserMedia({
                    audio: {
                        sampleRate: 16000,
                        channelCount: 1,
                        echoCancellation: true,
                        noiseSuppression: true
                    }
                });

                // unmute.sh pattern: use MediaRecorder with WebM/Opus.
                this.mediaRecorder = new MediaRecorder(stream, {
                    mimeType: 'audio/webm;codecs=opus'
                });

                this.audioChunks = [];

                this.mediaRecorder.ondataavailable = (event) => {
                    if (event.data.size > 0) {
                        this.audioChunks.push(event.data);

                        // Real-time streaming: send chunks as they arrive.
                        const reader = new FileReader();
                        reader.onload = () => {
                            // Encode to base64 byte-by-byte; spreading a large
                            // Uint8Array into String.fromCharCode can overflow
                            // the call stack.
                            const bytes = new Uint8Array(reader.result);
                            let binary = '';
                            for (let i = 0; i < bytes.length; i++) {
                                binary += String.fromCharCode(bytes[i]);
                            }
                            this.sendWebSocketMessage({
                                type: 'audio_chunk',
                                audio_data: btoa(binary),
                                sample_rate: 16000
                            });
                        };
                        reader.readAsArrayBuffer(event.data);
                    }
                };

                this.mediaRecorder.onstop = () => {
                    // unmute.sh flush trick: signal end of recording.
                    this.sendWebSocketMessage({
                        type: 'stop_recording'
                    });

                    stream.getTracks().forEach(track => track.stop());
                };

                // Start recording with a small timeslice for real-time streaming.
                this.mediaRecorder.start(250); // 250ms chunks following unmute.sh pattern

                this.isRecording = true;

                // Send start recording message.
                this.sendWebSocketMessage({
                    type: 'start_recording'
                });

                // Update UI.
                document.getElementById('start-recording').disabled = true;
                document.getElementById('stop-recording').disabled = false;
                this.updateStatus('🎙️ Recording started - Speak now...');

            } catch (error) {
                console.error('Recording failed:', error);
                this.updateStatus('❌ Microphone access failed');
            }
        }

        stopRecording() {
            if (this.mediaRecorder && this.isRecording) {
                this.mediaRecorder.stop();
                this.isRecording = false;

                // Update UI.
                document.getElementById('start-recording').disabled = false;
                document.getElementById('stop-recording').disabled = true;
                this.updateStatus('⏹️ Recording stopped - Processing...');
            }
        }

        sendWebSocketMessage(message) {
            if (this.websocket && this.websocket.readyState === WebSocket.OPEN) {
                this.websocket.send(JSON.stringify(message));
            }
        }

        updateStatus(message) {
            document.getElementById('status').innerHTML = `Status: ${message}`;
        }

        updateTranscription(text) {
            document.getElementById('transcription-text').innerHTML = text;
        }

        playTTSAudio(audioData, format) {
            try {
                const audioElement = document.getElementById('tts-audio');
                const audioBytes = atob(audioData);
                const audioArray = new Uint8Array(audioBytes.length);

                for (let i = 0; i < audioBytes.length; i++) {
                    audioArray[i] = audioBytes.charCodeAt(i);
                }

                const audioBlob = new Blob([audioArray], { type: `audio/${format}` });
                const audioUrl = URL.createObjectURL(audioBlob);

                audioElement.src = audioUrl;
                audioElement.style.display = 'block';
                audioElement.play();

                this.updateStatus('🔊 Playing TTS audio response');

            } catch (error) {
                console.error('TTS playback failed:', error);
                this.updateStatus('❌ TTS playback failed');
            }
        }

        testTTS() {
            const testText = "Hello! This is a test of the voice synthesis system. VoiceCal is working with WebRTC integration following the unmute.sh pattern.";

            this.sendWebSocketMessage({
                type: 'tts_request',
                text: testText,
                voice_preset: 'v2/en_speaker_6'
            });

            this.updateStatus('🔊 Requesting TTS synthesis...');
        }
    }

    // Initialize exactly once, whether or not the DOM is already parsed.
    // (The original registered a second DOMContentLoaded listener on top of
    // this check, creating two instances and two WebSocket connections.)
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', () => {
            window.voiceCalWebRTC = new VoiceCalWebRTC();
        });
    } else {
        window.voiceCalWebRTC = new VoiceCalWebRTC();
    }
    </script>
    """

    st.components.v1.html(webrtc_html, height=600)

    # --- Technical details --------------------------------------------------
    st.markdown("---")
    st.header("🔧 Technical Details")

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("📡 WebRTC Configuration")
        # Plain string (the original used a needless f-string with escaped
        # braces); {client_id} is a literal placeholder shown to the user.
        st.code("""
WebSocket URL: wss://pgits-voicecal.hf.space/ws/webrtc/{client_id}
STT Endpoint: wss://pgits-stt-gpu-service.hf.space/ws/stt
TTS Endpoint: wss://pgits-tts-gpu-service.hf.space/ws/tts
Audio Format: WebM/Opus (16kHz, Mono)
Chunk Size: 250ms (unmute.sh pattern)
        """)

    with col2:
        st.subheader("🎯 Features")
        st.write("✅ Real-time audio streaming")
        st.write("✅ WebRTC MediaRecorder integration")
        st.write("✅ unmute.sh pattern implementation")
        st.write("✅ Automatic chunking & buffering")
        st.write("✅ Flush trick for end-of-stream")
        st.write("✅ Bidirectional voice communication")

    st.subheader("🔗 Service Endpoints")
    st.json({
        "voicecal_websocket": "wss://pgits-voicecal.hf.space/ws/webrtc/demo-xxxx",
        "stt_service": "wss://pgits-stt-gpu-service.hf.space/ws/stt",
        "tts_service": "wss://pgits-tts-gpu-service.hf.space/ws/tts",
        "pattern": "unmute.sh WebRTC implementation",
        "status": "Ready for voice interaction",
    })

    st.markdown("---")
    st.markdown("🚀 **VoiceCal WebRTC Integration** - Following unmute.sh pattern for optimal voice processing")
|
|
# Script entry point: render the app when run directly (pipe-mangled
# formatting repaired).
if __name__ == "__main__":
    main()