# voiceCal / streamlit_websocket_app.py
# Author: Peter Michael Gits
# restore: Bring back full VoiceCal WebRTC interface (commit 3763b20)
#!/usr/bin/env python3
"""
Streamlit app with embedded WebSocket server for VoiceCal WebRTC
Single-service approach for HuggingFace Spaces compatibility
"""
import streamlit as st
import asyncio
import threading
import json
import sys
from datetime import datetime
import os
# Streamlit page configuration — must execute before any other st.* call.
st.set_page_config(
    page_title="VoiceCal - Voice Assistant",
    page_icon="🎀",
    layout="wide",
)
def main():
    """Render the VoiceCal Streamlit page.

    Builds a service-status dashboard, embeds a self-contained HTML/JS
    WebRTC voice interface (via ``st.components.v1.html``) that connects
    directly from the browser to a remote STT WebSocket service, and
    prints technical details about the connection.

    NOTE(review): the embedded JavaScript runs inside the component
    iframe and never reports back to Python — transcriptions stay in
    the browser DOM.
    """
    st.title("πŸŽ€πŸ“… VoiceCal - Voice-Enabled AI Assistant")
    st.markdown("**WebRTC Voice Integration Following unmute.sh Pattern**")

    # Service status dashboard (static labels — no live health checks here).
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("🎀 VoiceCal", "Online", "βœ…")
        st.metric("πŸ“‘ WebSocket", "Embedded", "πŸ”§")
    with col2:
        st.metric("🧠 STT Service", "Ready", "βœ…")
        st.metric("πŸ”Š TTS Service", "Ready", "βœ…")
    with col3:
        st.metric("🌐 Connection", "Direct", "⚑")
        st.metric("πŸ“± Pattern", "unmute.sh", "🎯")

    # Connection Status banner.
    st.success("🎯 **STT Service Connected**: `wss://pgits-stt-gpu-service.hf.space/ws/stt`")

    # WebRTC Integration Section
    st.markdown("---")
    st.header("🌐 WebRTC Voice Interface")

    # Browser-side interface: records mic audio with MediaRecorder and ships
    # it to the STT service over a single WebSocket as base64-encoded WebM.
    webrtc_html = """
    <div id="voice-interface" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 10px; margin: 20px 0;">
        <h3 style="color: white; margin-top: 0;">🎀 Voice Interface (Direct STT Connection)</h3>
        <div style="display: flex; gap: 10px; margin: 20px 0;">
            <button id="start-recording" style="background: #ff4757; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;">
                πŸŽ™οΈ Start Recording
            </button>
            <button id="stop-recording" style="background: #2f3542; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;" disabled>
                ⏹️ Stop Recording
            </button>
            <button id="test-connection" style="background: #5352ed; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;">
                πŸ”— Test STT Connection
            </button>
        </div>
        <div id="status" style="background: rgba(0,0,0,0.2); padding: 10px; border-radius: 5px; color: white; font-family: monospace;">
            Status: Ready to connect to STT service...
        </div>
        <div id="transcription" style="background: rgba(255,255,255,0.9); padding: 15px; border-radius: 5px; margin-top: 10px; min-height: 50px;">
            <strong>Transcription:</strong> <span id="transcription-text">Ready for voice input...</span>
        </div>
    </div>
    <script>
    // Direct STT WebSocket Connection (unmute.sh Pattern)
    class VoiceCalDirectSTT {
        constructor() {
            this.sttWebSocket = null;
            this.mediaRecorder = null;
            this.audioChunks = [];
            this.isRecording = false;
            this.clientId = 'voicecal-' + Math.random().toString(36).substr(2, 9);
            // Connect to standalone WebSocket STT service v1.0.0
            this.sttWebSocketUrl = 'wss://pgits-stt-gpu-service.hf.space/ws/stt';
            this.setupEventListeners();
        }

        setupEventListeners() {
            document.getElementById('start-recording').addEventListener('click', () => {
                this.startRecording();
            });
            document.getElementById('stop-recording').addEventListener('click', () => {
                this.stopRecording();
            });
            document.getElementById('test-connection').addEventListener('click', () => {
                this.testSTTConnection();
            });
        }

        async testSTTConnection() {
            this.updateStatus('πŸ”— Testing WebSocket STT service connection...');
            try {
                // Throwaway socket: open, report, close.
                const testSocket = new WebSocket(this.sttWebSocketUrl);
                testSocket.onopen = () => {
                    this.updateStatus('βœ… STT WebSocket connection successful!');
                    console.log('STT service WebSocket is ready');
                    testSocket.close();
                };
                testSocket.onerror = (error) => {
                    this.updateStatus('❌ STT WebSocket connection failed');
                    console.error('STT WebSocket error:', error);
                };
            } catch (error) {
                this.updateStatus('❌ Failed to test STT WebSocket connection');
                console.error('STT connection test error:', error);
            }
        }

        // FIX: previously resolved `true` immediately after constructing the
        // WebSocket, before onopen fired — a very short recording could then
        // try to send while the socket was still CONNECTING. Now resolves
        // only once the socket is actually open (or has failed).
        connectToSTT() {
            this.updateStatus('πŸ”Œ Connecting to STT service...');
            return new Promise((resolve) => {
                try {
                    this.sttWebSocket = new WebSocket(this.sttWebSocketUrl);
                    this.sttWebSocket.onopen = () => {
                        this.updateStatus('βœ… Connected to STT service - Ready for audio');
                        resolve(true);
                    };
                    this.sttWebSocket.onmessage = (event) => {
                        const data = JSON.parse(event.data);
                        this.handleSTTResponse(data);
                    };
                    this.sttWebSocket.onclose = () => {
                        this.updateStatus('πŸ”Œ STT connection closed');
                    };
                    this.sttWebSocket.onerror = (error) => {
                        this.updateStatus('❌ STT connection error');
                        console.error('STT WebSocket error:', error);
                        resolve(false);  // no-op if already resolved by onopen
                    };
                } catch (error) {
                    this.updateStatus('❌ Failed to connect to STT service');
                    console.error('STT connection failed:', error);
                    resolve(false);
                }
            });
        }

        handleSTTResponse(data) {
            console.log('STT WebSocket Response:', data);
            switch(data.type) {
                case 'stt_connection_confirmed':
                    this.updateStatus(`βœ… ${data.service} v${data.version} connected - ${data.model} ready`);
                    break;
                case 'stt_transcription_complete':
                    this.updateTranscription(data.transcription);
                    const processingTime = data.timing?.processing_time || 'unknown';
                    this.updateStatus(`βœ… Transcription completed (${processingTime}s)`);
                    break;
                case 'stt_transcription_error':
                    this.updateStatus(`❌ Transcription error: ${data.error}`);
                    break;
                case 'pong':
                    console.log('STT service pong received');
                    break;
                default:
                    console.log('Unknown STT response type:', data.type);
            }
        }

        async startRecording() {
            // Connect to STT service first; bail out if it never opens.
            const connected = await this.connectToSTT();
            if (!connected) {
                return;
            }
            try {
                const stream = await navigator.mediaDevices.getUserMedia({
                    audio: {
                        sampleRate: 16000,
                        channelCount: 1,
                        echoCancellation: true,
                        noiseSuppression: true
                    }
                });
                // unmute.sh pattern: WebM format with small chunks
                this.mediaRecorder = new MediaRecorder(stream, {
                    mimeType: 'audio/webm;codecs=opus'
                });
                this.audioChunks = [];
                this.mediaRecorder.ondataavailable = (event) => {
                    if (event.data.size > 0) {
                        this.audioChunks.push(event.data);
                    }
                };
                this.mediaRecorder.onstop = () => {
                    this.processRecordedAudio();
                    stream.getTracks().forEach(track => track.stop());
                };
                this.mediaRecorder.start();
                this.isRecording = true;
                // Update UI
                document.getElementById('start-recording').disabled = true;
                document.getElementById('stop-recording').disabled = false;
                this.updateStatus('πŸŽ™οΈ Recording audio - Speak now...');
            } catch (error) {
                console.error('Recording failed:', error);
                this.updateStatus('❌ Microphone access failed');
            }
        }

        stopRecording() {
            if (this.mediaRecorder && this.isRecording) {
                this.mediaRecorder.stop();
                this.isRecording = false;
                // Update UI
                document.getElementById('start-recording').disabled = false;
                document.getElementById('stop-recording').disabled = true;
                this.updateStatus('⏹️ Recording stopped - Processing audio...');
            }
        }

        async processRecordedAudio() {
            if (this.audioChunks.length === 0) {
                this.updateStatus('❌ No audio data recorded');
                return;
            }
            try {
                this.updateStatus('βš™οΈ Processing audio with WebSocket STT...');
                // Combine all audio chunks (unmute.sh pattern)
                const audioBlob = new Blob(this.audioChunks, { type: 'audio/webm' });
                // Send to STT service via WebSocket
                await this.sendAudioViaWebSocket(audioBlob);
            } catch (error) {
                console.error('Audio processing failed:', error);
                this.updateStatus('❌ Audio processing failed');
            }
        }

        async sendAudioViaWebSocket(audioBlob) {
            try {
                if (!this.sttWebSocket || this.sttWebSocket.readyState !== WebSocket.OPEN) {
                    this.updateStatus('❌ WebSocket not connected');
                    return;
                }
                this.updateStatus('πŸ“€ Sending audio to STT via WebSocket...');
                // FIX: btoa(String.fromCharCode(...new Uint8Array(buf))) spreads
                // every byte as a function argument and throws "Maximum call
                // stack size exceeded" on recordings longer than a few seconds.
                // Convert in bounded chunks instead.
                const arrayBuffer = await audioBlob.arrayBuffer();
                const bytes = new Uint8Array(arrayBuffer);
                const CHUNK = 0x8000;  // 32 KiB keeps apply() argument count safe
                let binary = '';
                for (let i = 0; i < bytes.length; i += CHUNK) {
                    binary += String.fromCharCode.apply(null, bytes.subarray(i, i + CHUNK));
                }
                const base64Audio = btoa(binary);
                // Send audio data via WebSocket to standalone STT service v1.0.0
                this.sttWebSocket.send(JSON.stringify({
                    type: "stt_audio_chunk",
                    audio_data: base64Audio,
                    language: "auto",
                    model_size: "base",
                    client_id: this.clientId
                }));
                console.log('Audio sent via WebSocket:', base64Audio.length, 'base64 chars');
            } catch (error) {
                console.error('WebSocket audio transmission failed:', error);
                this.updateStatus('❌ WebSocket transmission failed: ' + error.message);
            }
        }

        // HTTP (Gradio) API fallback intentionally removed — WebSocket-only by design.

        updateStatus(message) {
            document.getElementById('status').innerHTML = `Status: ${message}`;
        }

        updateTranscription(text) {
            document.getElementById('transcription-text').innerHTML = text;
        }
    }

    // Initialize when DOM is ready
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', () => {
            window.voiceCalDirectSTT = new VoiceCalDirectSTT();
        });
    } else {
        window.voiceCalDirectSTT = new VoiceCalDirectSTT();
    }
    </script>
    """

    # Render the WebRTC interface inside a component iframe.
    st.components.v1.html(webrtc_html, height=500)

    # Technical Information
    st.markdown("---")
    st.header("πŸ”§ Technical Details")
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("πŸ“‘ WebSocket Connection")
        st.code("""
        STT WebSocket: wss://pgits-stt-gpu-service.hf.space/ws/stt
        Audio Format: WebM/Opus (16kHz, Mono)
        Service: Standalone STT v1.0.0
        Pattern: unmute.sh methodology
        Connection: Pure WebSocket (no fallbacks)
        """)
    with col2:
        st.subheader("🎯 Features")
        st.write("βœ… Pure WebSocket STT connection")
        st.write("βœ… WebRTC MediaRecorder integration")
        st.write("βœ… unmute.sh audio processing")
        st.write("βœ… Real-time voice transcription")
        st.write("βœ… Standalone STT service v1.0.0")
        st.write("βœ… No HTTP API fallbacks")
        st.write("βœ… Base64 audio transmission")

    # Connection Status summary (static — mirrors the constants above).
    st.subheader("πŸ”— Service Status")
    st.json({
        "stt_websocket": "wss://pgits-stt-gpu-service.hf.space/ws/stt",
        "stt_service": "Standalone WebSocket STT v1.0.0",
        "connection_type": "pure_websocket",
        "audio_format": "WebM/Opus 16kHz",
        "transmission": "Base64 encoded",
        "pattern": "unmute.sh WebSocket methodology",
        "fallbacks": "disabled",
        "status": "Ready for WebSocket voice interaction"
    })

    # Footer
    st.markdown("---")
    st.markdown("πŸš€ **VoiceCal WebSocket STT** - Pure WebSocket WebRTC with standalone STT service v1.0.0")
# Script entry point.
if __name__ == "__main__":
    main()