| #!/usr/bin/env python3 |
"""
Streamlit app for VoiceCal WebRTC voice transcription.

The browser-side JavaScript embedded in this page connects directly to a
remote WebSocket STT service; no WebSocket server runs in this process.
Single-service approach for HuggingFace Spaces compatibility.
"""
|
|
| import streamlit as st |
| import asyncio |
| import threading |
| import json |
| import sys |
| from datetime import datetime |
| import os |
|
|
# Configure the Streamlit page; this must be the first st.* call in the script.
# NOTE(review): page_icon "π€" looks mojibake-encoded (UTF-8 emoji read with
# the wrong charset) -- likely a microphone emoji; confirm before changing.
st.set_page_config(
    page_title="VoiceCal - Voice Assistant",
    page_icon="π€",
    layout="wide"
)
|
|
# ---------------------------------------------------------------------------
# Browser-side voice interface (HTML + JS) rendered via st.components.v1.html.
# The JavaScript runs inside the component iframe and opens a WebSocket
# DIRECTLY to the remote STT service -- no audio passes through this Python
# process.
#
# NOTE(review): many string literals in this file contain mojibake such as
# "π€" / "β" (UTF-8 emoji apparently decoded with the wrong charset). They
# are preserved byte-for-byte; restore the intended emoji only after
# confirming the original encoding.
# ---------------------------------------------------------------------------
_WEBRTC_HTML = """
<div id="voice-interface" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 10px; margin: 20px 0;">
    <h3 style="color: white; margin-top: 0;">π€ Voice Interface (Direct STT Connection)</h3>

    <div style="display: flex; gap: 10px; margin: 20px 0;">
        <button id="start-recording" style="background: #ff4757; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;">
            ποΈ Start Recording
        </button>
        <button id="stop-recording" style="background: #2f3542; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;" disabled>
            βΉοΈ Stop Recording
        </button>
        <button id="test-connection" style="background: #5352ed; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;">
            π Test STT Connection
        </button>
    </div>

    <div id="status" style="background: rgba(0,0,0,0.2); padding: 10px; border-radius: 5px; color: white; font-family: monospace;">
        Status: Ready to connect to STT service...
    </div>

    <div id="transcription" style="background: rgba(255,255,255,0.9); padding: 15px; border-radius: 5px; margin-top: 10px; min-height: 50px;">
        <strong>Transcription:</strong> <span id="transcription-text">Ready for voice input...</span>
    </div>
</div>

<script>
// Direct STT WebSocket connection (unmute.sh pattern).
// The commented-out HTTP/Gradio fallback that used to live here was dead
// code and has been removed: connectivity is WebSocket-only by design.
class VoiceCalDirectSTT {
    constructor() {
        this.sttWebSocket = null;
        this.mediaRecorder = null;
        this.audioChunks = [];
        this.isRecording = false;
        // slice(2, 11) takes the same 9 random id characters the
        // deprecated substr(2, 9) did.
        this.clientId = 'voicecal-' + Math.random().toString(36).slice(2, 11);
        // Standalone WebSocket STT service v1.0.0.
        this.sttWebSocketUrl = 'wss://pgits-stt-gpu-service.hf.space/ws/stt';

        this.setupEventListeners();
    }

    setupEventListeners() {
        document.getElementById('start-recording').addEventListener('click', () => this.startRecording());
        document.getElementById('stop-recording').addEventListener('click', () => this.stopRecording());
        document.getElementById('test-connection').addEventListener('click', () => this.testSTTConnection());
    }

    // Open (and immediately close) a throwaway socket to verify that the
    // STT endpoint is reachable.
    async testSTTConnection() {
        this.updateStatus('π Testing WebSocket STT service connection...');

        try {
            const testSocket = new WebSocket(this.sttWebSocketUrl);

            testSocket.onopen = () => {
                this.updateStatus('β STT WebSocket connection successful!');
                console.log('STT service WebSocket is ready');
                testSocket.close();
            };

            testSocket.onerror = (error) => {
                this.updateStatus('β STT WebSocket connection failed');
                console.error('STT WebSocket error:', error);
            };
        } catch (error) {
            this.updateStatus('β Failed to test STT WebSocket connection');
            console.error('STT connection test error:', error);
        }
    }

    // Connect to the STT service and resolve only once the socket is OPEN.
    // (The previous version returned true before the handshake finished,
    // racing startRecording() against the connection.)
    async connectToSTT() {
        this.updateStatus('π Connecting to STT service...');

        try {
            this.sttWebSocket = new WebSocket(this.sttWebSocketUrl);

            this.sttWebSocket.onmessage = (event) => {
                this.handleSTTResponse(JSON.parse(event.data));
            };

            this.sttWebSocket.onclose = () => {
                this.updateStatus('π STT connection closed');
            };

            await new Promise((resolve, reject) => {
                this.sttWebSocket.onopen = resolve;
                this.sttWebSocket.onerror = reject;
            });

            // Handshake done: report mid-session errors instead of rejecting.
            this.sttWebSocket.onerror = (error) => {
                this.updateStatus('β STT connection error');
                console.error('STT WebSocket error:', error);
            };

            this.updateStatus('β Connected to STT service - Ready for audio');
            return true;
        } catch (error) {
            this.updateStatus('β Failed to connect to STT service');
            console.error('STT connection failed:', error);
            return false;
        }
    }

    // Dispatch on the message types emitted by STT service v1.0.0.
    handleSTTResponse(data) {
        console.log('STT WebSocket Response:', data);

        switch (data.type) {
            case 'stt_connection_confirmed':
                this.updateStatus(`β ${data.service} v${data.version} connected - ${data.model} ready`);
                break;

            case 'stt_transcription_complete': {
                this.updateTranscription(data.transcription);
                const processingTime = data.timing?.processing_time || 'unknown';
                this.updateStatus(`β Transcription completed (${processingTime}s)`);
                break;
            }

            case 'stt_transcription_error':
                this.updateStatus(`β Transcription error: ${data.error}`);
                break;

            case 'pong':
                console.log('STT service pong received');
                break;

            default:
                console.log('Unknown STT response type:', data.type);
        }
    }

    async startRecording() {
        // Make sure the STT socket is open before touching the microphone.
        const connected = await this.connectToSTT();
        if (!connected) {
            return;
        }

        try {
            const stream = await navigator.mediaDevices.getUserMedia({
                audio: {
                    sampleRate: 16000,
                    channelCount: 1,
                    echoCancellation: true,
                    noiseSuppression: true
                }
            });

            // unmute.sh pattern: WebM/Opus container.
            this.mediaRecorder = new MediaRecorder(stream, {
                mimeType: 'audio/webm;codecs=opus'
            });

            this.audioChunks = [];

            this.mediaRecorder.ondataavailable = (event) => {
                if (event.data.size > 0) {
                    this.audioChunks.push(event.data);
                }
            };

            this.mediaRecorder.onstop = () => {
                this.processRecordedAudio();
                // Release the microphone.
                stream.getTracks().forEach(track => track.stop());
            };

            this.mediaRecorder.start();
            this.isRecording = true;

            document.getElementById('start-recording').disabled = true;
            document.getElementById('stop-recording').disabled = false;
            this.updateStatus('ποΈ Recording audio - Speak now...');
        } catch (error) {
            console.error('Recording failed:', error);
            this.updateStatus('β Microphone access failed');
        }
    }

    stopRecording() {
        if (this.mediaRecorder && this.isRecording) {
            this.mediaRecorder.stop();
            this.isRecording = false;

            document.getElementById('start-recording').disabled = false;
            document.getElementById('stop-recording').disabled = true;
            this.updateStatus('βΉοΈ Recording stopped - Processing audio...');
        }
    }

    async processRecordedAudio() {
        if (this.audioChunks.length === 0) {
            this.updateStatus('β No audio data recorded');
            return;
        }

        try {
            this.updateStatus('βοΈ Processing audio with WebSocket STT...');

            // Combine all recorded chunks into one blob (unmute.sh pattern).
            const audioBlob = new Blob(this.audioChunks, { type: 'audio/webm' });

            await this.sendAudioViaWebSocket(audioBlob);
        } catch (error) {
            console.error('Audio processing failed:', error);
            this.updateStatus('β Audio processing failed');
        }
    }

    async sendAudioViaWebSocket(audioBlob) {
        try {
            if (!this.sttWebSocket || this.sttWebSocket.readyState !== WebSocket.OPEN) {
                this.updateStatus('β WebSocket not connected');
                return;
            }

            this.updateStatus('π€ Sending audio to STT via WebSocket...');

            // Base64-encode in 32 KiB chunks. The previous
            // btoa(String.fromCharCode(...bytes)) spread the entire buffer
            // as arguments and threw a RangeError on long recordings.
            const bytes = new Uint8Array(await audioBlob.arrayBuffer());
            const CHUNK = 0x8000;
            let binary = '';
            for (let i = 0; i < bytes.length; i += CHUNK) {
                binary += String.fromCharCode.apply(null, bytes.subarray(i, i + CHUNK));
            }
            const base64Audio = btoa(binary);

            // Message shape expected by standalone STT service v1.0.0.
            this.sttWebSocket.send(JSON.stringify({
                type: "stt_audio_chunk",
                audio_data: base64Audio,
                language: "auto",
                model_size: "base",
                client_id: this.clientId
            }));

            console.log('Audio sent via WebSocket:', base64Audio.length, 'base64 chars');
        } catch (error) {
            console.error('WebSocket audio transmission failed:', error);
            this.updateStatus('β WebSocket transmission failed: ' + error.message);
        }
    }

    updateStatus(message) {
        document.getElementById('status').innerHTML = `Status: ${message}`;
    }

    updateTranscription(text) {
        document.getElementById('transcription-text').innerHTML = text;
    }
}

// Initialize once the DOM is ready.
if (document.readyState === 'loading') {
    document.addEventListener('DOMContentLoaded', () => {
        window.voiceCalDirectSTT = new VoiceCalDirectSTT();
    });
} else {
    window.voiceCalDirectSTT = new VoiceCalDirectSTT();
}
</script>
"""


def _render_header():
    """Title, subtitle, three-column status dashboard, and connection banner."""
    st.title("π€π VoiceCal - Voice-Enabled AI Assistant")
    st.markdown("**WebRTC Voice Integration Following unmute.sh Pattern**")

    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric("π€ VoiceCal", "Online", "β")
        st.metric("π‘ WebSocket", "Embedded", "π§")

    with col2:
        st.metric("π§ STT Service", "Ready", "β")
        st.metric("π TTS Service", "Ready", "β")

    with col3:
        st.metric("π Connection", "Direct", "β‘")
        st.metric("π± Pattern", "unmute.sh", "π―")

    st.success("π― **STT Service Connected**: `wss://pgits-stt-gpu-service.hf.space/ws/stt`")


def _render_voice_interface():
    """Embed the browser-side recording UI as an HTML component."""
    st.markdown("---")
    st.header("π WebRTC Voice Interface")
    # NOTE(review): older Streamlit releases require an explicit
    # `import streamlit.components.v1` for this attribute path -- confirm the
    # deployed version resolves it lazily.
    st.components.v1.html(_WEBRTC_HTML, height=500)


def _render_technical_details():
    """Connection parameters, feature list, and machine-readable status JSON."""
    st.markdown("---")
    st.header("π§ Technical Details")

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("π‘ WebSocket Connection")
        st.code("""
STT WebSocket: wss://pgits-stt-gpu-service.hf.space/ws/stt
Audio Format: WebM/Opus (16kHz, Mono)
Service: Standalone STT v1.0.0
Pattern: unmute.sh methodology
Connection: Pure WebSocket (no fallbacks)
""")

    with col2:
        st.subheader("π― Features")
        for feature in (
            "β Pure WebSocket STT connection",
            "β WebRTC MediaRecorder integration",
            "β unmute.sh audio processing",
            "β Real-time voice transcription",
            "β Standalone STT service v1.0.0",
            "β No HTTP API fallbacks",
            "β Base64 audio transmission",
        ):
            st.write(feature)

    st.subheader("π Service Status")
    st.json({
        "stt_websocket": "wss://pgits-stt-gpu-service.hf.space/ws/stt",
        "stt_service": "Standalone WebSocket STT v1.0.0",
        "connection_type": "pure_websocket",
        "audio_format": "WebM/Opus 16kHz",
        "transmission": "Base64 encoded",
        "pattern": "unmute.sh WebSocket methodology",
        "fallbacks": "disabled",
        "status": "Ready for WebSocket voice interaction"
    })


def main():
    """Render the full VoiceCal page.

    The page is static Streamlit UI plus one embedded HTML/JS component;
    all audio capture and STT traffic happens in the visitor's browser
    (see _WEBRTC_HTML), not in this Python process.
    """
    _render_header()
    _render_voice_interface()
    _render_technical_details()

    # Footer.
    st.markdown("---")
    st.markdown("π **VoiceCal WebSocket STT** - Pure WebSocket WebRTC with standalone STT service v1.0.0")
|
|
# Entry point: Streamlit executes this module as a script, so the guard fires
# both under `streamlit run` and when the file is invoked directly.
if __name__ == "__main__":
    main()