"""
VoiceCal Streamlit app with WebRTC voice integration (unmute.sh pattern).
"""
|
|
import asyncio
import json
import os
import sys
from datetime import datetime

import streamlit as st
|
|
def main():
    """Render the VoiceCal page: status metrics, the embedded WebRTC
    client (HTML/JS component), and the technical-details section.

    Takes no arguments and returns ``None``; all output goes through the
    Streamlit API. The voice pipeline itself runs client-side in the
    embedded JavaScript, which talks to this app's ``/ws/webrtc/{client_id}``
    WebSocket endpoint (served elsewhere — presumably by a FastAPI backend;
    TODO confirm).
    """
    st.set_page_config(
        page_title="VoiceCal - Voice Assistant",
        page_icon="🎤",
        layout="wide",
    )

    # NOTE(review): the original file's emoji were mojibake-garbled and had
    # split several string literals across lines; they are repaired here.
    st.title("🎤 VoiceCal - Voice-Enabled AI Assistant")
    st.markdown("**WebRTC Voice Integration Following unmute.sh Pattern**")

    # --- Service status overview -------------------------------------------
    col1, col2, col3 = st.columns(3)

    with col1:
        st.metric("🎤 VoiceCal", "Online", "✅")
        st.metric("📡 WebRTC", "Ready", "🔄")

    with col2:
        st.metric("🧠 STT Service", "Available", "✅")
        st.metric("🔊 TTS Service", "Available", "✅")

    with col3:
        st.metric("🔗 WebSocket", "Initializing", "⏳")
        st.metric("📱 Client", "Pending", "🔌")

    # --- Embedded WebRTC client --------------------------------------------
    st.markdown("---")
    st.header("🎙 WebRTC Voice Integration")

    webrtc_html = """
    <div id="voice-interface" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 10px; margin: 20px 0;">
        <h3 style="color: white; margin-top: 0;">🎤 Voice Interface (unmute.sh Pattern)</h3>

        <div style="display: flex; gap: 10px; margin: 20px 0;">
            <button id="start-recording" style="background: #ff4757; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;">
                🎙️ Start Recording
            </button>
            <button id="stop-recording" style="background: #2f3542; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;" disabled>
                ⏹️ Stop Recording
            </button>
            <button id="test-tts" style="background: #5352ed; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;">
                🔊 Test TTS
            </button>
        </div>

        <div id="status" style="background: rgba(0,0,0,0.2); padding: 10px; border-radius: 5px; color: white; font-family: monospace;">
            Status: Initializing WebRTC connection...
        </div>

        <div id="transcription" style="background: rgba(255,255,255,0.9); padding: 15px; border-radius: 5px; margin-top: 10px; min-height: 50px;">
            <strong>Transcription:</strong> <span id="transcription-text">Ready for voice input...</span>
        </div>

        <div id="audio-controls" style="margin-top: 15px;">
            <audio id="tts-audio" controls style="width: 100%; display: none;"></audio>
        </div>
    </div>

    <script>
    // WebRTC implementation following the unmute.sh pattern: stream short
    // MediaRecorder chunks over a WebSocket, then "flush" on stop.
    class VoiceCalWebRTC {
        constructor() {
            this.websocket = null;
            this.mediaRecorder = null;
            this.audioChunks = [];
            this.isRecording = false;
            // Random per-session id; String.prototype.substr is deprecated,
            // so use slice.
            this.clientId = 'demo-' + Math.random().toString(36).slice(2, 11);
            this.sttWebSocketUrl = 'wss://pgits-stt-gpu-service.hf.space/ws/stt';
            // Use same host and port with a different endpoint path.
            const wsProtocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
            const wsHost = window.location.host; // includes port
            this.voiceCalWebSocketUrl = `${wsProtocol}//${wsHost}/ws/webrtc/${this.clientId}`;

            this.init();
        }

        async init() {
            this.updateStatus('🔄 Connecting to WebSocket...');
            await this.connectWebSocket();
            this.setupEventListeners();
        }

        async connectWebSocket() {
            try {
                // Follow unmute.sh pattern: connect to the VoiceCal WebRTC handler.
                this.websocket = new WebSocket(this.voiceCalWebSocketUrl);

                this.websocket.onopen = () => {
                    this.updateStatus('✅ WebSocket connected - Ready for voice interaction');
                    console.log('WebSocket connected successfully');
                };

                this.websocket.onmessage = (event) => {
                    const data = JSON.parse(event.data);
                    this.handleWebSocketMessage(data);
                };

                this.websocket.onclose = () => {
                    this.updateStatus('❌ WebSocket disconnected - Attempting reconnection...');
                    setTimeout(() => this.connectWebSocket(), 3000);
                };

                this.websocket.onerror = (error) => {
                    console.error('WebSocket error:', error);
                    this.updateStatus('❌ WebSocket connection error');
                };

            } catch (error) {
                console.error('WebSocket connection failed:', error);
                this.updateStatus('❌ Failed to connect to WebSocket');
            }
        }

        handleWebSocketMessage(data) {
            console.log('Received:', data);

            switch(data.type) {
                case 'connection_confirmed':
                    this.updateStatus('✅ Connected - Ready for voice commands');
                    break;

                case 'transcription':
                    this.updateTranscription(data.text);
                    this.updateStatus('✅ Transcription completed');
                    break;

                case 'tts_playback':
                    this.playTTSAudio(data.audio_data, data.audio_format);
                    break;

                case 'recording_started':
                    this.updateStatus('🎙️ Recording in progress...');
                    break;

                case 'recording_stopped':
                    this.updateStatus('⏳ Processing audio (unmute.sh flush trick)...');
                    break;

                case 'chunk_buffered':
                    this.updateStatus(`📦 Buffering audio chunks (${data.buffer_chunks} chunks)`);
                    break;

                case 'error':
                case 'transcription_error':
                case 'tts_error':
                    this.updateStatus(`❌ Error: ${data.message}`);
                    break;
            }
        }

        setupEventListeners() {
            document.getElementById('start-recording').addEventListener('click', () => {
                this.startRecording();
            });

            document.getElementById('stop-recording').addEventListener('click', () => {
                this.stopRecording();
            });

            document.getElementById('test-tts').addEventListener('click', () => {
                this.testTTS();
            });
        }

        async startRecording() {
            try {
                const stream = await navigator.mediaDevices.getUserMedia({
                    audio: {
                        sampleRate: 16000,
                        channelCount: 1,
                        echoCancellation: true,
                        noiseSuppression: true
                    }
                });

                // unmute.sh pattern: use MediaRecorder with WebM/Opus.
                this.mediaRecorder = new MediaRecorder(stream, {
                    mimeType: 'audio/webm;codecs=opus'
                });

                this.audioChunks = [];

                this.mediaRecorder.ondataavailable = (event) => {
                    if (event.data.size > 0) {
                        this.audioChunks.push(event.data);

                        // Real-time streaming: send chunks as they arrive.
                        const reader = new FileReader();
                        reader.onload = () => {
                            // Encode to base64 byte-by-byte; spreading a large
                            // Uint8Array into String.fromCharCode can overflow
                            // the call stack.
                            const bytes = new Uint8Array(reader.result);
                            let binary = '';
                            for (let i = 0; i < bytes.length; i++) {
                                binary += String.fromCharCode(bytes[i]);
                            }
                            this.sendWebSocketMessage({
                                type: 'audio_chunk',
                                audio_data: btoa(binary),
                                sample_rate: 16000
                            });
                        };
                        reader.readAsArrayBuffer(event.data);
                    }
                };

                this.mediaRecorder.onstop = () => {
                    // unmute.sh flush trick: signal end of recording.
                    this.sendWebSocketMessage({
                        type: 'stop_recording'
                    });

                    stream.getTracks().forEach(track => track.stop());
                };

                // Start recording with a small timeslice for real-time streaming.
                this.mediaRecorder.start(250); // 250ms chunks following unmute.sh pattern

                this.isRecording = true;

                // Send start recording message.
                this.sendWebSocketMessage({
                    type: 'start_recording'
                });

                // Update UI.
                document.getElementById('start-recording').disabled = true;
                document.getElementById('stop-recording').disabled = false;
                this.updateStatus('🎙️ Recording started - Speak now...');

            } catch (error) {
                console.error('Recording failed:', error);
                this.updateStatus('❌ Microphone access failed');
            }
        }

        stopRecording() {
            if (this.mediaRecorder && this.isRecording) {
                this.mediaRecorder.stop();
                this.isRecording = false;

                // Update UI.
                document.getElementById('start-recording').disabled = false;
                document.getElementById('stop-recording').disabled = true;
                this.updateStatus('⏹️ Recording stopped - Processing...');
            }
        }

        sendWebSocketMessage(message) {
            if (this.websocket && this.websocket.readyState === WebSocket.OPEN) {
                this.websocket.send(JSON.stringify(message));
            }
        }

        updateStatus(message) {
            document.getElementById('status').innerHTML = `Status: ${message}`;
        }

        updateTranscription(text) {
            document.getElementById('transcription-text').innerHTML = text;
        }

        playTTSAudio(audioData, format) {
            try {
                const audioElement = document.getElementById('tts-audio');
                const audioBytes = atob(audioData);
                const audioArray = new Uint8Array(audioBytes.length);

                for (let i = 0; i < audioBytes.length; i++) {
                    audioArray[i] = audioBytes.charCodeAt(i);
                }

                const audioBlob = new Blob([audioArray], { type: `audio/${format}` });
                const audioUrl = URL.createObjectURL(audioBlob);

                audioElement.src = audioUrl;
                audioElement.style.display = 'block';
                audioElement.play();

                this.updateStatus('🔊 Playing TTS audio response');

            } catch (error) {
                console.error('TTS playback failed:', error);
                this.updateStatus('❌ TTS playback failed');
            }
        }

        testTTS() {
            const testText = "Hello! This is a test of the voice synthesis system. VoiceCal is working with WebRTC integration following the unmute.sh pattern.";

            this.sendWebSocketMessage({
                type: 'tts_request',
                text: testText,
                voice_preset: 'v2/en_speaker_6'
            });

            this.updateStatus('🔊 Requesting TTS synthesis...');
        }
    }

    // Initialize exactly once, whether or not the DOM is already parsed.
    // (The original registered a second DOMContentLoaded listener on top of
    // this check, creating two instances and two WebSocket connections.)
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', () => {
            window.voiceCalWebRTC = new VoiceCalWebRTC();
        });
    } else {
        window.voiceCalWebRTC = new VoiceCalWebRTC();
    }
    </script>
    """

    st.components.v1.html(webrtc_html, height=600)

    # --- Technical details --------------------------------------------------
    st.markdown("---")
    st.header("🔧 Technical Details")

    col1, col2 = st.columns(2)

    with col1:
        st.subheader("📡 WebRTC Configuration")
        # Plain string (the original used a needless f-string with escaped
        # braces); {client_id} is a literal placeholder shown to the user.
        st.code("""
WebSocket URL: wss://pgits-voicecal.hf.space/ws/webrtc/{client_id}
STT Endpoint: wss://pgits-stt-gpu-service.hf.space/ws/stt
TTS Endpoint: wss://pgits-tts-gpu-service.hf.space/ws/tts
Audio Format: WebM/Opus (16kHz, Mono)
Chunk Size: 250ms (unmute.sh pattern)
        """)

    with col2:
        st.subheader("🎯 Features")
        st.write("✅ Real-time audio streaming")
        st.write("✅ WebRTC MediaRecorder integration")
        st.write("✅ unmute.sh pattern implementation")
        st.write("✅ Automatic chunking & buffering")
        st.write("✅ Flush trick for end-of-stream")
        st.write("✅ Bidirectional voice communication")

    st.subheader("🔗 Service Endpoints")
    st.json({
        "voicecal_websocket": "wss://pgits-voicecal.hf.space/ws/webrtc/demo-xxxx",
        "stt_service": "wss://pgits-stt-gpu-service.hf.space/ws/stt",
        "tts_service": "wss://pgits-tts-gpu-service.hf.space/ws/tts",
        "pattern": "unmute.sh WebRTC implementation",
        "status": "Ready for voice interaction",
    })

    st.markdown("---")
    st.markdown("🚀 **VoiceCal WebRTC Integration** - Following unmute.sh pattern for optimal voice processing")
|
|
# Script entry point: render the app when run directly (pipe-mangled
# formatting repaired).
if __name__ == "__main__":
    main()