# voiceCal / streamlit_websocket_app.py
# Author: Peter Michael Gits
# restore: Bring back full VoiceCal WebRTC interface (commit 3763b20)
#!/usr/bin/env python3
"""
Streamlit app with embedded WebSocket server for VoiceCal WebRTC
Single-service approach for HuggingFace Spaces compatibility
"""
import streamlit as st
import asyncio
import threading
import json
import sys
from datetime import datetime
import os
# Streamlit page configuration — must execute before any other st.* call.
st.set_page_config(
    page_title="VoiceCal - Voice Assistant",
    page_icon="🎀",
    layout="wide",
)
def main():
    """Render the VoiceCal Streamlit page.

    Builds a service-status dashboard, embeds a self-contained HTML/JS
    WebRTC voice interface (via ``st.components.v1.html``) that connects
    directly from the browser to a remote STT WebSocket service, and
    prints technical details about the connection.

    NOTE(review): the embedded JavaScript runs inside the component
    iframe and never reports back to Python — transcriptions stay in
    the browser DOM.
    """
    st.title("πŸŽ€πŸ“… VoiceCal - Voice-Enabled AI Assistant")
    st.markdown("**WebRTC Voice Integration Following unmute.sh Pattern**")

    # Service status dashboard (static labels — no live health checks here).
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric("🎀 VoiceCal", "Online", "βœ…")
        st.metric("πŸ“‘ WebSocket", "Embedded", "πŸ”§")
    with col2:
        st.metric("🧠 STT Service", "Ready", "βœ…")
        st.metric("πŸ”Š TTS Service", "Ready", "βœ…")
    with col3:
        st.metric("🌐 Connection", "Direct", "⚑")
        st.metric("πŸ“± Pattern", "unmute.sh", "🎯")

    # Connection Status banner.
    st.success("🎯 **STT Service Connected**: `wss://pgits-stt-gpu-service.hf.space/ws/stt`")

    # WebRTC Integration Section
    st.markdown("---")
    st.header("🌐 WebRTC Voice Interface")

    # Browser-side interface: records mic audio with MediaRecorder and ships
    # it to the STT service over a single WebSocket as base64-encoded WebM.
    webrtc_html = """
    <div id="voice-interface" style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; border-radius: 10px; margin: 20px 0;">
        <h3 style="color: white; margin-top: 0;">🎀 Voice Interface (Direct STT Connection)</h3>
        <div style="display: flex; gap: 10px; margin: 20px 0;">
            <button id="start-recording" style="background: #ff4757; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;">
                πŸŽ™οΈ Start Recording
            </button>
            <button id="stop-recording" style="background: #2f3542; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;" disabled>
                ⏹️ Stop Recording
            </button>
            <button id="test-connection" style="background: #5352ed; color: white; border: none; padding: 10px 20px; border-radius: 5px; cursor: pointer;">
                πŸ”— Test STT Connection
            </button>
        </div>
        <div id="status" style="background: rgba(0,0,0,0.2); padding: 10px; border-radius: 5px; color: white; font-family: monospace;">
            Status: Ready to connect to STT service...
        </div>
        <div id="transcription" style="background: rgba(255,255,255,0.9); padding: 15px; border-radius: 5px; margin-top: 10px; min-height: 50px;">
            <strong>Transcription:</strong> <span id="transcription-text">Ready for voice input...</span>
        </div>
    </div>
    <script>
    // Direct STT WebSocket Connection (unmute.sh Pattern)
    class VoiceCalDirectSTT {
        constructor() {
            this.sttWebSocket = null;
            this.mediaRecorder = null;
            this.audioChunks = [];
            this.isRecording = false;
            this.clientId = 'voicecal-' + Math.random().toString(36).substr(2, 9);
            // Connect to standalone WebSocket STT service v1.0.0
            this.sttWebSocketUrl = 'wss://pgits-stt-gpu-service.hf.space/ws/stt';
            this.setupEventListeners();
        }

        setupEventListeners() {
            document.getElementById('start-recording').addEventListener('click', () => {
                this.startRecording();
            });
            document.getElementById('stop-recording').addEventListener('click', () => {
                this.stopRecording();
            });
            document.getElementById('test-connection').addEventListener('click', () => {
                this.testSTTConnection();
            });
        }

        async testSTTConnection() {
            this.updateStatus('πŸ”— Testing WebSocket STT service connection...');
            try {
                // Throwaway socket: open, report, close.
                const testSocket = new WebSocket(this.sttWebSocketUrl);
                testSocket.onopen = () => {
                    this.updateStatus('βœ… STT WebSocket connection successful!');
                    console.log('STT service WebSocket is ready');
                    testSocket.close();
                };
                testSocket.onerror = (error) => {
                    this.updateStatus('❌ STT WebSocket connection failed');
                    console.error('STT WebSocket error:', error);
                };
            } catch (error) {
                this.updateStatus('❌ Failed to test STT WebSocket connection');
                console.error('STT connection test error:', error);
            }
        }

        // FIX: previously resolved `true` immediately after constructing the
        // WebSocket, before onopen fired — a very short recording could then
        // try to send while the socket was still CONNECTING. Now resolves
        // only once the socket is actually open (or has failed).
        connectToSTT() {
            this.updateStatus('πŸ”Œ Connecting to STT service...');
            return new Promise((resolve) => {
                try {
                    this.sttWebSocket = new WebSocket(this.sttWebSocketUrl);
                    this.sttWebSocket.onopen = () => {
                        this.updateStatus('βœ… Connected to STT service - Ready for audio');
                        resolve(true);
                    };
                    this.sttWebSocket.onmessage = (event) => {
                        const data = JSON.parse(event.data);
                        this.handleSTTResponse(data);
                    };
                    this.sttWebSocket.onclose = () => {
                        this.updateStatus('πŸ”Œ STT connection closed');
                    };
                    this.sttWebSocket.onerror = (error) => {
                        this.updateStatus('❌ STT connection error');
                        console.error('STT WebSocket error:', error);
                        resolve(false);  // no-op if already resolved by onopen
                    };
                } catch (error) {
                    this.updateStatus('❌ Failed to connect to STT service');
                    console.error('STT connection failed:', error);
                    resolve(false);
                }
            });
        }

        handleSTTResponse(data) {
            console.log('STT WebSocket Response:', data);
            switch(data.type) {
                case 'stt_connection_confirmed':
                    this.updateStatus(`βœ… ${data.service} v${data.version} connected - ${data.model} ready`);
                    break;
                case 'stt_transcription_complete':
                    this.updateTranscription(data.transcription);
                    const processingTime = data.timing?.processing_time || 'unknown';
                    this.updateStatus(`βœ… Transcription completed (${processingTime}s)`);
                    break;
                case 'stt_transcription_error':
                    this.updateStatus(`❌ Transcription error: ${data.error}`);
                    break;
                case 'pong':
                    console.log('STT service pong received');
                    break;
                default:
                    console.log('Unknown STT response type:', data.type);
            }
        }

        async startRecording() {
            // Connect to STT service first; bail out if it never opens.
            const connected = await this.connectToSTT();
            if (!connected) {
                return;
            }
            try {
                const stream = await navigator.mediaDevices.getUserMedia({
                    audio: {
                        sampleRate: 16000,
                        channelCount: 1,
                        echoCancellation: true,
                        noiseSuppression: true
                    }
                });
                // unmute.sh pattern: WebM format with small chunks
                this.mediaRecorder = new MediaRecorder(stream, {
                    mimeType: 'audio/webm;codecs=opus'
                });
                this.audioChunks = [];
                this.mediaRecorder.ondataavailable = (event) => {
                    if (event.data.size > 0) {
                        this.audioChunks.push(event.data);
                    }
                };
                this.mediaRecorder.onstop = () => {
                    this.processRecordedAudio();
                    stream.getTracks().forEach(track => track.stop());
                };
                this.mediaRecorder.start();
                this.isRecording = true;
                // Update UI
                document.getElementById('start-recording').disabled = true;
                document.getElementById('stop-recording').disabled = false;
                this.updateStatus('πŸŽ™οΈ Recording audio - Speak now...');
            } catch (error) {
                console.error('Recording failed:', error);
                this.updateStatus('❌ Microphone access failed');
            }
        }

        stopRecording() {
            if (this.mediaRecorder && this.isRecording) {
                this.mediaRecorder.stop();
                this.isRecording = false;
                // Update UI
                document.getElementById('start-recording').disabled = false;
                document.getElementById('stop-recording').disabled = true;
                this.updateStatus('⏹️ Recording stopped - Processing audio...');
            }
        }

        async processRecordedAudio() {
            if (this.audioChunks.length === 0) {
                this.updateStatus('❌ No audio data recorded');
                return;
            }
            try {
                this.updateStatus('βš™οΈ Processing audio with WebSocket STT...');
                // Combine all audio chunks (unmute.sh pattern)
                const audioBlob = new Blob(this.audioChunks, { type: 'audio/webm' });
                // Send to STT service via WebSocket
                await this.sendAudioViaWebSocket(audioBlob);
            } catch (error) {
                console.error('Audio processing failed:', error);
                this.updateStatus('❌ Audio processing failed');
            }
        }

        async sendAudioViaWebSocket(audioBlob) {
            try {
                if (!this.sttWebSocket || this.sttWebSocket.readyState !== WebSocket.OPEN) {
                    this.updateStatus('❌ WebSocket not connected');
                    return;
                }
                this.updateStatus('πŸ“€ Sending audio to STT via WebSocket...');
                // FIX: btoa(String.fromCharCode(...new Uint8Array(buf))) spreads
                // every byte as a function argument and throws "Maximum call
                // stack size exceeded" on recordings longer than a few seconds.
                // Convert in bounded chunks instead.
                const arrayBuffer = await audioBlob.arrayBuffer();
                const bytes = new Uint8Array(arrayBuffer);
                const CHUNK = 0x8000;  // 32 KiB keeps apply() argument count safe
                let binary = '';
                for (let i = 0; i < bytes.length; i += CHUNK) {
                    binary += String.fromCharCode.apply(null, bytes.subarray(i, i + CHUNK));
                }
                const base64Audio = btoa(binary);
                // Send audio data via WebSocket to standalone STT service v1.0.0
                this.sttWebSocket.send(JSON.stringify({
                    type: "stt_audio_chunk",
                    audio_data: base64Audio,
                    language: "auto",
                    model_size: "base",
                    client_id: this.clientId
                }));
                console.log('Audio sent via WebSocket:', base64Audio.length, 'base64 chars');
            } catch (error) {
                console.error('WebSocket audio transmission failed:', error);
                this.updateStatus('❌ WebSocket transmission failed: ' + error.message);
            }
        }

        // HTTP (Gradio) API fallback intentionally removed — WebSocket-only by design.

        updateStatus(message) {
            document.getElementById('status').innerHTML = `Status: ${message}`;
        }

        updateTranscription(text) {
            document.getElementById('transcription-text').innerHTML = text;
        }
    }

    // Initialize when DOM is ready
    if (document.readyState === 'loading') {
        document.addEventListener('DOMContentLoaded', () => {
            window.voiceCalDirectSTT = new VoiceCalDirectSTT();
        });
    } else {
        window.voiceCalDirectSTT = new VoiceCalDirectSTT();
    }
    </script>
    """

    # Render the WebRTC interface inside a component iframe.
    st.components.v1.html(webrtc_html, height=500)

    # Technical Information
    st.markdown("---")
    st.header("πŸ”§ Technical Details")
    col1, col2 = st.columns(2)
    with col1:
        st.subheader("πŸ“‘ WebSocket Connection")
        st.code("""
        STT WebSocket: wss://pgits-stt-gpu-service.hf.space/ws/stt
        Audio Format: WebM/Opus (16kHz, Mono)
        Service: Standalone STT v1.0.0
        Pattern: unmute.sh methodology
        Connection: Pure WebSocket (no fallbacks)
        """)
    with col2:
        st.subheader("🎯 Features")
        st.write("βœ… Pure WebSocket STT connection")
        st.write("βœ… WebRTC MediaRecorder integration")
        st.write("βœ… unmute.sh audio processing")
        st.write("βœ… Real-time voice transcription")
        st.write("βœ… Standalone STT service v1.0.0")
        st.write("βœ… No HTTP API fallbacks")
        st.write("βœ… Base64 audio transmission")

    # Connection Status summary (static — mirrors the constants above).
    st.subheader("πŸ”— Service Status")
    st.json({
        "stt_websocket": "wss://pgits-stt-gpu-service.hf.space/ws/stt",
        "stt_service": "Standalone WebSocket STT v1.0.0",
        "connection_type": "pure_websocket",
        "audio_format": "WebM/Opus 16kHz",
        "transmission": "Base64 encoded",
        "pattern": "unmute.sh WebSocket methodology",
        "fallbacks": "disabled",
        "status": "Ready for WebSocket voice interaction"
    })

    # Footer
    st.markdown("---")
    st.markdown("πŸš€ **VoiceCal WebSocket STT** - Pure WebSocket WebRTC with standalone STT service v1.0.0")
# Script entry point.
if __name__ == "__main__":
    main()