# voiceCalendar/webrtc/server/fastapi_integration.py
# Author: Peter Michael Gits
# feat: Add Streamlit-native WebRTC speech-to-text using unmute.sh patterns
# Commit: 21fac9b
"""
FastAPI integration for WebRTC WebSocket endpoints
Mounts alongside Gradio for real-time audio streaming
"""
from fastapi import FastAPI, WebSocket, WebSocketDisconnect
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
import json
import logging
import uuid
from .websocket_handler import webrtc_handler
logger = logging.getLogger(__name__)
def create_fastapi_app() -> FastAPI:
    """Create FastAPI app with WebRTC WebSocket endpoints.

    The app is mounted alongside Gradio to provide real-time audio streaming.

    Routes:
        * ``/ws/webrtc/{client_id}``: WebSocket for real-time audio streaming.
        * ``/webrtc/test``: JSON health check listing the available endpoints.
        * ``/webrtc/demo``: self-contained HTML/JS demo page for manual testing.

    Returns:
        FastAPI: the configured application.
    """
    app = FastAPI(
        title="ChatCal WebRTC API",
        description="Real-time audio streaming API for ChatCal Voice",
        version="0.4.1"
    )

    @app.websocket("/ws/webrtc/{client_id}")
    async def websocket_endpoint(websocket: WebSocket, client_id: str):
        """WebRTC WebSocket endpoint for real-time audio streaming.

        Receives JSON text frames from the client and forwards them to the
        shared ``webrtc_handler``. Malformed JSON is reported back to the
        client without closing the connection; handler-side state is always
        cleaned up on exit.
        """
        try:
            await webrtc_handler.connect(websocket, client_id)
            while True:
                # Receive message from client
                try:
                    message = await websocket.receive_text()
                    data = json.loads(message)
                    # Handle message through WebRTC handler
                    await webrtc_handler.handle_message(client_id, data)
                except json.JSONDecodeError:
                    # Bad payloads are non-fatal: report and keep reading.
                    await webrtc_handler.send_message(client_id, {
                        "type": "error",
                        "message": "Invalid JSON message format"
                    })
        except WebSocketDisconnect:
            logger.info(f"Client {client_id} disconnected")
        except Exception as e:
            logger.error(f"WebSocket error for {client_id}: {e}")
        finally:
            # Always release handler-side state for this client id,
            # even if connect() failed part-way through.
            await webrtc_handler.disconnect(client_id)

    @app.get("/webrtc/test")
    async def webrtc_test():
        """Test endpoint to verify WebRTC API is working."""
        return {
            "status": "ok",
            "message": "WebRTC API is running",
            "version": "0.4.1",
            "endpoints": {
                "websocket": "/ws/webrtc/{client_id}",
                "test_page": "/webrtc/demo"
            }
        }

    @app.get("/webrtc/demo")
    async def webrtc_demo():
        """Serve the self-contained WebRTC demo page for manual testing."""
        demo_html = """
        <!DOCTYPE html>
        <html>
        <head>
            <title>ChatCal WebRTC Demo</title>
            <style>
                body { font-family: Arial, sans-serif; margin: 40px; }
                .container { max-width: 800px; margin: 0 auto; }
                .status { padding: 10px; margin: 10px 0; border-radius: 5px; }
                .status.connected { background: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
                .status.error { background: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
                .controls { margin: 20px 0; }
                button { padding: 10px 20px; margin: 5px; border: none; border-radius: 5px; cursor: pointer; }
                .record-btn { background: #dc3545; color: white; }
                .stop-btn { background: #6c757d; color: white; }
                .transcriptions { background: #f8f9fa; border: 1px solid #dee2e6; padding: 15px; margin: 10px 0; border-radius: 5px; min-height: 100px; }
                .transcription-item { margin: 5px 0; padding: 5px; background: white; border-radius: 3px; }
            </style>
        </head>
        <body>
            <div class="container">
                <h1>🎀 ChatCal WebRTC Demo</h1>
                <div id="status" class="status">Connecting...</div>
                <div class="controls">
                    <button id="recordBtn" class="record-btn" disabled>🎀 Start Recording</button>
                    <button id="stopBtn" class="stop-btn" disabled>⏹️ Stop Recording</button>
                </div>
                <div id="transcriptions" class="transcriptions">
                    <div><em>Transcriptions will appear here...</em></div>
                </div>
            </div>
            <script>
                let websocket = null;
                let mediaRecorder = null;
                let audioStream = null;
                let isRecording = false;
                const clientId = 'demo-' + Math.random().toString(36).substr(2, 9);
                const statusDiv = document.getElementById('status');
                const recordBtn = document.getElementById('recordBtn');
                const stopBtn = document.getElementById('stopBtn');
                const transcriptionsDiv = document.getElementById('transcriptions');

                // Connect to WebSocket
                function connect() {
                    // Use wss:// for HTTPS (Hugging Face Spaces) or ws:// for local development
                    const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
                    const wsUrl = `${protocol}//${window.location.host}/ws/webrtc/${clientId}`;
                    console.log('Connecting to WebSocket:', wsUrl);
                    websocket = new WebSocket(wsUrl);
                    websocket.onopen = function() {
                        console.log('WebSocket connected successfully');
                        statusDiv.textContent = `Connected (ID: ${clientId})`;
                        statusDiv.className = 'status connected';
                        recordBtn.disabled = false;
                    };
                    websocket.onmessage = function(event) {
                        console.log('WebSocket message received:', event.data);
                        try {
                            const data = JSON.parse(event.data);
                            handleMessage(data);
                        } catch (e) {
                            console.error('Failed to parse WebSocket message:', e);
                            addTranscription('Error parsing server response', new Date().toISOString(), true);
                        }
                    };
                    websocket.onclose = function(event) {
                        console.log('WebSocket closed:', event.code, event.reason);
                        statusDiv.textContent = `Disconnected (Code: ${event.code})`;
                        statusDiv.className = 'status error';
                        recordBtn.disabled = true;
                        stopBtn.disabled = true;
                    };
                    websocket.onerror = function(error) {
                        console.error('WebSocket error:', error);
                        statusDiv.textContent = 'Connection error - Check console';
                        statusDiv.className = 'status error';
                    };
                }

                function handleMessage(data) {
                    console.log('Received:', data);
                    if (data.type === 'transcription') {
                        addTranscription(data.text, data.timestamp);
                        // Demo TTS response disabled due to poor transcription accuracy
                        // Only first 3 words were being transcribed correctly
                        // if (data.text && data.text.trim()) {
                        //     const demoResponse = `I heard you say: "${data.text}". This is a demo TTS response.`;
                        //     setTimeout(() => {
                        //         requestTTSPlayback(demoResponse);
                        //     }, 1000); // Wait 1 second before TTS response
                        // }
                    } else if (data.type === 'tts_playback') {
                        playTTSAudio(data.audio_data, data.text);
                    } else if (data.type === 'tts_error') {
                        console.error('TTS Error:', data.message);
                        addTranscription(`TTS Error: ${data.message}`, data.timestamp, true);
                    } else if (data.type === 'error') {
                        addTranscription(`Error: ${data.message}`, data.timestamp, true);
                    }
                }

                function addTranscription(text, timestamp, isError = false) {
                    const item = document.createElement('div');
                    item.className = 'transcription-item';
                    if (isError) item.style.backgroundColor = '#f8d7da';
                    // Server error frames carry no timestamp; fall back to "now"
                    // instead of rendering "Invalid Date".
                    const time = new Date(timestamp || Date.now()).toLocaleTimeString();
                    item.innerHTML = `<strong>${time}:</strong> ${text}`;
                    // Clear the <em> placeholder on the first real entry.
                    // (children[0] is the wrapping DIV, so comparing its tagName
                    // to 'EM' never matched and the placeholder was never removed.)
                    if (transcriptionsDiv.querySelector('em')) {
                        transcriptionsDiv.innerHTML = '';
                    }
                    transcriptionsDiv.appendChild(item);
                    transcriptionsDiv.scrollTop = transcriptionsDiv.scrollHeight;
                }

                // Audio recording functions
                async function startRecording() {
                    try {
                        console.log('Requesting microphone access...');
                        addTranscription('Requesting microphone access...', new Date().toISOString());
                        audioStream = await navigator.mediaDevices.getUserMedia({
                            audio: { sampleRate: 16000, channelCount: 1 }
                        });
                        console.log('Microphone access granted');
                        addTranscription('Microphone access granted', new Date().toISOString());
                        // Configure MediaRecorder with consistent WebM/Opus format (unmute.sh methodology)
                        const mimeType = 'audio/webm;codecs=opus';
                        if (!MediaRecorder.isTypeSupported(mimeType)) {
                            console.warn('WebM/Opus not supported, falling back to default format');
                            mediaRecorder = new MediaRecorder(audioStream);
                        } else {
                            console.log('Using WebM/Opus format for consistent encoding');
                            mediaRecorder = new MediaRecorder(audioStream, {
                                mimeType: mimeType,
                                audioBitsPerSecond: 128000 // 128 kbps for good quality
                            });
                        }
                        mediaRecorder.ondataavailable = function(event) {
                            console.log('Audio chunk available, size:', event.data.size);
                            if (event.data.size > 0 && websocket.readyState === WebSocket.OPEN) {
                                console.log('Sending audio chunk to server...');
                                // Convert blob to base64 and send
                                const reader = new FileReader();
                                reader.onloadend = function() {
                                    const base64 = reader.result.split(',')[1];
                                    websocket.send(JSON.stringify({
                                        type: 'audio_chunk',
                                        audio_data: base64,
                                        sample_rate: 16000
                                    }));
                                    console.log('Audio chunk sent');
                                };
                                reader.readAsDataURL(event.data);
                            } else {
                                if (event.data.size === 0) console.log('Empty audio chunk');
                                if (websocket.readyState !== WebSocket.OPEN) console.log('WebSocket not ready');
                            }
                        };
                        mediaRecorder.start(1000); // Send chunks every 1 second
                        isRecording = true;
                        recordBtn.disabled = true;
                        stopBtn.disabled = false;
                        recordBtn.textContent = '🎀 Recording...';
                        // Send start recording message
                        websocket.send(JSON.stringify({
                            type: 'start_recording'
                        }));
                    } catch (error) {
                        console.error('Error starting recording:', error);
                        addTranscription('Error: Could not access microphone', new Date().toISOString(), true);
                    }
                }

                function stopRecording() {
                    if (mediaRecorder && isRecording) {
                        mediaRecorder.stop();
                        audioStream.getTracks().forEach(track => track.stop());
                        isRecording = false;
                        recordBtn.disabled = false;
                        stopBtn.disabled = true;
                        recordBtn.textContent = '🎀 Start Recording';
                        // Send stop recording message
                        websocket.send(JSON.stringify({
                            type: 'stop_recording'
                        }));
                    }
                }

                function requestTTSPlayback(text, voicePreset = 'v2/en_speaker_6') {
                    console.log('Requesting TTS playback:', text);
                    if (websocket && websocket.readyState === WebSocket.OPEN) {
                        websocket.send(JSON.stringify({
                            type: 'tts_request',
                            text: text,
                            voice_preset: voicePreset
                        }));
                    } else {
                        console.error('WebSocket not available for TTS request');
                    }
                }

                function playTTSAudio(audioBase64, text) {
                    console.log('Playing TTS audio for:', text);
                    try {
                        // Convert base64 to audio blob
                        const audioData = atob(audioBase64);
                        const arrayBuffer = new ArrayBuffer(audioData.length);
                        const uint8Array = new Uint8Array(arrayBuffer);
                        for (let i = 0; i < audioData.length; i++) {
                            uint8Array[i] = audioData.charCodeAt(i);
                        }
                        const audioBlob = new Blob([arrayBuffer], { type: 'audio/wav' });
                        const audioUrl = URL.createObjectURL(audioBlob);
                        const audio = new Audio(audioUrl);
                        audio.onloadeddata = () => {
                            console.log('TTS audio loaded, playing...');
                            addTranscription(`🔊 Playing: ${text}`, new Date().toISOString(), false);
                        };
                        audio.onended = () => {
                            console.log('TTS audio finished playing');
                            URL.revokeObjectURL(audioUrl); // Clean up
                        };
                        audio.onerror = (error) => {
                            console.error('TTS audio playback error:', error);
                            addTranscription(`TTS Playback Error: ${error}`, new Date().toISOString(), true);
                        };
                        audio.play().catch(error => {
                            console.error('Failed to play TTS audio:', error);
                            addTranscription(`TTS Play Error: User interaction may be required`, new Date().toISOString(), true);
                        });
                    } catch (error) {
                        console.error('Error processing TTS audio:', error);
                        addTranscription(`TTS Processing Error: ${error}`, new Date().toISOString(), true);
                    }
                }

                // Event listeners
                recordBtn.addEventListener('click', startRecording);
                stopBtn.addEventListener('click', stopRecording);
                // Initialize
                connect();
            </script>
        </body>
        </html>
        """
        return HTMLResponse(content=demo_html)

    return app