| """ |
| FastAPI integration for WebRTC WebSocket endpoints |
| Mounts alongside Gradio for real-time audio streaming |
| """ |
|
|
| from fastapi import FastAPI, WebSocket, WebSocketDisconnect |
| from fastapi.responses import HTMLResponse |
| from fastapi.staticfiles import StaticFiles |
| import json |
| import logging |
| import uuid |
| from .websocket_handler import webrtc_handler |
|
|
# Module-level logger; handler/level configuration is left to the host application.
logger = logging.getLogger(__name__)
|
|
def create_fastapi_app() -> FastAPI:
    """Create the FastAPI app exposing the WebRTC endpoints.

    Routes:
        WS  /ws/webrtc/{client_id} -- bidirectional JSON message stream for audio.
        GET /webrtc/test           -- liveness/info probe for the API.
        GET /webrtc/demo           -- self-contained HTML/JS demo client page.

    Returns:
        A configured ``FastAPI`` instance (mounted alongside Gradio by the caller).
    """
    # Single source of truth for the advertised version, so the app metadata
    # and the /webrtc/test payload cannot drift apart.
    api_version = "0.4.1"

    app = FastAPI(
        title="ChatCal WebRTC API",
        description="Real-time audio streaming API for ChatCal Voice",
        version=api_version,
    )

    @app.websocket("/ws/webrtc/{client_id}")
    async def websocket_endpoint(websocket: WebSocket, client_id: str):
        """WebRTC WebSocket endpoint for real-time audio streaming.

        Registers the connection with the shared ``webrtc_handler``, then
        relays each incoming JSON text frame to the handler until the client
        disconnects. Malformed JSON is reported back to the client instead of
        tearing down the connection.
        """
        try:
            await webrtc_handler.connect(websocket, client_id)

            while True:
                try:
                    message = await websocket.receive_text()
                    data = json.loads(message)
                    await webrtc_handler.handle_message(client_id, data)
                except json.JSONDecodeError:
                    # A bad payload is a client error, not a server failure:
                    # tell the client and keep the socket open.
                    await webrtc_handler.send_message(client_id, {
                        "type": "error",
                        "message": "Invalid JSON message format"
                    })

        except WebSocketDisconnect:
            logger.info(f"Client {client_id} disconnected")
        except Exception as e:
            logger.error(f"WebSocket error for {client_id}: {e}")
        finally:
            # Always release handler-side state, even on abnormal exits.
            await webrtc_handler.disconnect(client_id)

    @app.get("/webrtc/test")
    async def webrtc_test():
        """Test endpoint to verify the WebRTC API is working."""
        return {
            "status": "ok",
            "message": "WebRTC API is running",
            "version": api_version,
            "endpoints": {
                "websocket": "/ws/webrtc/{client_id}",
                "test_page": "/webrtc/demo"
            }
        }

    @app.get("/webrtc/demo")
    async def webrtc_demo():
        """Serve the self-contained WebRTC demo page for manual testing."""
        demo_html = """
        <!DOCTYPE html>
        <html>
        <head>
            <title>ChatCal WebRTC Demo</title>
            <style>
                body { font-family: Arial, sans-serif; margin: 40px; }
                .container { max-width: 800px; margin: 0 auto; }
                .status { padding: 10px; margin: 10px 0; border-radius: 5px; }
                .status.connected { background: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
                .status.error { background: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
                .controls { margin: 20px 0; }
                button { padding: 10px 20px; margin: 5px; border: none; border-radius: 5px; cursor: pointer; }
                .record-btn { background: #dc3545; color: white; }
                .stop-btn { background: #6c757d; color: white; }
                .transcriptions { background: #f8f9fa; border: 1px solid #dee2e6; padding: 15px; margin: 10px 0; border-radius: 5px; min-height: 100px; }
                .transcription-item { margin: 5px 0; padding: 5px; background: white; border-radius: 3px; }
            </style>
        </head>
        <body>
            <div class="container">
                <h1>π€ ChatCal WebRTC Demo</h1>
                <div id="status" class="status">Connecting...</div>

                <div class="controls">
                    <button id="recordBtn" class="record-btn" disabled>π€ Start Recording</button>
                    <button id="stopBtn" class="stop-btn" disabled>βΉοΈ Stop Recording</button>
                </div>

                <div id="transcriptions" class="transcriptions">
                    <div><em>Transcriptions will appear here...</em></div>
                </div>
            </div>

            <script>
                let websocket = null;
                let mediaRecorder = null;
                let audioStream = null;
                let isRecording = false;

                // substr() is deprecated; slice(2, 11) takes the same 9 chars.
                const clientId = 'demo-' + Math.random().toString(36).slice(2, 11);
                const statusDiv = document.getElementById('status');
                const recordBtn = document.getElementById('recordBtn');
                const stopBtn = document.getElementById('stopBtn');
                const transcriptionsDiv = document.getElementById('transcriptions');

                // Connect to WebSocket
                function connect() {
                    // Use wss:// for HTTPS (Hugging Face Spaces) or ws:// for local development
                    const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
                    const wsUrl = `${protocol}//${window.location.host}/ws/webrtc/${clientId}`;
                    console.log('Connecting to WebSocket:', wsUrl);
                    websocket = new WebSocket(wsUrl);

                    websocket.onopen = function() {
                        console.log('WebSocket connected successfully');
                        statusDiv.textContent = `Connected (ID: ${clientId})`;
                        statusDiv.className = 'status connected';
                        recordBtn.disabled = false;
                    };

                    websocket.onmessage = function(event) {
                        console.log('WebSocket message received:', event.data);
                        try {
                            const data = JSON.parse(event.data);
                            handleMessage(data);
                        } catch (e) {
                            console.error('Failed to parse WebSocket message:', e);
                            addTranscription('Error parsing server response', new Date().toISOString(), true);
                        }
                    };

                    websocket.onclose = function(event) {
                        console.log('WebSocket closed:', event.code, event.reason);
                        statusDiv.textContent = `Disconnected (Code: ${event.code})`;
                        statusDiv.className = 'status error';
                        recordBtn.disabled = true;
                        stopBtn.disabled = true;
                    };

                    websocket.onerror = function(error) {
                        console.error('WebSocket error:', error);
                        statusDiv.textContent = 'Connection error - Check console';
                        statusDiv.className = 'status error';
                    };
                }

                function handleMessage(data) {
                    console.log('Received:', data);

                    if (data.type === 'transcription') {
                        addTranscription(data.text, data.timestamp);

                        // Demo TTS response disabled due to poor transcription accuracy
                        // Only first 3 words were being transcribed correctly
                        // if (data.text && data.text.trim()) {
                        //     const demoResponse = `I heard you say: "${data.text}". This is a demo TTS response.`;
                        //     setTimeout(() => {
                        //         requestTTSPlayback(demoResponse);
                        //     }, 1000); // Wait 1 second before TTS response
                        // }
                    } else if (data.type === 'tts_playback') {
                        playTTSAudio(data.audio_data, data.text);
                    } else if (data.type === 'tts_error') {
                        console.error('TTS Error:', data.message);
                        addTranscription(`TTS Error: ${data.message}`, data.timestamp, true);
                    } else if (data.type === 'error') {
                        addTranscription(`Error: ${data.message}`, data.timestamp, true);
                    }
                }

                function addTranscription(text, timestamp, isError = false) {
                    const item = document.createElement('div');
                    item.className = 'transcription-item';
                    if (isError) item.style.backgroundColor = '#f8d7da';

                    const time = new Date(timestamp).toLocaleTimeString();
                    item.innerHTML = `<strong>${time}:</strong> ${text}`;

                    // Clear the "Transcriptions will appear here..." placeholder on the
                    // first real entry. (The old check compared children[0].tagName to
                    // 'EM', but the placeholder is a DIV wrapping an EM, so it never
                    // matched and the placeholder was never removed.)
                    if (transcriptionsDiv.querySelector('em')) {
                        transcriptionsDiv.innerHTML = '';
                    }
                    transcriptionsDiv.appendChild(item);
                    transcriptionsDiv.scrollTop = transcriptionsDiv.scrollHeight;
                }

                // Audio recording functions
                async function startRecording() {
                    try {
                        console.log('Requesting microphone access...');
                        addTranscription('Requesting microphone access...', new Date().toISOString());

                        audioStream = await navigator.mediaDevices.getUserMedia({
                            audio: { sampleRate: 16000, channelCount: 1 }
                        });

                        console.log('Microphone access granted');
                        addTranscription('Microphone access granted', new Date().toISOString());

                        // Configure MediaRecorder with consistent WebM/Opus format (unmute.sh methodology)
                        const mimeType = 'audio/webm;codecs=opus';
                        if (!MediaRecorder.isTypeSupported(mimeType)) {
                            console.warn('WebM/Opus not supported, falling back to default format');
                            mediaRecorder = new MediaRecorder(audioStream);
                        } else {
                            console.log('Using WebM/Opus format for consistent encoding');
                            mediaRecorder = new MediaRecorder(audioStream, {
                                mimeType: mimeType,
                                audioBitsPerSecond: 128000 // 128 kbps for good quality
                            });
                        }

                        mediaRecorder.ondataavailable = function(event) {
                            console.log('Audio chunk available, size:', event.data.size);
                            if (event.data.size > 0 && websocket.readyState === WebSocket.OPEN) {
                                console.log('Sending audio chunk to server...');
                                // Convert blob to base64 and send
                                const reader = new FileReader();
                                reader.onloadend = function() {
                                    const base64 = reader.result.split(',')[1];
                                    websocket.send(JSON.stringify({
                                        type: 'audio_chunk',
                                        audio_data: base64,
                                        sample_rate: 16000
                                    }));
                                    console.log('Audio chunk sent');
                                };
                                reader.readAsDataURL(event.data);
                            } else {
                                if (event.data.size === 0) console.log('Empty audio chunk');
                                if (websocket.readyState !== WebSocket.OPEN) console.log('WebSocket not ready');
                            }
                        };

                        mediaRecorder.start(1000); // Send chunks every 1 second
                        isRecording = true;

                        recordBtn.disabled = true;
                        stopBtn.disabled = false;
                        recordBtn.textContent = 'π€ Recording...';

                        // Send start recording message (guard: socket may have closed)
                        if (websocket && websocket.readyState === WebSocket.OPEN) {
                            websocket.send(JSON.stringify({
                                type: 'start_recording'
                            }));
                        }

                    } catch (error) {
                        console.error('Error starting recording:', error);
                        addTranscription('Error: Could not access microphone', new Date().toISOString(), true);
                    }
                }

                function stopRecording() {
                    if (mediaRecorder && isRecording) {
                        mediaRecorder.stop();
                        audioStream.getTracks().forEach(track => track.stop());
                        isRecording = false;

                        recordBtn.disabled = false;
                        stopBtn.disabled = true;
                        recordBtn.textContent = 'π€ Start Recording';

                        // Send stop recording message (guard: socket may have closed)
                        if (websocket && websocket.readyState === WebSocket.OPEN) {
                            websocket.send(JSON.stringify({
                                type: 'stop_recording'
                            }));
                        }
                    }
                }

                function requestTTSPlayback(text, voicePreset = 'v2/en_speaker_6') {
                    console.log('Requesting TTS playback:', text);
                    if (websocket && websocket.readyState === WebSocket.OPEN) {
                        websocket.send(JSON.stringify({
                            type: 'tts_request',
                            text: text,
                            voice_preset: voicePreset
                        }));
                    } else {
                        console.error('WebSocket not available for TTS request');
                    }
                }

                function playTTSAudio(audioBase64, text) {
                    console.log('Playing TTS audio for:', text);
                    try {
                        // Convert base64 to audio blob
                        const audioData = atob(audioBase64);
                        const arrayBuffer = new ArrayBuffer(audioData.length);
                        const uint8Array = new Uint8Array(arrayBuffer);

                        for (let i = 0; i < audioData.length; i++) {
                            uint8Array[i] = audioData.charCodeAt(i);
                        }

                        const audioBlob = new Blob([arrayBuffer], { type: 'audio/wav' });
                        const audioUrl = URL.createObjectURL(audioBlob);

                        const audio = new Audio(audioUrl);
                        audio.onloadeddata = () => {
                            console.log('TTS audio loaded, playing...');
                            addTranscription(`π Playing: ${text}`, new Date().toISOString(), false);
                        };

                        audio.onended = () => {
                            console.log('TTS audio finished playing');
                            URL.revokeObjectURL(audioUrl); // Clean up
                        };

                        audio.onerror = (error) => {
                            console.error('TTS audio playback error:', error);
                            addTranscription(`TTS Playback Error: ${error}`, new Date().toISOString(), true);
                        };

                        audio.play().catch(error => {
                            console.error('Failed to play TTS audio:', error);
                            addTranscription(`TTS Play Error: User interaction may be required`, new Date().toISOString(), true);
                        });

                    } catch (error) {
                        console.error('Error processing TTS audio:', error);
                        addTranscription(`TTS Processing Error: ${error}`, new Date().toISOString(), true);
                    }
                }

                // Event listeners
                recordBtn.addEventListener('click', startRecording);
                stopBtn.addEventListener('click', stopRecording);

                // Initialize
                connect();
            </script>
        </body>
        </html>
        """
        return HTMLResponse(content=demo_html)

    return app