Spaces:
Paused
Paused
| """ | |
| FastAPI integration for WebRTC WebSocket endpoints | |
| Mounts alongside Gradio for real-time audio streaming | |
| """ | |
| from fastapi import FastAPI, WebSocket, WebSocketDisconnect | |
| from fastapi.responses import HTMLResponse | |
| from fastapi.staticfiles import StaticFiles | |
| import json | |
| import logging | |
| import uuid | |
| from .websocket_handler import webrtc_handler | |
| logger = logging.getLogger(__name__) | |
def create_fastapi_app() -> FastAPI:
    """Create and configure the FastAPI app exposing the WebRTC endpoints.

    Routes registered:
        WS  /ws/webrtc/{client_id} -- real-time audio streaming socket
        GET /webrtc/test           -- liveness / route-discovery probe
        GET /webrtc/demo           -- self-contained HTML demo page

    Returns:
        FastAPI: the configured application, intended to be mounted
        alongside the Gradio app (per the module docstring).
    """
    app = FastAPI(
        title="ChatCal WebRTC API",
        description="Real-time audio streaming API for ChatCal Voice",
        version="0.4.1",
    )

    # BUG FIX: the three endpoint functions below were defined but never
    # registered on the app, so the API exposed no routes at all. The
    # websocket and demo paths come from the "endpoints" payload that
    # webrtc_test itself advertises; the test route's own path is not
    # stated anywhere in this file -- TODO(review): confirm "/webrtc/test"
    # against the deployed frontend.
    @app.websocket("/ws/webrtc/{client_id}")
    async def websocket_endpoint(websocket: WebSocket, client_id: str) -> None:
        """WebRTC WebSocket endpoint for real-time audio streaming.

        Hands the socket to the shared ``webrtc_handler`` for acceptance,
        then relays JSON text frames from the client into the handler
        until the client disconnects or an error occurs.
        """
        try:
            await webrtc_handler.connect(websocket, client_id)
            while True:
                # Receive one text frame and dispatch it to the handler.
                try:
                    message = await websocket.receive_text()
                    data = json.loads(message)
                    await webrtc_handler.handle_message(client_id, data)
                except json.JSONDecodeError:
                    # Malformed payload: report back to the client but keep
                    # the connection open for subsequent valid messages.
                    await webrtc_handler.send_message(client_id, {
                        "type": "error",
                        "message": "Invalid JSON message format"
                    })
        except WebSocketDisconnect:
            logger.info(f"Client {client_id} disconnected")
        except Exception as e:
            logger.error(f"WebSocket error for {client_id}: {e}")
        finally:
            # Always release handler-side state, even on abnormal exit.
            await webrtc_handler.disconnect(client_id)

    @app.get("/webrtc/test")
    async def webrtc_test() -> dict:
        """Test endpoint to verify the WebRTC API is up; lists the routes."""
        return {
            "status": "ok",
            "message": "WebRTC API is running",
            "version": "0.4.1",
            "endpoints": {
                "websocket": "/ws/webrtc/{client_id}",
                "test_page": "/webrtc/demo"
            }
        }

    @app.get("/webrtc/demo")
    async def webrtc_demo() -> HTMLResponse:
        """Serve a self-contained WebRTC demo page for manual testing.

        The page records microphone audio with MediaRecorder, streams
        base64 chunks over the websocket, renders transcriptions, and
        plays back TTS audio returned by the server.
        """
        demo_html = """
        <!DOCTYPE html>
        <html>
        <head>
            <title>ChatCal WebRTC Demo</title>
            <style>
                body { font-family: Arial, sans-serif; margin: 40px; }
                .container { max-width: 800px; margin: 0 auto; }
                .status { padding: 10px; margin: 10px 0; border-radius: 5px; }
                .status.connected { background: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
                .status.error { background: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
                .controls { margin: 20px 0; }
                button { padding: 10px 20px; margin: 5px; border: none; border-radius: 5px; cursor: pointer; }
                .record-btn { background: #dc3545; color: white; }
                .stop-btn { background: #6c757d; color: white; }
                .transcriptions { background: #f8f9fa; border: 1px solid #dee2e6; padding: 15px; margin: 10px 0; border-radius: 5px; min-height: 100px; }
                .transcription-item { margin: 5px 0; padding: 5px; background: white; border-radius: 3px; }
            </style>
        </head>
        <body>
            <div class="container">
                <h1>🎤 ChatCal WebRTC Demo</h1>
                <div id="status" class="status">Connecting...</div>
                <div class="controls">
                    <button id="recordBtn" class="record-btn" disabled>🎤 Start Recording</button>
                    <button id="stopBtn" class="stop-btn" disabled>⏹️ Stop Recording</button>
                </div>
                <div id="transcriptions" class="transcriptions">
                    <div><em>Transcriptions will appear here...</em></div>
                </div>
            </div>
            <script>
                let websocket = null;
                let mediaRecorder = null;
                let audioStream = null;
                let isRecording = false;
                const clientId = 'demo-' + Math.random().toString(36).substr(2, 9);
                const statusDiv = document.getElementById('status');
                const recordBtn = document.getElementById('recordBtn');
                const stopBtn = document.getElementById('stopBtn');
                const transcriptionsDiv = document.getElementById('transcriptions');

                // Connect to WebSocket
                function connect() {
                    // Use wss:// for HTTPS (Hugging Face Spaces) or ws:// for local development
                    const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
                    const wsUrl = `${protocol}//${window.location.host}/ws/webrtc/${clientId}`;
                    console.log('Connecting to WebSocket:', wsUrl);
                    websocket = new WebSocket(wsUrl);
                    websocket.onopen = function() {
                        console.log('WebSocket connected successfully');
                        statusDiv.textContent = `Connected (ID: ${clientId})`;
                        statusDiv.className = 'status connected';
                        recordBtn.disabled = false;
                    };
                    websocket.onmessage = function(event) {
                        console.log('WebSocket message received:', event.data);
                        try {
                            const data = JSON.parse(event.data);
                            handleMessage(data);
                        } catch (e) {
                            console.error('Failed to parse WebSocket message:', e);
                            addTranscription('Error parsing server response', new Date().toISOString(), true);
                        }
                    };
                    websocket.onclose = function(event) {
                        console.log('WebSocket closed:', event.code, event.reason);
                        statusDiv.textContent = `Disconnected (Code: ${event.code})`;
                        statusDiv.className = 'status error';
                        recordBtn.disabled = true;
                        stopBtn.disabled = true;
                    };
                    websocket.onerror = function(error) {
                        console.error('WebSocket error:', error);
                        statusDiv.textContent = 'Connection error - Check console';
                        statusDiv.className = 'status error';
                    };
                }

                function handleMessage(data) {
                    console.log('Received:', data);
                    if (data.type === 'transcription') {
                        addTranscription(data.text, data.timestamp);
                        // Auto-generate TTS response for demo
                        if (data.text && data.text.trim()) {
                            const demoResponse = `I heard you say: "${data.text}". This is a demo TTS response.`;
                            setTimeout(() => {
                                requestTTSPlayback(demoResponse);
                            }, 1000); // Wait 1 second before TTS response
                        }
                    } else if (data.type === 'tts_playback') {
                        playTTSAudio(data.audio_data, data.text);
                    } else if (data.type === 'tts_error') {
                        console.error('TTS Error:', data.message);
                        addTranscription(`TTS Error: ${data.message}`, data.timestamp, true);
                    } else if (data.type === 'error') {
                        addTranscription(`Error: ${data.message}`, data.timestamp, true);
                    }
                }

                function addTranscription(text, timestamp, isError = false) {
                    const item = document.createElement('div');
                    item.className = 'transcription-item';
                    if (isError) item.style.backgroundColor = '#f8d7da';
                    const time = new Date(timestamp).toLocaleTimeString();
                    item.innerHTML = `<strong>${time}:</strong> ${text}`;
                    // BUG FIX: children[0] is the wrapper DIV (tagName 'DIV'),
                    // never 'EM', so the placeholder was never cleared. Look
                    // for the <em> placeholder inside it instead.
                    if (transcriptionsDiv.children.length &&
                        transcriptionsDiv.children[0].querySelector('em')) {
                        transcriptionsDiv.innerHTML = '';
                    }
                    transcriptionsDiv.appendChild(item);
                    transcriptionsDiv.scrollTop = transcriptionsDiv.scrollHeight;
                }

                // Audio recording functions
                async function startRecording() {
                    try {
                        console.log('Requesting microphone access...');
                        addTranscription('Requesting microphone access...', new Date().toISOString());
                        audioStream = await navigator.mediaDevices.getUserMedia({
                            audio: { sampleRate: 16000, channelCount: 1 }
                        });
                        console.log('Microphone access granted');
                        addTranscription('Microphone access granted', new Date().toISOString());
                        mediaRecorder = new MediaRecorder(audioStream);
                        mediaRecorder.ondataavailable = function(event) {
                            console.log('Audio chunk available, size:', event.data.size);
                            if (event.data.size > 0 && websocket.readyState === WebSocket.OPEN) {
                                console.log('Sending audio chunk to server...');
                                // Convert blob to base64 and send
                                const reader = new FileReader();
                                reader.onloadend = function() {
                                    const base64 = reader.result.split(',')[1];
                                    websocket.send(JSON.stringify({
                                        type: 'audio_chunk',
                                        audio_data: base64,
                                        sample_rate: 16000
                                    }));
                                    console.log('Audio chunk sent');
                                };
                                reader.readAsDataURL(event.data);
                            } else {
                                if (event.data.size === 0) console.log('Empty audio chunk');
                                if (websocket.readyState !== WebSocket.OPEN) console.log('WebSocket not ready');
                            }
                        };
                        mediaRecorder.start(1000); // Send chunks every 1 second
                        isRecording = true;
                        recordBtn.disabled = true;
                        stopBtn.disabled = false;
                        recordBtn.textContent = '🎤 Recording...';
                        // Send start recording message
                        websocket.send(JSON.stringify({
                            type: 'start_recording'
                        }));
                    } catch (error) {
                        console.error('Error starting recording:', error);
                        addTranscription('Error: Could not access microphone', new Date().toISOString(), true);
                    }
                }

                function stopRecording() {
                    if (mediaRecorder && isRecording) {
                        mediaRecorder.stop();
                        audioStream.getTracks().forEach(track => track.stop());
                        isRecording = false;
                        recordBtn.disabled = false;
                        stopBtn.disabled = true;
                        recordBtn.textContent = '🎤 Start Recording';
                        // Send stop recording message
                        websocket.send(JSON.stringify({
                            type: 'stop_recording'
                        }));
                    }
                }

                function requestTTSPlayback(text, voicePreset = 'v2/en_speaker_6') {
                    console.log('Requesting TTS playback:', text);
                    if (websocket && websocket.readyState === WebSocket.OPEN) {
                        websocket.send(JSON.stringify({
                            type: 'tts_request',
                            text: text,
                            voice_preset: voicePreset
                        }));
                    } else {
                        console.error('WebSocket not available for TTS request');
                    }
                }

                function playTTSAudio(audioBase64, text) {
                    console.log('Playing TTS audio for:', text);
                    try {
                        // Convert base64 to audio blob
                        const audioData = atob(audioBase64);
                        const arrayBuffer = new ArrayBuffer(audioData.length);
                        const uint8Array = new Uint8Array(arrayBuffer);
                        for (let i = 0; i < audioData.length; i++) {
                            uint8Array[i] = audioData.charCodeAt(i);
                        }
                        const audioBlob = new Blob([arrayBuffer], { type: 'audio/wav' });
                        const audioUrl = URL.createObjectURL(audioBlob);
                        const audio = new Audio(audioUrl);
                        audio.onloadeddata = () => {
                            console.log('TTS audio loaded, playing...');
                            addTranscription(`🔊 Playing: ${text}`, new Date().toISOString(), false);
                        };
                        audio.onended = () => {
                            console.log('TTS audio finished playing');
                            URL.revokeObjectURL(audioUrl); // Clean up
                        };
                        audio.onerror = (error) => {
                            console.error('TTS audio playback error:', error);
                            addTranscription(`TTS Playback Error: ${error}`, new Date().toISOString(), true);
                        };
                        audio.play().catch(error => {
                            console.error('Failed to play TTS audio:', error);
                            addTranscription(`TTS Play Error: User interaction may be required`, new Date().toISOString(), true);
                        });
                    } catch (error) {
                        console.error('Error processing TTS audio:', error);
                        addTranscription(`TTS Processing Error: ${error}`, new Date().toISOString(), true);
                    }
                }

                // Event listeners
                recordBtn.addEventListener('click', startRecording);
                stopBtn.addEventListener('click', stopRecording);
                // Initialize
                connect();
            </script>
        </body>
        </html>
        """
        return HTMLResponse(content=demo_html)

    return app