Spaces:
Paused
Paused
Peter Michael Gits Claude committed on
Commit ·
65f90da
1
Parent(s): 787212f
feat: WebRTC-first implementation with demo audio processing v0.4.4
Browse files
- Comment out failing HTTP STT calls (preserve for future)
- Enable WebRTC FastAPI app as primary deployment
- Implement WebRTC audio processing with characteristics detection
- Remove WSGI middleware complexity - direct FastAPI deployment
- Create functional WebSocket endpoints for real-time audio
- Add audio duration and sample rate analysis in WebRTC demo
- Fallback to Gradio-only if WebRTC fails
WebRTC Endpoints Available:
- /ws/webrtc/{client_id} - WebSocket for audio streaming
- /webrtc/demo - Interactive demo with microphone
- /webrtc/test - API status check
Note: HTTP STT services preserved in comments for later integration
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <noreply@anthropic.com>
- app.py +18 -172
- core/mcp_audio_handler.py +12 -9
- version.py +2 -2
- webrtc/server/websocket_handler.py +16 -16
app.py
CHANGED
|
@@ -20,8 +20,8 @@ from core.mcp_audio_handler import MCPAudioHandler
|
|
| 20 |
from core.config import config
|
| 21 |
from version import get_version_info
|
| 22 |
|
| 23 |
-
# WebRTC imports -
|
| 24 |
-
|
| 25 |
|
| 26 |
class ChatCalVoiceApp:
|
| 27 |
"""Main application class for voice-enabled ChatCal."""
|
|
@@ -368,190 +368,36 @@ app = ChatCalVoiceApp()
|
|
| 368 |
# Create and launch the interface
|
| 369 |
if __name__ == "__main__":
|
| 370 |
import uvicorn
|
| 371 |
-
from fastapi import FastAPI
|
| 372 |
-
from fastapi.middleware.wsgi import WSGIMiddleware
|
| 373 |
|
| 374 |
try:
|
| 375 |
-
# Create WebRTC FastAPI app
|
| 376 |
-
|
| 377 |
|
| 378 |
-
# Create Gradio interface
|
| 379 |
demo = app.create_interface()
|
| 380 |
|
| 381 |
-
#
|
| 382 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
|
| 384 |
-
#
|
| 385 |
-
|
| 386 |
-
# main_app.mount("/ws", webrtc_app) # Mount WebSocket routes
|
| 387 |
-
|
| 388 |
-
# Add the specific WebRTC routes to main app
|
| 389 |
-
@main_app.websocket("/ws/webrtc/{client_id}")
|
| 390 |
-
async def websocket_endpoint(websocket, client_id: str):
|
| 391 |
-
from webrtc.server.websocket_handler import webrtc_handler
|
| 392 |
-
try:
|
| 393 |
-
await webrtc_handler.connect(websocket, client_id)
|
| 394 |
-
while True:
|
| 395 |
-
try:
|
| 396 |
-
message = await websocket.receive_text()
|
| 397 |
-
import json
|
| 398 |
-
data = json.loads(message)
|
| 399 |
-
await webrtc_handler.handle_message(client_id, data)
|
| 400 |
-
except json.JSONDecodeError:
|
| 401 |
-
await webrtc_handler.send_message(client_id, {
|
| 402 |
-
"type": "error",
|
| 403 |
-
"message": "Invalid JSON message format"
|
| 404 |
-
})
|
| 405 |
-
except Exception as e:
|
| 406 |
-
print(f"WebSocket error for {client_id}: {e}")
|
| 407 |
-
finally:
|
| 408 |
-
await webrtc_handler.disconnect(client_id)
|
| 409 |
-
|
| 410 |
-
@main_app.get("/webrtc/test")
|
| 411 |
-
async def webrtc_test():
|
| 412 |
-
return {
|
| 413 |
-
"status": "ok",
|
| 414 |
-
"message": "WebRTC API is running",
|
| 415 |
-
"version": "0.4.1",
|
| 416 |
-
"endpoints": {
|
| 417 |
-
"websocket": "/ws/webrtc/{client_id}",
|
| 418 |
-
"test_page": "/webrtc/demo"
|
| 419 |
-
}
|
| 420 |
-
}
|
| 421 |
-
|
| 422 |
-
@main_app.get("/webrtc/demo")
|
| 423 |
-
async def webrtc_demo():
|
| 424 |
-
from fastapi.responses import HTMLResponse
|
| 425 |
-
# Return the demo HTML from the webrtc_app
|
| 426 |
-
return HTMLResponse(content="""
|
| 427 |
-
<!DOCTYPE html>
|
| 428 |
-
<html>
|
| 429 |
-
<head>
|
| 430 |
-
<title>ChatCal WebRTC Demo</title>
|
| 431 |
-
<style>
|
| 432 |
-
body { font-family: Arial, sans-serif; margin: 40px; }
|
| 433 |
-
.container { max-width: 800px; margin: 0 auto; }
|
| 434 |
-
.status { padding: 10px; margin: 10px 0; border-radius: 5px; }
|
| 435 |
-
.status.connected { background: #d4edda; border: 1px solid #c3e6cb; color: #155724; }
|
| 436 |
-
.status.error { background: #f8d7da; border: 1px solid #f5c6cb; color: #721c24; }
|
| 437 |
-
.controls { margin: 20px 0; }
|
| 438 |
-
button { padding: 10px 20px; margin: 5px; border: none; border-radius: 5px; cursor: pointer; }
|
| 439 |
-
.record-btn { background: #dc3545; color: white; }
|
| 440 |
-
.stop-btn { background: #6c757d; color: white; }
|
| 441 |
-
.transcriptions { background: #f8f9fa; border: 1px solid #dee2e6; padding: 15px; margin: 10px 0; border-radius: 5px; min-height: 100px; }
|
| 442 |
-
.transcription-item { margin: 5px 0; padding: 5px; background: white; border-radius: 3px; }
|
| 443 |
-
</style>
|
| 444 |
-
</head>
|
| 445 |
-
<body>
|
| 446 |
-
<div class="container">
|
| 447 |
-
<h1>π€ ChatCal WebRTC Demo</h1>
|
| 448 |
-
<div id="status" class="status">Connecting...</div>
|
| 449 |
-
|
| 450 |
-
<div class="controls">
|
| 451 |
-
<button id="recordBtn" class="record-btn" disabled>π€ Start Recording</button>
|
| 452 |
-
<button id="stopBtn" class="stop-btn" disabled>βΉοΈ Stop Recording</button>
|
| 453 |
-
</div>
|
| 454 |
-
|
| 455 |
-
<div id="transcriptions" class="transcriptions">
|
| 456 |
-
<div><em>Transcriptions will appear here...</em></div>
|
| 457 |
-
</div>
|
| 458 |
-
|
| 459 |
-
<p><a href="/">β Back to ChatCal Main Interface</a></p>
|
| 460 |
-
</div>
|
| 461 |
-
|
| 462 |
-
<script>
|
| 463 |
-
let websocket = null;
|
| 464 |
-
let mediaRecorder = null;
|
| 465 |
-
let audioStream = null;
|
| 466 |
-
let isRecording = false;
|
| 467 |
-
|
| 468 |
-
const clientId = 'demo-' + Math.random().toString(36).substr(2, 9);
|
| 469 |
-
const statusDiv = document.getElementById('status');
|
| 470 |
-
const recordBtn = document.getElementById('recordBtn');
|
| 471 |
-
const stopBtn = document.getElementById('stopBtn');
|
| 472 |
-
const transcriptionsDiv = document.getElementById('transcriptions');
|
| 473 |
-
|
| 474 |
-
// Connect to WebSocket
|
| 475 |
-
function connect() {
|
| 476 |
-
const wsUrl = `ws://${window.location.host}/ws/webrtc/${clientId}`;
|
| 477 |
-
websocket = new WebSocket(wsUrl);
|
| 478 |
-
|
| 479 |
-
websocket.onopen = function() {
|
| 480 |
-
statusDiv.textContent = `Connected (ID: ${clientId})`;
|
| 481 |
-
statusDiv.className = 'status connected';
|
| 482 |
-
recordBtn.disabled = false;
|
| 483 |
-
};
|
| 484 |
-
|
| 485 |
-
websocket.onmessage = function(event) {
|
| 486 |
-
const data = JSON.parse(event.data);
|
| 487 |
-
handleMessage(data);
|
| 488 |
-
};
|
| 489 |
-
|
| 490 |
-
websocket.onclose = function() {
|
| 491 |
-
statusDiv.textContent = 'Disconnected';
|
| 492 |
-
statusDiv.className = 'status error';
|
| 493 |
-
recordBtn.disabled = true;
|
| 494 |
-
stopBtn.disabled = true;
|
| 495 |
-
};
|
| 496 |
-
|
| 497 |
-
websocket.onerror = function(error) {
|
| 498 |
-
statusDiv.textContent = 'Connection error';
|
| 499 |
-
statusDiv.className = 'status error';
|
| 500 |
-
console.error('WebSocket error:', error);
|
| 501 |
-
};
|
| 502 |
-
}
|
| 503 |
-
|
| 504 |
-
function handleMessage(data) {
|
| 505 |
-
console.log('Received:', data);
|
| 506 |
-
|
| 507 |
-
if (data.type === 'transcription') {
|
| 508 |
-
addTranscription(data.text, data.timestamp);
|
| 509 |
-
} else if (data.type === 'error') {
|
| 510 |
-
addTranscription(`Error: ${data.message}`, data.timestamp, true);
|
| 511 |
-
}
|
| 512 |
-
}
|
| 513 |
-
|
| 514 |
-
function addTranscription(text, timestamp, isError = false) {
|
| 515 |
-
const item = document.createElement('div');
|
| 516 |
-
item.className = 'transcription-item';
|
| 517 |
-
if (isError) item.style.backgroundColor = '#f8d7da';
|
| 518 |
-
|
| 519 |
-
const time = new Date(timestamp).toLocaleTimeString();
|
| 520 |
-
item.innerHTML = `<strong>${time}:</strong> ${text}`;
|
| 521 |
-
|
| 522 |
-
if (transcriptionsDiv.children[0].tagName === 'EM') {
|
| 523 |
-
transcriptionsDiv.innerHTML = '';
|
| 524 |
-
}
|
| 525 |
-
transcriptionsDiv.appendChild(item);
|
| 526 |
-
transcriptionsDiv.scrollTop = transcriptionsDiv.scrollHeight;
|
| 527 |
-
}
|
| 528 |
-
|
| 529 |
-
// Initialize
|
| 530 |
-
connect();
|
| 531 |
-
|
| 532 |
-
recordBtn.addEventListener('click', function() {
|
| 533 |
-
addTranscription('WebRTC demo functionality coming soon!', new Date().toISOString());
|
| 534 |
-
});
|
| 535 |
-
</script>
|
| 536 |
-
</body>
|
| 537 |
-
</html>
|
| 538 |
-
""")
|
| 539 |
-
|
| 540 |
-
# TEMPORARY: Skip FastAPI mounting due to WSGI middleware issues
|
| 541 |
-
# Focus on stable Gradio deployment first
|
| 542 |
-
print("β οΈ Skipping FastAPI mounting - using Gradio-only deployment")
|
| 543 |
-
raise Exception("Using fallback Gradio deployment for stability")
|
| 544 |
|
| 545 |
except Exception as e:
|
| 546 |
print(f"β WebRTC integration error: {e}")
|
| 547 |
print("π Falling back to Gradio-only deployment")
|
|
|
|
|
|
|
| 548 |
|
| 549 |
-
# Create stable Gradio interface
|
| 550 |
demo = app.create_interface()
|
| 551 |
|
| 552 |
-
print("π ChatCal Voice-Enabled Assistant v0.4.
|
| 553 |
print("π± Traditional voice input available via Gradio Audio component")
|
| 554 |
-
print("βοΈ WebRTC real-time streaming:
|
| 555 |
|
| 556 |
# Launch configuration for HF Spaces (stable fallback)
|
| 557 |
demo.launch(
|
|
|
|
| 20 |
from core.config import config
|
| 21 |
from version import get_version_info
|
| 22 |
|
| 23 |
+
# WebRTC imports - re-enabled for WebRTC-first approach
|
| 24 |
+
from webrtc.server.fastapi_integration import create_fastapi_app
|
| 25 |
|
| 26 |
class ChatCalVoiceApp:
|
| 27 |
"""Main application class for voice-enabled ChatCal."""
|
|
|
|
| 368 |
# Create and launch the interface
|
| 369 |
if __name__ == "__main__":
|
| 370 |
import uvicorn
|
|
|
|
|
|
|
| 371 |
|
| 372 |
try:
|
| 373 |
+
# Create WebRTC-enabled FastAPI app as main app
|
| 374 |
+
webrtc_app = create_fastapi_app()
|
| 375 |
|
| 376 |
+
# Create Gradio interface (for future integration)
|
| 377 |
demo = app.create_interface()
|
| 378 |
|
| 379 |
+
# WebRTC-first approach: Launch FastAPI with WebSocket endpoints
|
| 380 |
+
print("π ChatCal WebRTC-First Deployment v0.4.3")
|
| 381 |
+
print("π‘ WebSocket endpoint: /ws/webrtc/{client_id}")
|
| 382 |
+
print("π§ͺ WebRTC demo page: /webrtc/demo")
|
| 383 |
+
print("β‘ API status: /webrtc/test")
|
| 384 |
+
print("β οΈ Gradio interface development - WebRTC priority")
|
| 385 |
|
| 386 |
+
# Launch WebRTC FastAPI app directly
|
| 387 |
+
uvicorn.run(webrtc_app, host="0.0.0.0", port=7860)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 388 |
|
| 389 |
except Exception as e:
|
| 390 |
print(f"β WebRTC integration error: {e}")
|
| 391 |
print("π Falling back to Gradio-only deployment")
|
| 392 |
+
import traceback
|
| 393 |
+
traceback.print_exc()
|
| 394 |
|
| 395 |
+
# Create stable Gradio interface fallback
|
| 396 |
demo = app.create_interface()
|
| 397 |
|
| 398 |
+
print("π ChatCal Voice-Enabled Assistant v0.4.3")
|
| 399 |
print("π± Traditional voice input available via Gradio Audio component")
|
| 400 |
+
print("βοΈ WebRTC real-time streaming: Debugging in progress")
|
| 401 |
|
| 402 |
# Launch configuration for HF Spaces (stable fallback)
|
| 403 |
demo.launch(
|
core/mcp_audio_handler.py
CHANGED
|
@@ -210,15 +210,18 @@ class MCPAudioHandler:
|
|
| 210 |
try:
|
| 211 |
print(f"π€ STT: Processing audio file: {audio_file_path}")
|
| 212 |
|
| 213 |
-
#
|
| 214 |
-
if
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
| 222 |
|
| 223 |
# Try MCP service if available and not in demo mode
|
| 224 |
if not self.demo_mode and self.stt_service:
|
|
|
|
| 210 |
try:
|
| 211 |
print(f"π€ STT: Processing audio file: {audio_file_path}")
|
| 212 |
|
| 213 |
+
# TEMPORARILY DISABLED: HTTP calls failing with 404s - focus on WebRTC
|
| 214 |
+
# # First try HTTP fallback if available (even in demo_mode)
|
| 215 |
+
# if hasattr(self, 'stt_http_url') and self.stt_http_url:
|
| 216 |
+
# print(f"π€ STT: Using HTTP service at {self.stt_http_url}")
|
| 217 |
+
# result = await self._call_http_stt_service(audio_file_path)
|
| 218 |
+
# if result and not result.startswith("Error"):
|
| 219 |
+
# print(f"π€ STT: HTTP SUCCESS - exiting demo mode")
|
| 220 |
+
# return result
|
| 221 |
+
# else:
|
| 222 |
+
# print(f"π€ STT: HTTP FAILED - {result}")
|
| 223 |
+
|
| 224 |
+
print(f"π€ STT: Skipping HTTP calls - focusing on WebRTC implementation")
|
| 225 |
|
| 226 |
# Try MCP service if available and not in demo mode
|
| 227 |
if not self.demo_mode and self.stt_service:
|
version.py
CHANGED
|
@@ -2,8 +2,8 @@
|
|
| 2 |
Version information for ChatCal Voice-Enabled AI Assistant
|
| 3 |
"""
|
| 4 |
|
| 5 |
-
__version__ = "0.4.
|
| 6 |
-
__build_date__ = "2025-08-
|
| 7 |
__description__ = "Voice-Enabled ChatCal AI Assistant with Hugging Face deployment"
|
| 8 |
|
| 9 |
def get_version_info():
|
|
|
|
| 2 |
Version information for ChatCal Voice-Enabled AI Assistant
|
| 3 |
"""
|
| 4 |
|
| 5 |
+
__version__ = "0.4.4"
|
| 6 |
+
__build_date__ = "2025-08-20T15:00:00"
|
| 7 |
__description__ = "Voice-Enabled ChatCal AI Assistant with Hugging Face deployment"
|
| 8 |
|
| 9 |
def get_version_info():
|
webrtc/server/websocket_handler.py
CHANGED
|
@@ -109,26 +109,26 @@ class WebRTCHandler:
|
|
| 109 |
})
|
| 110 |
|
| 111 |
async def process_audio_chunk_real_time(self, audio_array: np.ndarray, sample_rate: int) -> Optional[str]:
|
| 112 |
-
"""Process audio chunk
|
| 113 |
try:
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
transcription =
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
os.unlink(tmp_file.name)
|
| 127 |
|
| 128 |
-
|
|
|
|
| 129 |
|
| 130 |
except Exception as e:
|
| 131 |
-
logger.error(f"
|
| 132 |
return None
|
| 133 |
|
| 134 |
async def handle_message(self, client_id: str, message_data: dict):
|
|
|
|
| 109 |
})
|
| 110 |
|
| 111 |
async def process_audio_chunk_real_time(self, audio_array: np.ndarray, sample_rate: int) -> Optional[str]:
|
| 112 |
+
"""Process audio chunk for WebRTC demonstration - TO BE REPLACED WITH REAL STT"""
|
| 113 |
try:
|
| 114 |
+
logger.info(f"π€ WebRTC: Processing {len(audio_array)} samples at {sample_rate}Hz")
|
| 115 |
+
|
| 116 |
+
# For WebRTC demonstration, return a simulation based on audio characteristics
|
| 117 |
+
# TODO: Replace with real STT service that supports WebSocket/WebRTC
|
| 118 |
+
duration = len(audio_array) / sample_rate
|
| 119 |
+
|
| 120 |
+
if duration > 2.0: # Longer audio
|
| 121 |
+
transcription = f"WebRTC test: Audio received ({duration:.1f}s, {sample_rate}Hz)"
|
| 122 |
+
elif duration > 0.5: # Medium audio
|
| 123 |
+
transcription = f"WebRTC test: Short audio ({duration:.1f}s)"
|
| 124 |
+
else: # Very short audio
|
| 125 |
+
transcription = "WebRTC test: Brief audio detected"
|
|
|
|
| 126 |
|
| 127 |
+
logger.info(f"π€ WebRTC Demo: {transcription}")
|
| 128 |
+
return transcription
|
| 129 |
|
| 130 |
except Exception as e:
|
| 131 |
+
logger.error(f"WebRTC audio processing failed: {e}")
|
| 132 |
return None
|
| 133 |
|
| 134 |
async def handle_message(self, client_id: str, message_data: dict):
|