ChatCal.ai-1 / webrtc /tests /test_stt_tts_integration.py
Peter Michael Gits
feat: Complete voice services integration with comprehensive test suite v0.5.0
27649f2
#!/usr/bin/env python3
"""
Test cases for STT/TTS WebSocket integration
Tests the complete voice pipeline: Audio → STT → TTS → Audio
"""
import asyncio
import websockets
import json
import base64
import tempfile
import os
from datetime import datetime
import logging
# Configure logging: INFO and above, using the logging module's default setup
logging.basicConfig(level=logging.INFO)
# Module-level logger shared by every test method and the script entry point
logger = logging.getLogger(__name__)
# Service URLs
# Speech-to-text WebSocket endpoint opened by test_stt_service
STT_WEBSOCKET_URL = "wss://pgits-stt-gpu-service.hf.space/ws/stt"
# Text-to-speech WebSocket endpoint opened by test_tts_service
TTS_WEBSOCKET_URL = "wss://pgits-tts-gpu-service.hf.space/ws/tts"
# ChatCal WebRTC endpoint opened by test_chatcal_integration; it points at
# localhost, so that test needs the ChatCal server running on this machine
CHATCAL_WEBSOCKET_URL = "ws://localhost:7860/ws/webrtc/test-client"
class VoiceServiceTester:
    """Test suite for voice services integration.

    Each ``test_*`` coroutine opens one WebSocket endpoint, appends a
    ``(name, passed, detail)`` tuple to ``self.test_results`` and returns
    ``True`` on success or ``False`` on failure. Failures never propagate as
    exceptions; they are logged and recorded instead.
    """

    # Seconds to wait for the confirmation message each service sends first.
    HANDSHAKE_TIMEOUT = 30.0

    def __init__(self):
        # Ordered list of (test name, passed flag, human-readable detail).
        self.test_results = []

    def _record(self, name, passed, detail):
        """Append one outcome to ``test_results`` and return ``passed``."""
        self.test_results.append((name, passed, detail))
        return passed

    @staticmethod
    def _describe(exc):
        """Return the exception text, or its class name when the text is empty."""
        # TimeoutError and bare AssertionError stringify to "", which would
        # otherwise leave the recorded failure reason blank.
        return str(exc) or type(exc).__name__

    async def _expect_confirmation(self, websocket, expected_type):
        """Receive the first server message and check its ``type`` field.

        Raises:
            asyncio.TimeoutError: no message arrived within HANDSHAKE_TIMEOUT.
            AssertionError: the message carried a different ``type``.
        """
        raw = await asyncio.wait_for(
            websocket.recv(), timeout=self.HANDSHAKE_TIMEOUT
        )
        payload = json.loads(raw)
        received_type = payload.get("type")
        # Explicit raise instead of `assert`, which is stripped under -O.
        if received_type != expected_type:
            raise AssertionError(
                f"expected {expected_type!r} handshake, got {received_type!r}"
            )
        return payload

    async def test_stt_service(self):
        """Test STT WebSocket service.

        Sends one placeholder audio chunk and waits up to 30 seconds for the
        reply.

        Returns:
            True when an ``stt_transcription`` message is received; False on
            an ``stt_error`` reply, any other reply, or any exception.
        """
        logger.info("🎀 Testing STT WebSocket service...")
        try:
            test_audio_data = self.create_test_audio()
            async with websockets.connect(STT_WEBSOCKET_URL) as websocket:
                await self._expect_confirmation(
                    websocket, "stt_connection_confirmed"
                )
                logger.info("βœ… STT connection confirmed")

                message = {
                    "type": "stt_audio_chunk",
                    "audio_data": base64.b64encode(test_audio_data).decode('utf-8'),
                    "language": "auto",
                    "model_size": "base",
                }
                await websocket.send(json.dumps(message))
                logger.info("πŸ“€ Sent test audio to STT service")

                response = await asyncio.wait_for(websocket.recv(), timeout=30.0)
                response_data = json.loads(response)
                response_type = response_data.get("type")

                if response_type == "stt_transcription":
                    transcription = response_data.get("text", "")
                    logger.info(f"πŸ“ STT transcription received: {transcription}")
                    return self._record(
                        "STT Service", True, f"Transcription: {transcription}"
                    )
                if response_type == "stt_error":
                    error_msg = response_data.get("message", "Unknown error")
                    logger.error(f"❌ STT error: {error_msg}")
                    return self._record("STT Service", False, f"Error: {error_msg}")

                logger.warning(f"⚠️ Unexpected STT response: {response_data}")
                return self._record(
                    "STT Service", False, f"Unexpected response: {response_data}"
                )
        except Exception as e:
            reason = self._describe(e)
            logger.error(f"❌ STT service test failed: {reason}")
            return self._record("STT Service", False, f"Exception: {reason}")

    async def test_tts_service(self):
        """Test TTS WebSocket service.

        Sends one sentence for synthesis and waits up to 60 seconds for the
        reply. Returned audio, if any, is base64-decoded and written to
        ``tts_test_output.wav`` in the system temporary directory.

        Returns:
            True when a ``tts_audio_response`` message is received and any
            audio it carries was saved; False on a ``tts_error`` reply, any
            other reply, or any exception.
        """
        logger.info("πŸ”Š Testing TTS WebSocket service...")
        try:
            test_text = "Hello, this is a test of the text-to-speech service."
            async with websockets.connect(TTS_WEBSOCKET_URL) as websocket:
                await self._expect_confirmation(
                    websocket, "tts_connection_confirmed"
                )
                logger.info("βœ… TTS connection confirmed")

                message = {
                    "type": "tts_synthesize",
                    "text": test_text,
                    "voice_preset": "v2/en_speaker_6",
                }
                await websocket.send(json.dumps(message))
                logger.info(f"πŸ“€ Sent test text to TTS service: {test_text}")

                response = await asyncio.wait_for(websocket.recv(), timeout=60.0)
                response_data = json.loads(response)
                response_type = response_data.get("type")

                if response_type == "tts_audio_response":
                    audio_data = response_data.get("audio_data", "")
                    audio_size = response_data.get("audio_size", 0)
                    logger.info(f"πŸ”Š TTS audio received: {audio_size} bytes")

                    # Save before recording success: if decoding or writing
                    # fails, the except branch records the single FAIL entry
                    # instead of adding a second, contradictory one.
                    if audio_data:
                        audio_bytes = base64.b64decode(audio_data)
                        test_output_path = os.path.join(
                            tempfile.gettempdir(), "tts_test_output.wav"
                        )
                        with open(test_output_path, 'wb') as f:
                            f.write(audio_bytes)
                        logger.info(f"πŸ’Ύ Test audio saved to: {test_output_path}")

                    return self._record(
                        "TTS Service", True, f"Audio generated: {audio_size} bytes"
                    )
                if response_type == "tts_error":
                    error_msg = response_data.get("message", "Unknown error")
                    logger.error(f"❌ TTS error: {error_msg}")
                    return self._record("TTS Service", False, f"Error: {error_msg}")

                logger.warning(f"⚠️ Unexpected TTS response: {response_data}")
                return self._record(
                    "TTS Service", False, f"Unexpected response: {response_data}"
                )
        except Exception as e:
            reason = self._describe(e)
            logger.error(f"❌ TTS service test failed: {reason}")
            return self._record("TTS Service", False, f"Exception: {reason}")

    async def test_chatcal_integration(self):
        """Test ChatCal WebRTC integration with STT/TTS.

        Requires the ChatCal WebRTC server to be reachable at
        CHATCAL_WEBSOCKET_URL. Sends one placeholder audio chunk, then
        inspects up to three server messages looking for both a
        ``transcription`` and a ``tts_playback`` message, in either order.

        Returns:
            True only when both messages were seen; False otherwise,
            including on any exception.
        """
        logger.info("🌐 Testing ChatCal WebRTC integration...")
        try:
            test_audio_data = self.create_test_audio()
            async with websockets.connect(CHATCAL_WEBSOCKET_URL) as websocket:
                await self._expect_confirmation(websocket, "connection_confirmed")
                logger.info("βœ… ChatCal WebRTC connection confirmed")

                message = {
                    "type": "audio_chunk",
                    "audio_data": base64.b64encode(test_audio_data).decode('utf-8'),
                    "sample_rate": 16000,
                }
                await websocket.send(json.dumps(message))
                logger.info("πŸ“€ Sent test audio to ChatCal WebRTC")

                transcription_received = False
                tts_playback_received = False
                for _ in range(3):  # Wait for up to 3 messages
                    try:
                        response = await asyncio.wait_for(
                            websocket.recv(), timeout=30.0
                        )
                    except asyncio.TimeoutError:
                        # Keep whatever was already received so the verdict
                        # below reports partial progress accurately.
                        logger.warning("Timed out waiting for ChatCal response")
                        break

                    response_data = json.loads(response)
                    response_type = response_data.get("type")
                    if response_type == "transcription":
                        transcription = response_data.get("text", "")
                        logger.info(f"πŸ“ Transcription received: {transcription}")
                        transcription_received = True
                    elif response_type == "tts_playback":
                        audio_size = response_data.get("audio_size", 0)
                        logger.info(f"πŸ”Š TTS playback received: {audio_size} bytes")
                        tts_playback_received = True
                    elif response_type == "error":
                        logger.error(f"❌ ChatCal error: {response_data.get('message')}")

                    # Stop as soon as the loop is complete, whichever of the
                    # two messages arrived first.
                    if transcription_received and tts_playback_received:
                        break

                if transcription_received and tts_playback_received:
                    return self._record(
                        "ChatCal Integration", True, "Complete voice loop working"
                    )
                if transcription_received:
                    return self._record(
                        "ChatCal Integration", False, "STT working but no TTS"
                    )
                return self._record(
                    "ChatCal Integration", False, "No transcription received"
                )
        except Exception as e:
            reason = self._describe(e)
            logger.error(f"❌ ChatCal integration test failed: {reason}")
            return self._record("ChatCal Integration", False, f"Exception: {reason}")

    def create_test_audio(self):
        """Return the placeholder byte payload sent as "audio" by the tests.

        NOTE(review): the payload is the six ASCII bytes ``GIF89a`` (a GIF
        signature, not a WebM/EBML header) followed by 1000 zero bytes. It is
        not decodable audio; replace it with a real recording if the services
        must produce a meaningful transcription.
        """
        placeholder_header = b'GIF89a'
        return placeholder_header + b'\x00' * 1000

    async def run_all_tests(self, pause_seconds=2.0):
        """Run all voice service integration tests.

        The STT and TTS tests always run; the ChatCal integration test runs
        only when both of them passed, otherwise it is recorded as skipped.

        Args:
            pause_seconds: delay inserted after each individual service test.

        Returns:
            The value of ``print_test_results()``: True when every recorded
            test passed, False otherwise.
        """
        logger.info("πŸš€ Starting voice services integration tests...")
        logger.info(f"Test started at: {datetime.now().isoformat()}")

        stt_result = await self.test_stt_service()
        await asyncio.sleep(pause_seconds)  # Brief pause between tests
        tts_result = await self.test_tts_service()
        await asyncio.sleep(pause_seconds)

        if stt_result and tts_result:
            logger.info("πŸ”— Individual services working, testing integration...")
            await self.test_chatcal_integration()
        else:
            logger.warning("⚠️ Skipping integration test - individual services failed")
            self._record("ChatCal Integration", False, "Skipped - dependencies failed")

        # Propagate the verdict so callers can turn it into an exit status.
        return self.print_test_results()

    def print_test_results(self):
        """Log a formatted summary of ``test_results``.

        Returns:
            True when at least one test was recorded and all of them passed;
            False when any test failed or nothing was recorded.
        """
        logger.info("\n" + "="*60)
        logger.info("πŸ“Š VOICE SERVICES TEST RESULTS")
        logger.info("="*60)

        total = len(self.test_results)
        passed = 0
        for test_name, success, message in self.test_results:
            status = "βœ… PASS" if success else "❌ FAIL"
            logger.info(f"{test_name:25} {status:8} - {message}")
            if success:
                passed += 1

        # Guard the percentage against an empty result list.
        pass_rate = passed / total * 100 if total else 0.0
        logger.info("="*60)
        logger.info(f"πŸ“ˆ Results: {passed}/{total} tests passed ({pass_rate:.1f}%)")
        logger.info(f"πŸ•’ Test completed at: {datetime.now().isoformat()}")

        if total == 0:
            logger.warning("No tests were run")
            return False
        if passed == total:
            logger.info("πŸŽ‰ All voice services integration tests PASSED!")
            return True
        logger.warning(f"⚠️ {total - passed} test(s) failed")
        return False
async def main():
    """Run the whole suite and translate its verdict into an exit status.

    Returns:
        0 when ``run_all_tests()`` yields a truthy value, otherwise 1.
    """
    all_passed = await VoiceServiceTester().run_all_tests()
    if all_passed:
        return 0
    return 1
if __name__ == "__main__":
    # sys.exit is the supported way to set the process status; the bare
    # exit() helper is injected by the site module for interactive use and
    # is missing when Python runs with -S.
    import sys

    try:
        exit_code = asyncio.run(main())
        sys.exit(exit_code)
    except KeyboardInterrupt:
        logger.info("❌ Tests interrupted by user")
        sys.exit(1)
    except Exception as e:
        # SystemExit is not an Exception subclass, so the sys.exit() call
        # above is never swallowed by this handler.
        logger.error(f"❌ Test runner failed: {e}")
        sys.exit(1)