Spaces:

pgits
/

ChatCal.ai-1

Paused

ChatCal.ai-1 / webrtc /tests /test_stt_tts_integration.py

Peter Michael Gits

feat: Complete voice services integration with comprehensive test suite v0.5.0

27649f2 8 months ago

12.4 kB

	#!/usr/bin/env python3
	"""
	Test cases for STT/TTS WebSocket integration
	Tests the complete voice pipeline: Audio → STT → TTS → Audio
	"""

	import asyncio
	import websockets
	import json
	import base64
	import tempfile
	import os
	from datetime import datetime
	import logging

	# Configure logging: INFO level on the root logger so the emoji progress
	# messages emitted by the tests are visible when the script is run directly.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	# Service URLs.
	# Each endpoint can be overridden through an environment variable of the same
	# name (e.g. to target a staging deployment or a ChatCal server on another
	# port); the defaults are the original hard-coded endpoints.
	STT_WEBSOCKET_URL = os.environ.get(
	    "STT_WEBSOCKET_URL", "wss://pgits-stt-gpu-service.hf.space/ws/stt"
	)
	TTS_WEBSOCKET_URL = os.environ.get(
	    "TTS_WEBSOCKET_URL", "wss://pgits-tts-gpu-service.hf.space/ws/tts"
	)
	CHATCAL_WEBSOCKET_URL = os.environ.get(
	    "CHATCAL_WEBSOCKET_URL", "ws://localhost:7860/ws/webrtc/test-client"
	)

	class VoiceServiceTester:
	    """Integration test suite for the STT, TTS and ChatCal WebRTC voice services.

	    Each ``test_*`` coroutine opens a WebSocket to one service, performs a
	    single request/response exchange and appends a
	    ``(test_name, passed, message)`` tuple to ``self.test_results``.
	    Failures are recorded instead of raised so that the remaining tests
	    still run and ``print_test_results`` can summarise everything.
	    """

	    # Seconds to wait for the initial "connection confirmed" message.
	    # Without a bound, a server that accepts the socket but never speaks
	    # would hang the whole run.
	    HANDSHAKE_TIMEOUT = 30.0

	    def __init__(self):
	        # (test_name, passed, message) tuples in execution order.
	        self.test_results = []

	    @staticmethod
	    def _describe(exc):
	        """Return a non-empty description of *exc*.

	        Some exceptions (e.g. timeouts) have an empty ``str()``; fall back
	        to the class name so the recorded failure message is never blank.
	        """
	        return str(exc) or type(exc).__name__

	    async def _expect_confirmation(self, websocket, expected_type, service):
	        """Receive the first message on *websocket* and verify its ``type``.

	        Args:
	            websocket: an open connection exposing an awaitable ``recv()``.
	            expected_type: value the message's ``"type"`` field must have.
	            service: label used in log and error messages.

	        Raises:
	            asyncio.TimeoutError: nothing arrived within HANDSHAKE_TIMEOUT.
	            RuntimeError: the message carried a different ``type``.
	        """
	        raw = await asyncio.wait_for(websocket.recv(), timeout=self.HANDSHAKE_TIMEOUT)
	        received_type = json.loads(raw).get("type")
	        # Explicit check rather than ``assert``: asserts are stripped under
	        # ``python -O`` and carry no message for the failure record.
	        if received_type != expected_type:
	            raise RuntimeError(
	                f"{service} handshake: expected message type "
	                f"{expected_type!r}, got {received_type!r}"
	            )
	        logger.info(f"✅ {service} connection confirmed")

	    async def test_stt_service(self):
	        """Send placeholder audio to the STT service and expect a transcription.

	        Returns:
	            bool: True when an ``stt_transcription`` message is received;
	            False on an ``stt_error`` message, any other message type, or
	            any exception (connection failure, timeout, malformed reply).
	            Exactly one entry is appended to ``self.test_results``.
	        """
	        logger.info("🎤 Testing STT WebSocket service...")

	        try:
	            test_audio_data = self.create_test_audio()

	            # Connect to STT service
	            async with websockets.connect(STT_WEBSOCKET_URL) as websocket:
	                await self._expect_confirmation(websocket, "stt_connection_confirmed", "STT")

	                # Send test audio (binary payload is base64-encoded for JSON)
	                message = {
	                    "type": "stt_audio_chunk",
	                    "audio_data": base64.b64encode(test_audio_data).decode('utf-8'),
	                    "language": "auto",
	                    "model_size": "base",
	                }
	                await websocket.send(json.dumps(message))
	                logger.info("📤 Sent test audio to STT service")

	                # Wait for transcription response
	                response = await asyncio.wait_for(websocket.recv(), timeout=30.0)
	                response_data = json.loads(response)
	                response_type = response_data.get("type")

	                if response_type == "stt_transcription":
	                    transcription = response_data.get("text", "")
	                    logger.info(f"📝 STT transcription received: {transcription}")
	                    self.test_results.append(("STT Service", True, f"Transcription: {transcription}"))
	                    return True

	                if response_type == "stt_error":
	                    error_msg = response_data.get("message", "Unknown error")
	                    logger.error(f"❌ STT error: {error_msg}")
	                    self.test_results.append(("STT Service", False, f"Error: {error_msg}"))
	                    return False

	                logger.warning(f"⚠️ Unexpected STT response: {response_data}")
	                self.test_results.append(("STT Service", False, f"Unexpected response: {response_data}"))
	                return False

	        except Exception as e:
	            # Boundary handler: a failing service must not abort the run.
	            detail = self._describe(e)
	            logger.error(f"❌ STT service test failed: {detail}")
	            self.test_results.append(("STT Service", False, f"Exception: {detail}"))
	            return False

	    def _save_tts_audio(self, audio_bytes):
	        """Best-effort write of synthesized audio to the system temp directory.

	        The file exists only for manual inspection, so an I/O failure is
	        logged as a warning and does not affect the test verdict.

	        Returns:
	            str | None: path written, or None when the write failed.
	        """
	        output_path = os.path.join(tempfile.gettempdir(), "tts_test_output.wav")
	        try:
	            with open(output_path, 'wb') as f:
	                f.write(audio_bytes)
	        except OSError as e:
	            logger.warning(f"⚠️ Could not save test audio to {output_path}: {e}")
	            return None
	        logger.info(f"💾 Test audio saved to: {output_path}")
	        return output_path

	    async def test_tts_service(self):
	        """Send a sentence to the TTS service and expect synthesized audio back.

	        Returns:
	            bool: True when a ``tts_audio_response`` message is received
	            (and its ``audio_data``, if present, is valid base64); False on
	            a ``tts_error`` message, any other message type, or any
	            exception. Exactly one entry is appended to
	            ``self.test_results``.
	        """
	        logger.info("🔊 Testing TTS WebSocket service...")

	        try:
	            test_text = "Hello, this is a test of the text-to-speech service."

	            # Connect to TTS service
	            async with websockets.connect(TTS_WEBSOCKET_URL) as websocket:
	                await self._expect_confirmation(websocket, "tts_connection_confirmed", "TTS")

	                # Send test text for synthesis
	                message = {
	                    "type": "tts_synthesize",
	                    "text": test_text,
	                    "voice_preset": "v2/en_speaker_6",
	                }
	                await websocket.send(json.dumps(message))
	                logger.info(f"📤 Sent test text to TTS service: {test_text}")

	                # Wait for audio response (synthesis gets a longer budget than STT)
	                response = await asyncio.wait_for(websocket.recv(), timeout=60.0)
	                response_data = json.loads(response)
	                response_type = response_data.get("type")

	                if response_type == "tts_audio_response":
	                    audio_data = response_data.get("audio_data", "")
	                    audio_size = response_data.get("audio_size", 0)
	                    logger.info(f"🔊 TTS audio received: {audio_size} bytes")

	                    # Decode BEFORE recording the verdict: an undecodable
	                    # payload raises here and is recorded once, as a failure,
	                    # instead of producing a PASS followed by a FAIL entry.
	                    if audio_data:
	                        self._save_tts_audio(base64.b64decode(audio_data))

	                    self.test_results.append(("TTS Service", True, f"Audio generated: {audio_size} bytes"))
	                    return True

	                if response_type == "tts_error":
	                    error_msg = response_data.get("message", "Unknown error")
	                    logger.error(f"❌ TTS error: {error_msg}")
	                    self.test_results.append(("TTS Service", False, f"Error: {error_msg}"))
	                    return False

	                logger.warning(f"⚠️ Unexpected TTS response: {response_data}")
	                self.test_results.append(("TTS Service", False, f"Unexpected response: {response_data}"))
	                return False

	        except Exception as e:
	            # Boundary handler: a failing service must not abort the run.
	            detail = self._describe(e)
	            logger.error(f"❌ TTS service test failed: {detail}")
	            self.test_results.append(("TTS Service", False, f"Exception: {detail}"))
	            return False

	    async def test_chatcal_integration(self):
	        """Send audio to the ChatCal WebRTC endpoint and expect STT plus TTS replies.

	        Reads up to three messages after sending one audio chunk and looks
	        for a ``transcription`` and a ``tts_playback`` message, in any order.

	        Returns:
	            bool: True only when both message types were received; False
	            otherwise, including on any exception. Exactly one entry is
	            appended to ``self.test_results``.
	        """
	        logger.info("🌐 Testing ChatCal WebRTC integration...")

	        try:
	            # This test requires ChatCal WebRTC server to be running locally
	            test_audio_data = self.create_test_audio()

	            async with websockets.connect(CHATCAL_WEBSOCKET_URL) as websocket:
	                await self._expect_confirmation(websocket, "connection_confirmed", "ChatCal WebRTC")

	                # Send test audio chunk
	                message = {
	                    "type": "audio_chunk",
	                    "audio_data": base64.b64encode(test_audio_data).decode('utf-8'),
	                    "sample_rate": 16000,
	                }
	                await websocket.send(json.dumps(message))
	                logger.info("📤 Sent test audio to ChatCal WebRTC")

	                transcription_received = False
	                tts_playback_received = False

	                for _ in range(3):  # Wait for up to 3 messages
	                    try:
	                        response = await asyncio.wait_for(websocket.recv(), timeout=30.0)
	                    except asyncio.TimeoutError:
	                        # Stop waiting but keep what was already received so
	                        # a partial result is reported accurately below.
	                        logger.warning("⏱️ Timed out waiting for further ChatCal messages")
	                        break

	                    response_data = json.loads(response)
	                    response_type = response_data.get("type")

	                    if response_type == "transcription":
	                        transcription = response_data.get("text", "")
	                        logger.info(f"📝 Transcription received: {transcription}")
	                        transcription_received = True
	                    elif response_type == "tts_playback":
	                        audio_size = response_data.get("audio_size", 0)
	                        logger.info(f"🔊 TTS playback received: {audio_size} bytes")
	                        tts_playback_received = True
	                    elif response_type == "error":
	                        logger.error(f"❌ ChatCal error: {response_data.get('message')}")

	                    # Stop as soon as both halves of the loop were seen,
	                    # whichever arrived first.
	                    if transcription_received and tts_playback_received:
	                        break

	            if transcription_received and tts_playback_received:
	                self.test_results.append(("ChatCal Integration", True, "Complete voice loop working"))
	                return True
	            if transcription_received:
	                self.test_results.append(("ChatCal Integration", False, "STT working but no TTS"))
	                return False
	            self.test_results.append(("ChatCal Integration", False, "No transcription received"))
	            return False

	        except Exception as e:
	            # Boundary handler: a failing service must not abort the run.
	            detail = self._describe(e)
	            logger.error(f"❌ ChatCal integration test failed: {detail}")
	            self.test_results.append(("ChatCal Integration", False, f"Exception: {detail}"))
	            return False

	    def create_test_audio(self):
	        """Return a fixed placeholder payload used as "audio" in the tests.

	        NOTE: this is NOT decodable audio. The payload is the 6-byte ASCII
	        string ``GIF89a`` (a GIF signature, not a WebM header) followed by
	        1000 zero bytes. It only exercises the transport path; a real
	        MediaRecorder-style WebM clip would be needed to get a meaningful
	        transcription.

	        Returns:
	            bytes: 1006 bytes of placeholder data.
	        """
	        placeholder_header = b'GIF89a'
	        return placeholder_header + b'\x00' * 1000

	    async def run_all_tests(self, pause_seconds=2.0):
	        """Run the STT and TTS tests, then the ChatCal test if both passed.

	        Args:
	            pause_seconds: delay inserted after each standalone service test.

	        Returns:
	            bool: True when every recorded test passed (the value returned
	            by ``print_test_results``).
	        """
	        logger.info("🚀 Starting voice services integration tests...")
	        logger.info(f"Test started at: {datetime.now().isoformat()}")

	        # Test individual services
	        stt_result = await self.test_stt_service()
	        await asyncio.sleep(pause_seconds)  # Brief pause between tests

	        tts_result = await self.test_tts_service()
	        await asyncio.sleep(pause_seconds)

	        # Test full integration (only if individual services work)
	        if stt_result and tts_result:
	            logger.info("🔗 Individual services working, testing integration...")
	            await self.test_chatcal_integration()
	        else:
	            logger.warning("⚠️ Skipping integration test - individual services failed")
	            self.test_results.append(("ChatCal Integration", False, "Skipped - dependencies failed"))

	        # Print results and hand the overall verdict back to the caller.
	        return self.print_test_results()

	    def print_test_results(self):
	        """Log a formatted summary of ``self.test_results``.

	        Returns:
	            bool: True when at least one test was recorded and all recorded
	            tests passed; False otherwise (including when nothing was
	            recorded).
	        """
	        logger.info("\n" + "="*60)
	        logger.info("📊 VOICE SERVICES TEST RESULTS")
	        logger.info("="*60)

	        total = len(self.test_results)
	        passed = 0

	        for test_name, success, message in self.test_results:
	            status = "✅ PASS" if success else "❌ FAIL"
	            logger.info(f"{test_name:25} {status:8} - {message}")
	            if success:
	                passed += 1

	        # Guard the percentage against an empty result list.
	        percentage = passed / total * 100 if total else 0.0

	        logger.info("="*60)
	        logger.info(f"📈 Results: {passed}/{total} tests passed ({percentage:.1f}%)")
	        logger.info(f"🕒 Test completed at: {datetime.now().isoformat()}")

	        if total == 0:
	            logger.warning("⚠️ No test results recorded")
	            return False

	        if passed == total:
	            logger.info("🎉 All voice services integration tests PASSED!")
	            return True

	        logger.warning(f"⚠️ {total - passed} test(s) failed")
	        return False

	async def main():
	    """Run the full voice-service test suite and map the outcome to an exit code.

	    Returns:
	        int: 0 when every recorded test passed, 1 otherwise.
	    """
	    tester = VoiceServiceTester()
	    success = await tester.run_all_tests()
	    if success is None:
	        # run_all_tests() gave no verdict; derive it from the recorded
	        # (name, passed, message) tuples so the exit code is still
	        # meaningful. An empty result list counts as failure.
	        results = tester.test_results
	        success = bool(results) and all(passed for _, passed, _ in results)
	    return 0 if success else 1

	if __name__ == "__main__":
	    # Script entry point: run the async test runner and exit with its code.
	    # Any interruption or unexpected runner failure maps to exit code 1.
	    try:
	        exit_code = asyncio.run(main())
	    except KeyboardInterrupt:
	        logger.info("❌ Tests interrupted by user")
	        exit_code = 1
	    except Exception as e:
	        logger.error(f"❌ Test runner failed: {e}")
	        exit_code = 1
	    # ``raise SystemExit`` instead of the ``exit()`` helper: that helper is
	    # injected by the ``site`` module for interactive use and is not
	    # guaranteed to exist (e.g. under ``python -S``).
	    raise SystemExit(exit_code)