#!/usr/bin/env python3
"""
Example MCP client for STT service integration

This demonstrates how to use the MCP-enabled STT service
from another application or service.
"""

import asyncio
import json
import tempfile
import os
from typing import Optional

# Mock MCP client for demonstration
# In real usage, you would use the official MCP client library


class MockMCPClient:
    """Mock MCP client for demonstration purposes.

    Simulates an MCP connection to the STT service: ``list_tools`` reports
    the advertised tool set and ``call_tool`` returns canned responses in
    the MCP ``{"content": [{"text": ...}]}`` envelope.
    """

    def __init__(self, server_command: str):
        # Command that would launch the real MCP server (not executed here).
        self.server_command = server_command
        print(f"šŸ”Œ Connecting to MCP server: {server_command}")

    async def list_tools(self) -> list:
        """List available tools."""
        return [
            {"name": "stt_transcribe", "description": "Transcribe single audio file"},
            {"name": "stt_batch_transcribe", "description": "Batch transcribe multiple files"},
            {"name": "stt_get_info", "description": "Get system information"}
        ]

    async def call_tool(self, tool_name: str, arguments: dict) -> dict:
        """Simulate a tool call.

        Returns a mock MCP response envelope whose ``text`` field is a JSON
        document, mirroring what the real STT service would emit.
        """
        print(f"šŸ“ž Calling tool: {tool_name}")
        print(f"šŸ“ Arguments: {json.dumps(arguments, indent=2)}")

        # Mock responses based on tool name
        if tool_name == "stt_get_info":
            return {
                "content": [{
                    "text": json.dumps({
                        "system_info": "ZeroGPU: Active\nGPU Name: H200\nMCP Server: āœ… Available",
                        "mcp_status": "āœ… MCP Server Active",
                        "available_tools": ["stt_transcribe", "stt_batch_transcribe", "stt_get_info"],
                        "supported_languages": ["auto", "en", "es", "fr", "de"],
                        "supported_models": ["tiny", "base", "small", "medium", "large-v2"]
                    })
                }]
            }
        elif tool_name == "stt_transcribe":
            return {
                "content": [{
                    "text": json.dumps({
                        "status": "āœ… Transcribed 5.2s audio in 1.1s on H200 (RTF: 0.21x)",
                        "transcription": "Hello, this is a sample transcription from the STT service.",
                        "timestamps": "[0.0s - 2.5s]: Hello, this is a sample\n[2.5s - 5.2s]: transcription from the STT service.",
                        "language": arguments.get("language", "auto"),
                        "model_size": arguments.get("model_size", "base")
                    })
                }]
            }
        elif tool_name == "stt_batch_transcribe":
            # Fix: this tool is advertised by list_tools() but previously fell
            # through to the "Unknown tool" branch. Return one mock result per
            # requested file, shaped like the real batch response.
            audio_paths = arguments.get("audio_paths", [])
            return {
                "content": [{
                    "text": json.dumps({
                        "batch_status": f"šŸš€ Batch completed: {len(audio_paths)} files in 3.5s",
                        "results": [
                            {
                                "file": os.path.basename(path),
                                "transcription": "Hello, this is a sample transcription from the STT service.",
                                "status": "āœ… Transcribed 5.2s audio in 1.1s"
                            }
                            for path in audio_paths
                        ]
                    })
                }]
            }
        else:
            return {"content": [{"text": "Unknown tool"}]}


async def demonstrate_stt_info():
    """Demonstrate getting STT service information"""
    print("\n" + "="*60)
    print("šŸ” Demonstrating STT Service Info")
    print("="*60)

    client = MockMCPClient("python app.py --mcp-only")

    # Get service information
    result = await client.call_tool("stt_get_info", {})
    info_data = json.loads(result["content"][0]["text"])

    print("šŸ“Š STT Service Information:")
    print(f"   Status: {info_data['mcp_status']}")
    print(f"   Available Tools: {', '.join(info_data['available_tools'])}")
    print(f"   Supported Languages: {len(info_data['supported_languages'])} languages")
    print(f"   Supported Models: {', '.join(info_data['supported_models'])}")


async def demonstrate_single_transcription():
    """Demonstrate single audio file transcription"""
    print("\n" + "="*60)
    print("šŸŽ¤ Demonstrating Single Audio Transcription")
    print("="*60)

    client = MockMCPClient("python app.py --mcp-only")

    # Simulate transcribing an audio file
    audio_path = "/path/to/user_audio.wav"
    result = await client.call_tool("stt_transcribe", {
        "audio_path": audio_path,
        "language": "auto",
        "model_size": "base",
        "return_timestamps": True
    })

    transcription_data = json.loads(result["content"][0]["text"])

    print(f"šŸŽÆ Input: {audio_path}")
    print(f"šŸ“ Transcription: {transcription_data['transcription']}")
    print(f"ā±ļø  Processing: {transcription_data['status']}")
    print(f"šŸŒ Language: {transcription_data['language']}")
    print(f"šŸ¤– Model: {transcription_data['model_size']}")

    if transcription_data.get('timestamps'):
        print("šŸ“ Timestamps:")
        for line in transcription_data['timestamps'].split('\n'):
            if line.strip():
                print(f"   {line}")


async def demonstrate_voice_calendar_integration():
    """Demonstrate voice-enabled calendar integration"""
    print("\n" + "="*60)
    print("šŸ—“ļø Demonstrating Voice Calendar Integration")
    print("="*60)

    # This shows how ChatCal could integrate with the STT service
    client = MockMCPClient("python app.py --mcp-only")

    # Step 1: User records voice request
    voice_request = "user_voice_request.wav"
    print(f"šŸŽ™ļø User records voice: '{voice_request}'")

    # Step 2: Transcribe voice to text
    print("šŸ”„ Transcribing voice request...")
    result = await client.call_tool("stt_transcribe", {
        "audio_path": voice_request,
        "language": "auto",
        "model_size": "base"
    })

    transcription_data = json.loads(result["content"][0]["text"])
    user_text = transcription_data['transcription']
    print(f"šŸ“ Transcribed: '{user_text}'")

    # Step 3: Process with calendar agent (simulated)
    print("šŸ¤– Processing calendar request with LLM...")
    # This would be actual ChatCal logic
    calendar_response = {
        "action": "schedule_meeting",
        "details": "Meeting scheduled for tomorrow at 2 PM",
        "success": True
    }
    print(f"āœ… Calendar Action: {calendar_response['action']}")
    print(f"šŸ“… Result: {calendar_response['details']}")

    # Step 4: Convert response to voice (would use TTS service)
    print("šŸ”Š Converting response to voice...")
    print("   (This would use the TTS service with MCP)")

    return {
        "user_input": user_text,
        "calendar_action": calendar_response,
        "processing_time": "Total: 2.3s (STT: 1.1s, LLM: 0.8s, TTS: 0.4s)"
    }


async def demonstrate_batch_processing():
    """Demonstrate batch audio processing"""
    print("\n" + "="*60)
    print("šŸ“¦ Demonstrating Batch Audio Processing")
    print("="*60)

    client = MockMCPClient("python app.py --mcp-only")

    # Simulate batch processing multiple audio files
    audio_files = [
        "/path/to/meeting_recording_1.wav",
        "/path/to/meeting_recording_2.wav",
        "/path/to/voice_memo_1.wav"
    ]

    print(f"šŸŽµ Processing {len(audio_files)} audio files...")

    # This would call the actual batch transcribe tool
    print("šŸ“ž Calling stt_batch_transcribe...")
    print(f"šŸ“ Files: {', '.join([os.path.basename(f) for f in audio_files])}")

    # Mock batch result
    batch_result = {
        "batch_status": f"šŸš€ Batch completed: {len(audio_files)} files in 3.5s",
        "results": [
            {
                "file": "meeting_recording_1.wav",
                "transcription": "Welcome everyone to today's meeting. Let's discuss the project updates.",
                "status": "āœ… Transcribed 45.2s audio in 2.1s"
            },
            {
                "file": "meeting_recording_2.wav",
                "transcription": "The next milestone is scheduled for next Friday. Any questions?",
                "status": "āœ… Transcribed 32.1s audio in 1.8s"
            },
            {
                "file": "voice_memo_1.wav",
                "transcription": "Remember to follow up with the client about the proposal.",
                "status": "āœ… Transcribed 8.3s audio in 0.4s"
            }
        ]
    }

    print(f"šŸ“Š {batch_result['batch_status']}")
    print("šŸ“„ Results:")
    for result in batch_result['results']:
        print(f"   šŸ“ {result['file']}")
        print(f"      šŸ“ '{result['transcription']}'")
        print(f"      ⚔ {result['status']}")


async def main():
    """Run all demonstrations"""
    print("šŸš€ MCP-Enabled STT Service Integration Examples")
    print("šŸŽÆ This demonstrates how to integrate the STT service via MCP")

    # Run demonstrations
    await demonstrate_stt_info()
    await demonstrate_single_transcription()
    await demonstrate_batch_processing()

    # Voice calendar integration (main use case)
    integration_result = await demonstrate_voice_calendar_integration()

    # Summary
    print("\n" + "="*60)
    print("šŸŽ‰ Integration Summary")
    print("="*60)
    print("šŸ”§ MCP Benefits Demonstrated:")
    print("   - Direct tool integration without HTTP overhead")
    print("   - Type-safe structured communication")
    print("   - Real-time audio processing capabilities")
    print("   - Seamless integration with voice workflows")
    print("\nšŸŽÆ Use Cases Covered:")
    print("   - Single audio transcription")
    print("   - Batch audio processing")
    print("   - Voice-enabled calendar scheduling")
    print("   - System information and monitoring")
    print("\nšŸš€ Voice Calendar Pipeline:")
    print("   šŸ‘¤ User Input: Voice recording")
    print("   šŸŽ¤ STT Service: Convert to text via MCP")
    print("   šŸ¤– ChatCal Agent: Process calendar request")
    print("   šŸ”Š TTS Service: Convert response to voice")
    print("   ⚔ Total Processing: Sub-3 second response")
    print("\nšŸ“ Next Steps:")
    print("   1. Deploy STT service with MCP to Hugging Face")
    print("   2. Update ChatCal to use MCP client")
    print("   3. Add TTS service with MCP capabilities")
    print("   4. Implement voice-first user interface")


if __name__ == "__main__":
    asyncio.run(main())