Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Example MCP client for STT service integration | |
| This demonstrates how to use the MCP-enabled STT service | |
| from another application or service. | |
| """ | |
| import asyncio | |
| import json | |
| import tempfile | |
| import os | |
| from typing import Optional | |
# Mock MCP client for demonstration
# In real usage, you would use the official MCP client library
class MockMCPClient:
    """Stand-in for the official MCP client library.

    Mimics the ``list_tools`` / ``call_tool`` API with canned responses so
    the integration examples below can run without a live STT server.
    """

    def __init__(self, server_command: str):
        # Remember the command that would launch the real MCP server process.
        self.server_command = server_command
        print(f"π Connecting to MCP server: {server_command}")

    async def list_tools(self):
        """Return the fixed catalogue of tools this mock server exposes."""
        catalogue = [
            {"name": "stt_transcribe", "description": "Transcribe single audio file"},
            {"name": "stt_batch_transcribe", "description": "Batch transcribe multiple files"},
            {"name": "stt_get_info", "description": "Get system information"},
        ]
        return catalogue

    async def call_tool(self, tool_name: str, arguments: dict):
        """Log the invocation and return a canned MCP-style response.

        The response mirrors the MCP wire shape: a dict with a ``content``
        list whose first entry carries a JSON ``text`` payload.
        """
        print(f"π Calling tool: {tool_name}")
        print(f"π Arguments: {json.dumps(arguments, indent=2)}")

        # Guard-clause dispatch on the tool name; unknown tools fall through.
        if tool_name == "stt_get_info":
            info_payload = {
                "system_info": "ZeroGPU: Active\\nGPU Name: H200\\nMCP Server: β Available",
                "mcp_status": "β MCP Server Active",
                "available_tools": ["stt_transcribe", "stt_batch_transcribe", "stt_get_info"],
                "supported_languages": ["auto", "en", "es", "fr", "de"],
                "supported_models": ["tiny", "base", "small", "medium", "large-v2"],
            }
            return {"content": [{"text": json.dumps(info_payload)}]}

        if tool_name == "stt_transcribe":
            # Echo back the requested language/model so callers see their options.
            transcript_payload = {
                "status": "β Transcribed 5.2s audio in 1.1s on H200 (RTF: 0.21x)",
                "transcription": "Hello, this is a sample transcription from the STT service.",
                "timestamps": "[0.0s - 2.5s]: Hello, this is a sample\\n[2.5s - 5.2s]: transcription from the STT service.",
                "language": arguments.get("language", "auto"),
                "model_size": arguments.get("model_size", "base"),
            }
            return {"content": [{"text": json.dumps(transcript_payload)}]}

        return {"content": [{"text": "Unknown tool"}]}
async def demonstrate_stt_info():
    """Show how a client queries the STT service for its capabilities."""
    banner = "=" * 60
    print("\\n" + banner)
    print("π Demonstrating STT Service Info")
    print(banner)

    mcp = MockMCPClient("python app.py --mcp-only")

    # Ask the service to describe itself, then unpack the JSON payload.
    reply = await mcp.call_tool("stt_get_info", {})
    info = json.loads(reply["content"][0]["text"])

    print("π STT Service Information:")
    print(f" Status: {info['mcp_status']}")
    print(f" Available Tools: {', '.join(info['available_tools'])}")
    print(f" Supported Languages: {len(info['supported_languages'])} languages")
    print(f" Supported Models: {', '.join(info['supported_models'])}")
async def demonstrate_single_transcription():
    """Walk through transcribing one audio file via the MCP tool call."""
    banner = "=" * 60
    print("\\n" + banner)
    print("π€ Demonstrating Single Audio Transcription")
    print(banner)

    mcp = MockMCPClient("python app.py --mcp-only")

    # Pretend the user supplied this recording.
    audio_path = "/path/to/user_audio.wav"
    reply = await mcp.call_tool(
        "stt_transcribe",
        {
            "audio_path": audio_path,
            "language": "auto",
            "model_size": "base",
            "return_timestamps": True,
        },
    )
    payload = json.loads(reply["content"][0]["text"])

    print(f"π― Input: {audio_path}")
    print(f"π Transcription: {payload['transcription']}")
    print(f"β±οΈ Processing: {payload['status']}")
    print(f"π Language: {payload['language']}")
    print(f"π€ Model: {payload['model_size']}")

    # Timestamps arrive as one delimiter-separated string; print non-blank rows.
    timestamps = payload.get('timestamps')
    if timestamps:
        print("π Timestamps:")
        for row in timestamps.split('\\n'):
            if row.strip():
                print(f" {row}")
async def demonstrate_voice_calendar_integration():
    """Sketch the full voice -> STT -> calendar-agent -> TTS pipeline.

    Returns a summary dict with the transcribed text, the simulated
    calendar action, and a mock end-to-end timing string.
    """
    banner = "=" * 60
    print("\\n" + banner)
    print("ποΈ Demonstrating Voice Calendar Integration")
    print(banner)

    # This shows how ChatCal could integrate with the STT service.
    mcp = MockMCPClient("python app.py --mcp-only")

    # Step 1: the user records a voice request.
    voice_request = "user_voice_request.wav"
    print(f"ποΈ User records voice: '{voice_request}'")

    # Step 2: transcribe the recording to text.
    print("π Transcribing voice request...")
    reply = await mcp.call_tool("stt_transcribe", {
        "audio_path": voice_request,
        "language": "auto",
        "model_size": "base"
    })
    payload = json.loads(reply["content"][0]["text"])
    user_text = payload['transcription']
    print(f"π Transcribed: '{user_text}'")

    # Step 3: hand the text to the calendar agent (simulated here;
    # a real deployment would run actual ChatCal logic).
    print("π€ Processing calendar request with LLM...")
    calendar_response = {
        "action": "schedule_meeting",
        "details": "Meeting scheduled for tomorrow at 2 PM",
        "success": True
    }
    print(f"β Calendar Action: {calendar_response['action']}")
    print(f"π Result: {calendar_response['details']}")

    # Step 4: speak the response back (a real system would call a TTS service).
    print("π Converting response to voice...")
    print(" (This would use the TTS service with MCP)")

    return {
        "user_input": user_text,
        "calendar_action": calendar_response,
        "processing_time": "Total: 2.3s (STT: 1.1s, LLM: 0.8s, TTS: 0.4s)"
    }
async def demonstrate_batch_processing():
    """Show batch transcription of several recordings in one tool call."""
    banner = "=" * 60
    print("\\n" + banner)
    print("π¦ Demonstrating Batch Audio Processing")
    print(banner)

    demo_client = MockMCPClient("python app.py --mcp-only")

    # Pretend the user queued these recordings for transcription.
    audio_files = [
        "/path/to/meeting_recording_1.wav",
        "/path/to/meeting_recording_2.wav",
        "/path/to/voice_memo_1.wav"
    ]
    print(f"π΅ Processing {len(audio_files)} audio files...")

    # A real client would call the actual batch tool; here we just narrate it.
    print("π Calling stt_batch_transcribe...")
    print(f"π Files: {', '.join(os.path.basename(f) for f in audio_files)}")

    # Canned result standing in for the real batch response.
    batch_result = {
        "batch_status": f"π Batch completed: {len(audio_files)} files in 3.5s",
        "results": [
            {
                "file": "meeting_recording_1.wav",
                "transcription": "Welcome everyone to today's meeting. Let's discuss the project updates.",
                "status": "β Transcribed 45.2s audio in 2.1s"
            },
            {
                "file": "meeting_recording_2.wav",
                "transcription": "The next milestone is scheduled for next Friday. Any questions?",
                "status": "β Transcribed 32.1s audio in 1.8s"
            },
            {
                "file": "voice_memo_1.wav",
                "transcription": "Remember to follow up with the client about the proposal.",
                "status": "β Transcribed 8.3s audio in 0.4s"
            }
        ]
    }

    print(f"π {batch_result['batch_status']}")
    print("π Results:")
    for entry in batch_result['results']:
        print(f" π {entry['file']}")
        print(f" π '{entry['transcription']}'")
        print(f" β‘ {entry['status']}")
async def main():
    """Run every demonstration in sequence, then print a closing summary.

    Fixes over the original: the pipeline-summary lines used f-strings
    with no placeholders (ruff F541) — now plain literals — and the
    unused ``integration_result`` local has been dropped.
    """
    print("π MCP-Enabled STT Service Integration Examples")
    print("π― This demonstrates how to integrate the STT service via MCP")

    # Run demonstrations
    await demonstrate_stt_info()
    await demonstrate_single_transcription()
    await demonstrate_batch_processing()

    # Voice calendar integration (main use case)
    await demonstrate_voice_calendar_integration()

    # Summary
    print("\\n" + "="*60)
    print("π Integration Summary")
    print("="*60)
    print("π§ MCP Benefits Demonstrated:")
    print(" - Direct tool integration without HTTP overhead")
    print(" - Type-safe structured communication")
    print(" - Real-time audio processing capabilities")
    print(" - Seamless integration with voice workflows")
    print("\\nπ― Use Cases Covered:")
    print(" - Single audio transcription")
    print(" - Batch audio processing")
    print(" - Voice-enabled calendar scheduling")
    print(" - System information and monitoring")
    print("\\nπ Voice Calendar Pipeline:")
    print(" π€ User Input: Voice recording")
    print(" π€ STT Service: Convert to text via MCP")
    print(" π€ ChatCal Agent: Process calendar request")
    print(" π TTS Service: Convert response to voice")
    print(" β‘ Total Processing: Sub-3 second response")
    print("\\nπ Next Steps:")
    print(" 1. Deploy STT service with MCP to Hugging Face")
    print(" 2. Update ChatCal to use MCP client")
    print(" 3. Add TTS service with MCP capabilities")
    print(" 4. Implement voice-first user interface")
# Script entry point: run all MCP integration demonstrations under asyncio.
if __name__ == "__main__":
    asyncio.run(main())