# stt-gpu-service / example_mcp_client.py
# Author: Peter Michael Gits
# Commit 05e2c32 - feat: Add MCP server capabilities alongside Gradio interface
#!/usr/bin/env python3
"""
Example MCP client for STT service integration
This demonstrates how to use the MCP-enabled STT service
from another application or service.
"""
import asyncio
import json
import tempfile
import os
from typing import Optional
# Mock MCP client for demonstration
# In real usage, you would use the official MCP client library
class MockMCPClient:
    """Mock MCP client for demonstration purposes"""

    # Canned catalogue of (name, description) pairs for the tools the real
    # STT MCP server would expose.
    _TOOL_CATALOG = (
        ("stt_transcribe", "Transcribe single audio file"),
        ("stt_batch_transcribe", "Batch transcribe multiple files"),
        ("stt_get_info", "Get system information"),
    )

    def __init__(self, server_command: str):
        """Remember the launch command and announce the (mock) connection."""
        self.server_command = server_command
        print(f"πŸ”Œ Connecting to MCP server: {server_command}")

    async def list_tools(self):
        """List available tools"""
        return [
            {"name": name, "description": description}
            for name, description in self._TOOL_CATALOG
        ]

    async def call_tool(self, tool_name: str, arguments: dict):
        """Simulate tool call"""
        print(f"πŸ“ž Calling tool: {tool_name}")
        print(f"πŸ“ Arguments: {json.dumps(arguments, indent=2)}")
        # Build a canned payload per tool; unknown tools short-circuit below.
        if tool_name == "stt_get_info":
            payload = {
                "system_info": "ZeroGPU: Active\\nGPU Name: H200\\nMCP Server: βœ… Available",
                "mcp_status": "βœ… MCP Server Active",
                "available_tools": ["stt_transcribe", "stt_batch_transcribe", "stt_get_info"],
                "supported_languages": ["auto", "en", "es", "fr", "de"],
                "supported_models": ["tiny", "base", "small", "medium", "large-v2"],
            }
        elif tool_name == "stt_transcribe":
            payload = {
                "status": "βœ… Transcribed 5.2s audio in 1.1s on H200 (RTF: 0.21x)",
                "transcription": "Hello, this is a sample transcription from the STT service.",
                "timestamps": "[0.0s - 2.5s]: Hello, this is a sample\\n[2.5s - 5.2s]: transcription from the STT service.",
                "language": arguments.get("language", "auto"),
                "model_size": arguments.get("model_size", "base"),
            }
        else:
            return {"content": [{"text": "Unknown tool"}]}
        return {"content": [{"text": json.dumps(payload)}]}
async def demonstrate_stt_info():
    """Demonstrate getting STT service information"""
    # Section banner.
    print("\\n" + "="*60)
    print("πŸ” Demonstrating STT Service Info")
    print("="*60)
    mcp = MockMCPClient("python app.py --mcp-only")
    # Ask the service to describe itself, then unpack the JSON payload.
    response = await mcp.call_tool("stt_get_info", {})
    info = json.loads(response["content"][0]["text"])
    print("πŸ“Š STT Service Information:")
    print(f" Status: {info['mcp_status']}")
    print(f" Available Tools: {', '.join(info['available_tools'])}")
    print(f" Supported Languages: {len(info['supported_languages'])} languages")
    print(f" Supported Models: {', '.join(info['supported_models'])}")
async def demonstrate_single_transcription():
    """Demonstrate single audio file transcription"""
    # Section banner.
    print("\\n" + "="*60)
    print("🎀 Demonstrating Single Audio Transcription")
    print("="*60)
    mcp = MockMCPClient("python app.py --mcp-only")
    # Pretend the user supplied this recording.
    sample_path = "/path/to/user_audio.wav"
    response = await mcp.call_tool("stt_transcribe", {
        "audio_path": sample_path,
        "language": "auto",
        "model_size": "base",
        "return_timestamps": True,
    })
    payload = json.loads(response["content"][0]["text"])
    print(f"🎯 Input: {sample_path}")
    print(f"πŸ“ Transcription: {payload['transcription']}")
    print(f"⏱️ Processing: {payload['status']}")
    print(f"🌍 Language: {payload['language']}")
    print(f"πŸ€– Model: {payload['model_size']}")
    # Echo any per-segment timestamps, one segment per line.
    stamps = payload.get('timestamps')
    if stamps:
        print("πŸ“ Timestamps:")
        for entry in stamps.split('\\n'):
            if entry.strip():
                print(f" {entry}")
async def demonstrate_voice_calendar_integration():
    """Demonstrate voice-enabled calendar integration"""
    # Section banner.
    print("\\n" + "="*60)
    print("πŸ—“οΈ Demonstrating Voice Calendar Integration")
    print("="*60)
    # This shows how ChatCal could integrate with the STT service.
    mcp = MockMCPClient("python app.py --mcp-only")
    # Step 1: the user records a spoken request.
    recording = "user_voice_request.wav"
    print(f"πŸŽ™οΈ User records voice: '{recording}'")
    # Step 2: run the recording through the STT tool.
    print("πŸ”„ Transcribing voice request...")
    response = await mcp.call_tool("stt_transcribe", {
        "audio_path": recording,
        "language": "auto",
        "model_size": "base",
    })
    payload = json.loads(response["content"][0]["text"])
    spoken_text = payload['transcription']
    print(f"πŸ“ Transcribed: '{spoken_text}'")
    # Step 3: hand the text to the calendar agent (simulated here;
    # this would be actual ChatCal logic).
    print("πŸ€– Processing calendar request with LLM...")
    calendar_response = {
        "action": "schedule_meeting",
        "details": "Meeting scheduled for tomorrow at 2 PM",
        "success": True,
    }
    print(f"βœ… Calendar Action: {calendar_response['action']}")
    print(f"πŸ“… Result: {calendar_response['details']}")
    # Step 4: speak the answer back (would use the TTS service).
    print("πŸ”Š Converting response to voice...")
    print(" (This would use the TTS service with MCP)")
    return {
        "user_input": spoken_text,
        "calendar_action": calendar_response,
        "processing_time": "Total: 2.3s (STT: 1.1s, LLM: 0.8s, TTS: 0.4s)",
    }
async def demonstrate_batch_processing():
    """Demonstrate batch audio processing"""
    # Section banner.
    print("\\n" + "="*60)
    print("πŸ“¦ Demonstrating Batch Audio Processing")
    print("="*60)
    mcp = MockMCPClient("python app.py --mcp-only")
    # Pretend these recordings were uploaded for batch transcription.
    audio_files = [
        "/path/to/meeting_recording_1.wav",
        "/path/to/meeting_recording_2.wav",
        "/path/to/voice_memo_1.wav",
    ]
    print(f"🎡 Processing {len(audio_files)} audio files...")
    # This would call the actual batch transcribe tool.
    print("πŸ“ž Calling stt_batch_transcribe...")
    print(f"πŸ“ Files: {', '.join(os.path.basename(f) for f in audio_files)}")
    # Canned (file, transcription, status) outcomes standing in for the
    # real tool response.
    mock_outcomes = [
        ("meeting_recording_1.wav",
         "Welcome everyone to today's meeting. Let's discuss the project updates.",
         "βœ… Transcribed 45.2s audio in 2.1s"),
        ("meeting_recording_2.wav",
         "The next milestone is scheduled for next Friday. Any questions?",
         "βœ… Transcribed 32.1s audio in 1.8s"),
        ("voice_memo_1.wav",
         "Remember to follow up with the client about the proposal.",
         "βœ… Transcribed 8.3s audio in 0.4s"),
    ]
    batch_result = {
        "batch_status": f"πŸš€ Batch completed: {len(audio_files)} files in 3.5s",
        "results": [
            {"file": name, "transcription": text, "status": status}
            for name, text, status in mock_outcomes
        ],
    }
    print(f"πŸ“Š {batch_result['batch_status']}")
    print("πŸ“„ Results:")
    for item in batch_result['results']:
        print(f" πŸ“ {item['file']}")
        print(f" πŸ“ '{item['transcription']}'")
        print(f" ⚑ {item['status']}")
async def main():
    """Run all demonstrations.

    Exercises each mock MCP workflow in turn (service info, single-file
    transcription, batch transcription, and the voice-calendar pipeline),
    then prints a summary of benefits, use cases, and next steps.
    """
    print("πŸš€ MCP-Enabled STT Service Integration Examples")
    print("🎯 This demonstrates how to integrate the STT service via MCP")
    # Run demonstrations
    await demonstrate_stt_info()
    await demonstrate_single_transcription()
    await demonstrate_batch_processing()
    # Voice calendar integration (main use case); the returned summary dict
    # is not used here, so the result is deliberately discarded.
    await demonstrate_voice_calendar_integration()
    # Summary
    print("\\n" + "="*60)
    print("πŸŽ‰ Integration Summary")
    print("="*60)
    print("πŸ”§ MCP Benefits Demonstrated:")
    print(" - Direct tool integration without HTTP overhead")
    print(" - Type-safe structured communication")
    print(" - Real-time audio processing capabilities")
    print(" - Seamless integration with voice workflows")
    print("\\n🎯 Use Cases Covered:")
    print(" - Single audio transcription")
    print(" - Batch audio processing")
    print(" - Voice-enabled calendar scheduling")
    print(" - System information and monitoring")
    # NOTE: these were pointless f-strings (no placeholders); plain string
    # literals produce identical output.
    print("\\nπŸš€ Voice Calendar Pipeline:")
    print(" πŸ‘€ User Input: Voice recording")
    print(" 🎀 STT Service: Convert to text via MCP")
    print(" πŸ€– ChatCal Agent: Process calendar request")
    print(" πŸ”Š TTS Service: Convert response to voice")
    print(" ⚑ Total Processing: Sub-3 second response")
    print("\\nπŸ“ Next Steps:")
    print(" 1. Deploy STT service with MCP to Hugging Face")
    print(" 2. Update ChatCal to use MCP client")
    print(" 3. Add TTS service with MCP capabilities")
    print(" 4. Implement voice-first user interface")
# Entry point: run the full async demo suite when executed as a script.
if __name__ == "__main__":
    asyncio.run(main())