# Peter Michael Gits
# Add GPU Auto-Pause and Manual Control to TTS Service
# 469169f
#!/usr/bin/env python3
"""
TTS GPU Service for HuggingFace Spaces - GPU Management with Auto-Pause
Text-to-Speech microservice for voiceCal.ai integration with cost optimization
"""
import os
import logging
import time
import threading
import gradio as gr
# Configure logging: set the root logger to INFO via basicConfig and create
# the module-level logger (named after this module) used by everything below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
class SimpleTTSService:
    """Simulated TTS service with idle tracking for GPU auto-pause logging.

    The service records the time of the most recent request and runs a daemon
    thread that logs when the idle time exceeds ``gpu_timeout``. No speech
    model is loaded: synthesis is simulated with a short sleep and the result
    is a human-readable status string.
    """

    DEFAULT_GPU_TIMEOUT = 600  # seconds (10 minutes)
    MONITOR_INTERVAL = 60  # seconds between idle checks
    PREVIEW_LENGTH = 50  # characters of the input echoed in logs and results
    DEMO_PHRASES = ("test", "demo", "hello", "hello world")

    def __init__(self):
        """Read configuration, detect the device and start the idle monitor."""
        # Idle threshold in seconds, taken from the GPU_TIMEOUT environment
        # variable. An unusable value falls back to the default instead of
        # crashing the whole app at import time.
        raw_timeout = os.getenv("GPU_TIMEOUT")
        self.gpu_timeout = self._parse_timeout(raw_timeout)

        # time.time() of the most recent activity; guarded by activity_lock
        # because the monitor thread reads it while request handlers write it.
        self.last_activity = time.time()
        self.activity_lock = threading.Lock()

        logger.info("πŸŽ™οΈ Initializing TTS Service with GPU Auto-Pause")
        if raw_timeout is not None and self._parse_timeout(raw_timeout, None) is None:
            logger.warning(
                "Ignoring invalid GPU_TIMEOUT=%r; using %s seconds",
                raw_timeout,
                self.gpu_timeout,
            )
        logger.info("πŸ”‹ GPU timeout set to %s seconds", self.gpu_timeout)

        # "cuda" when PyTorch reports a GPU, otherwise "cpu".
        self.device = self._detect_device()

        # Start activity monitor
        self.start_activity_monitor()

    @staticmethod
    def _parse_timeout(raw, default=DEFAULT_GPU_TIMEOUT):
        """Return *raw* as a non-negative int of seconds, or *default* if unusable."""
        try:
            seconds = int(raw)
        except (TypeError, ValueError):
            return default
        return seconds if seconds >= 0 else default

    @staticmethod
    def _detect_device() -> str:
        """Return "cuda" if PyTorch is importable and sees a GPU, else "cpu"."""
        try:
            import torch
        except ImportError:
            logger.info("ℹ️ PyTorch not available, using CPU mode")
            return "cpu"
        if torch.cuda.is_available():
            logger.info("βœ… GPU detected and available")
            return "cuda"
        logger.info("ℹ️ Running on CPU")
        return "cpu"

    def update_activity(self) -> None:
        """Update last activity timestamp"""
        with self.activity_lock:
            self.last_activity = time.time()
        # Logged after releasing the lock so logging never delays other threads.
        logger.debug("πŸ”‹ Activity updated: %s", time.strftime("%H:%M:%S"))

    def get_idle_time(self) -> float:
        """Get current idle time in seconds"""
        with self.activity_lock:
            return time.time() - self.last_activity

    def start_activity_monitor(self) -> None:
        """Start background thread to monitor activity and manage GPU.

        The thread is a daemon, so it never blocks interpreter shutdown. It
        only logs; it does not pause or release anything itself.
        """

        def monitor_loop():
            # Log the auto-pause event once per idle period rather than on
            # every check; the flag resets as soon as activity resumes.
            pause_logged = False
            while True:
                try:
                    idle_time = self.get_idle_time()
                    if idle_time > self.gpu_timeout:
                        if not pause_logged:
                            logger.info(
                                "πŸ”‹ GPU Auto-Pause triggered after %.0fs idle",
                                idle_time,
                            )
                            logger.info("πŸ’€ TTS Service entering sleep mode to save costs...")
                            # Note: HuggingFace Spaces handles auto-pause automatically
                            # This logs the event for monitoring
                            pause_logged = True
                    else:
                        pause_logged = False
                except Exception as e:
                    # Keep the monitor alive; record the traceback for diagnosis.
                    logger.exception("Activity monitor error: %s", e)
                # Check every minute
                time.sleep(self.MONITOR_INTERVAL)

        monitor_thread = threading.Thread(target=monitor_loop, daemon=True)
        monitor_thread.start()
        logger.info("πŸ”‹ TTS Activity monitor started")

    def synthesize_text(self, text: str, language: str = "en") -> str:
        """Simulate TTS synthesis for *text* and return a status message.

        Args:
            text: Text to synthesize. Leading/trailing whitespace is ignored.
            language: Language code. Accepted for interface compatibility but
                currently not used by the simulated synthesis.

        Returns:
            An error message for empty/blank input, a demo message for short
            text (fewer than 10 characters) or a known demo phrase, otherwise
            a success message with the simulated processing time and device.
        """
        # Update activity tracking
        self.update_activity()

        if not text or not text.strip():
            return "❌ Please provide text to synthesize"

        text = text.strip()
        # Only add an ellipsis when the text was actually truncated.
        truncated = len(text) > self.PREVIEW_LENGTH
        preview = text[: self.PREVIEW_LENGTH] + ("..." if truncated else "")
        logger.info("πŸŽ™οΈ Processing TTS request: '%s'", preview)

        # Handle test/demo cases
        if len(text) < 10 or text.lower() in self.DEMO_PHRASES:
            return f"βœ… Demo TTS: Generated speech for '{text}'"

        # Simulate processing time
        processing_start = time.perf_counter()
        time.sleep(0.5)  # Simulate processing
        processing_time = time.perf_counter() - processing_start

        logger.info("βœ… TTS completed in %.2fs", processing_time)
        return f"βœ… TTS Success ({processing_time:.2f}s on {self.device}): Speech synthesized for '{preview}'"
# Initialize service: one module-level instance shared by the handlers below.
# Constructing it also starts the background activity monitor thread.
tts_service = SimpleTTSService()
def synthesize_and_display(text):
    """Record the interaction, then return the synthesis status for *text*.

    Any call counts as activity, even with no text. A falsy *text* yields an
    empty string; otherwise the shared service's result message is returned.
    """
    # Every interaction resets the idle clock, including empty submissions.
    tts_service.update_activity()
    return tts_service.synthesize_text(text) if text else ""
def get_gpu_status():
    """Build the Markdown summary shown in the GPU management panel.

    Reads idle time, timeout and device from the shared service. The badge is
    "Active" below 60 seconds idle (shown in seconds), "Idle" from 60 seconds
    up to the timeout, and "Auto-Pause Ready" otherwise (both shown in minutes).
    """
    idle_seconds = tts_service.get_idle_time()
    limit = tts_service.gpu_timeout

    if idle_seconds >= 60:
        idle_label = f"{idle_seconds / 60:.1f}m idle"
        badge = "🟑 Idle" if idle_seconds < limit else "πŸ”΄ Auto-Pause Ready"
    else:
        idle_label = f"{idle_seconds:.0f}s idle"
        badge = "🟒 Active"

    # Leading and trailing empty entries reproduce the blank first/last lines
    # of the report once joined with newlines.
    report_lines = [
        "",
        f"### πŸ”‹ TTS GPU Status: {badge}",
        f"**Device**: {tts_service.device.upper()}",
        f"**Idle Time**: {idle_label}",
        f"**Auto-Pause**: {limit/60:.0f} minutes",
        "**Cost**: $0.40/hour when active",
        "**Billing**: Stops when paused",
        "",
    ]
    return "\n".join(report_lines)
# Create Gradio Interface with unmute.sh-inspired design

# Stylesheet for the dark, centered layout. Kept as a module constant so the
# layout code below stays readable.
CUSTOM_CSS = """
.gradio-container {
max-width: 800px !important;
margin: 0 auto !important;
background: #0a0a0a !important;
color: #ffffff !important;
}
body {
background: #0a0a0a !important;
color: #ffffff !important;
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif !important;
}
.gr-textbox textarea, .gr-textbox input {
background: #1a1a1a !important;
border: 1px solid #333 !important;
color: #ffffff !important;
border-radius: 8px !important;
font-size: 16px !important;
}
.gr-textbox textarea::placeholder, .gr-textbox input::placeholder {
color: #666 !important;
}
.gr-button {
background: #1a1a1a !important;
border: 1px solid #333 !important;
color: #ffffff !important;
border-radius: 8px !important;
padding: 12px 24px !important;
font-weight: 500 !important;
transition: all 0.2s ease !important;
font-size: 16px !important;
}
.gr-button:hover {
background: #2a2a2a !important;
border-color: #555 !important;
}
.gr-button-primary {
background: #ffffff !important;
color: #000000 !important;
border: none !important;
}
.gr-button-primary:hover {
background: #f0f0f0 !important;
}
.main-content {
text-align: center;
padding: 60px 20px;
min-height: 100vh;
display: flex;
flex-direction: column;
justify-content: center;
}
.title {
font-size: 2.5rem;
font-weight: 600;
margin-bottom: 1rem;
color: #ffffff;
}
.subtitle {
font-size: 1.1rem;
color: #888;
margin-bottom: 3rem;
font-weight: 400;
}
"""

# Page heading shown above the controls.
HEADER_HTML = """
<div class="title">Text to Speech</div>
<div class="subtitle">Convert your text into natural speech</div>
"""

# Footer note. The idle limit is taken from the running service so the text
# stays correct when GPU_TIMEOUT overrides the 10-minute default.
FOOTER_HTML = f"""
<div style="margin-top: 40px; color: #666; font-size: 14px;">
πŸ’‘ <strong>Cost Optimization:</strong> GPU automatically pauses after {tts_service.gpu_timeout / 60:.0f} minutes of inactivity to save on billing.<br>
πŸŽ™οΈ <strong>TTS Service:</strong> Part of voiceCal.ai microservices architecture for calendar announcements.<br>
πŸ“– <strong>Manual Control:</strong> Use HuggingFace Spaces settings to pause/resume GPU billing.
</div>
"""

with gr.Blocks(
    title="TTS Service",
    theme=gr.themes.Base(),
    css=CUSTOM_CSS,
) as demo:
    with gr.Column(elem_classes="main-content"):
        gr.HTML(HEADER_HTML)

        # GPU Status Display (collapsible)
        with gr.Accordion("πŸ”‹ GPU Management", open=False):
            gpu_status = gr.Markdown(get_gpu_status())
            with gr.Row():
                refresh_status_btn = gr.Button("πŸ”„ Refresh Status", size="sm")
            # Pass the function directly; no wrapper lambda is needed.
            refresh_status_btn.click(
                fn=get_gpu_status,
                outputs=gpu_status,
            )

        text_input = gr.Textbox(
            placeholder="Enter text to synthesize...",
            lines=3,
            max_lines=6,
            show_label=False,
            container=False,
            scale=1,
        )

        with gr.Row():
            synthesize_btn = gr.Button("Generate Speech", variant="primary", size="lg", scale=1)

        # Result area that only shows when there's output
        tts_result = gr.Textbox(
            lines=2,
            show_label=False,
            container=False,
            visible=False,
            interactive=False,
        )

        def synthesize_and_show(text):
            """Run synthesis and return updates for the result box and status panel.

            Returns a 2-tuple matching ``outputs=[tts_result, gpu_status]``: a
            single update carrying both visibility and value for the result
            box, and the refreshed GPU status Markdown.
            """
            # Update activity and GPU status on interaction
            tts_service.update_activity()
            if not text:
                # Hide and clear the result box when there is nothing to show.
                return gr.update(visible=False, value=""), get_gpu_status()
            result = synthesize_and_display(text)
            return gr.update(visible=True, value=result), get_gpu_status()

        # Each output component is listed once; the single update for
        # tts_result sets its visibility and its value together.
        synthesize_btn.click(
            fn=synthesize_and_show,
            inputs=[text_input],
            outputs=[tts_result, gpu_status],
        )

        # Cost optimization note
        gr.HTML(FOOTER_HTML)
# Launch interface: start the Gradio app only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()