Spaces:

pgits
/

pgits-tts-gpu-service

Runtime error

Peter Michael Gits

Add GPU Auto-Pause and Manual Control to TTS Service

469169f 8 months ago

9.44 kB

	#!/usr/bin/env python3
	"""
	TTS GPU Service for HuggingFace Spaces - GPU Management with Auto-Pause
	Text-to-Speech microservice for voiceCal.ai integration with cost optimization
	"""

	import os
	import logging
	import time
	import threading
	import gradio as gr

	# Configure logging
	# The root logger is set to INFO; `logger` is this module's named logger and
	# is used by the service class and the background monitor thread.
	logging.basicConfig(level=logging.INFO)
	logger = logging.getLogger(__name__)

	class SimpleTTSService:
	    """TTS service with GPU management and auto-pause functionality.

	    The service records the time of the last interaction and runs a daemon
	    thread that logs when the idle time exceeds ``gpu_timeout`` seconds.
	    Synthesis is simulated: no audio is produced, only a status string.
	    """

	    # Idle timeout (seconds) used when GPU_TIMEOUT is unset or unusable.
	    DEFAULT_GPU_TIMEOUT = 600

	    # Seconds between two idle checks of the monitor thread.
	    MONITOR_INTERVAL = 60

	    # Number of input characters echoed back in log and result messages.
	    PREVIEW_LENGTH = 50

	    # Inputs answered by the demo path even though they are 10+ characters
	    # long or otherwise notable; anything shorter than 10 characters is
	    # treated as a demo request as well.
	    DEMO_PHRASES = ("test", "demo", "hello", "hello world")

	    def __init__(self):
	        """Read the timeout, detect the device and start the idle monitor."""
	        self.device = "cpu"  # Start with CPU for compatibility

	        # GPU management settings
	        self.gpu_timeout = self._read_gpu_timeout()
	        self.last_activity = time.time()
	        self.activity_lock = threading.Lock()

	        logger.info("🎙️ Initializing TTS Service with GPU Auto-Pause")
	        logger.info(f"🔋 GPU timeout set to {self.gpu_timeout} seconds")

	        # Try to detect GPU. Only the import is guarded: a missing PyTorch is
	        # an expected situation, any other failure should stay visible.
	        try:
	            import torch
	        except ImportError:
	            logger.info("ℹ️ PyTorch not available, using CPU mode")
	        else:
	            if torch.cuda.is_available():
	                self.device = "cuda"
	                logger.info("✅ GPU detected and available")
	            else:
	                logger.info("ℹ️ Running on CPU")

	        # Start activity monitor
	        self.start_activity_monitor()

	    @classmethod
	    def _read_gpu_timeout(cls):
	        """Return the idle timeout in seconds from the GPU_TIMEOUT variable.

	        Falls back to ``DEFAULT_GPU_TIMEOUT`` (with a warning) when the
	        variable is unset, is not an integer, or is negative, so that a
	        malformed setting cannot prevent the service from starting.
	        """
	        raw_value = os.getenv("GPU_TIMEOUT")
	        if raw_value is None:
	            return cls.DEFAULT_GPU_TIMEOUT

	        try:
	            timeout = int(raw_value)
	        except ValueError:
	            logger.warning(
	                f"Invalid GPU_TIMEOUT {raw_value!r}, "
	                f"using default of {cls.DEFAULT_GPU_TIMEOUT} seconds"
	            )
	            return cls.DEFAULT_GPU_TIMEOUT

	        if timeout < 0:
	            logger.warning(
	                f"Negative GPU_TIMEOUT {timeout}, "
	                f"using default of {cls.DEFAULT_GPU_TIMEOUT} seconds"
	            )
	            return cls.DEFAULT_GPU_TIMEOUT

	        return timeout

	    @classmethod
	    def _preview(cls, text):
	        """Return ``text`` cut to PREVIEW_LENGTH chars, with '...' if cut."""
	        suffix = "..." if len(text) > cls.PREVIEW_LENGTH else ""
	        return f"{text[:cls.PREVIEW_LENGTH]}{suffix}"

	    def update_activity(self):
	        """Update last activity timestamp"""
	        with self.activity_lock:
	            self.last_activity = time.time()
	            logger.debug(f"🔋 Activity updated: {time.strftime('%H:%M:%S')}")

	    def get_idle_time(self):
	        """Get current idle time in seconds"""
	        with self.activity_lock:
	            return time.time() - self.last_activity

	    def start_activity_monitor(self):
	        """Start background thread to monitor activity and manage GPU.

	        The thread is a daemon, so it never blocks interpreter shutdown. It
	        only logs: nothing in this class pauses the hardware itself.
	        """
	        def monitor_loop():
	            # Timestamp of the activity whose idle period was already
	            # reported; prevents repeating the same message on every check.
	            reported_for = None

	            while True:
	                try:
	                    with self.activity_lock:
	                        last_activity = self.last_activity
	                    idle_time = time.time() - last_activity

	                    if idle_time > self.gpu_timeout and last_activity != reported_for:
	                        logger.info(f"🔋 GPU Auto-Pause triggered after {idle_time:.0f}s idle")
	                        logger.info("💤 TTS Service entering sleep mode to save costs...")
	                        # Note: HuggingFace Spaces handles auto-pause automatically
	                        # This logs the event for monitoring
	                        reported_for = last_activity
	                except Exception as e:
	                    # Keep the monitor alive whatever happens in one check.
	                    logger.error(f"Activity monitor error: {e}")

	                # Check every minute
	                time.sleep(self.MONITOR_INTERVAL)

	        monitor_thread = threading.Thread(target=monitor_loop, daemon=True)
	        monitor_thread.start()
	        logger.info("🔋 TTS Activity monitor started")

	    def synthesize_text(self, text: str, language: str = "en") -> str:
	        """TTS synthesis with activity tracking.

	        Args:
	            text: Text to synthesize; surrounding whitespace is ignored.
	            language: Accepted for interface compatibility; not used by the
	                simulated synthesis.

	        Returns:
	            A human-readable status string: an error message for empty
	            input, a demo message for short or well-known test phrases,
	            otherwise a success message with the simulated processing time.
	        """
	        # Update activity tracking
	        self.update_activity()

	        if not text or not text.strip():
	            return "❌ Please provide text to synthesize"

	        text = text.strip()
	        preview = self._preview(text)

	        logger.info(f"🎙️ Processing TTS request: '{preview}'")

	        # Handle test/demo cases
	        if len(text) < 10 or text.lower() in self.DEMO_PHRASES:
	            return f"✅ Demo TTS: Generated speech for '{text}'"

	        # Simulate processing time
	        processing_start = time.time()
	        time.sleep(0.5)  # Simulate processing
	        processing_time = time.time() - processing_start

	        logger.info(f"✅ TTS completed in {processing_time:.2f}s")

	        return f"✅ TTS Success ({processing_time:.2f}s on {self.device}): Speech synthesized for '{preview}'"

	# Initialize service
	# Module-level instance used by the UI callbacks in this file. Constructing
	# it probes for a GPU and starts the background activity-monitor thread.
	tts_service = SimpleTTSService()

	def synthesize_and_display(text):
	    """Record the interaction and return the synthesis status for ``text``.

	    An empty or missing ``text`` yields an empty string without calling the
	    service's synthesis method; any other value is passed to it unchanged.
	    """
	    # Every call counts as activity, even when there is nothing to say.
	    tts_service.update_activity()

	    return tts_service.synthesize_text(text) if text else ""

	def get_gpu_status():
	    """Return a Markdown snippet describing device, idle time and timeout.

	    The status label is "Active" below one minute of idle time, "Idle"
	    between one minute and the configured timeout, and "Auto-Pause Ready"
	    once the timeout has been reached.
	    """
	    timeout = tts_service.gpu_timeout
	    idle_seconds = tts_service.get_idle_time()

	    if idle_seconds >= 60:
	        # From one minute on, the idle time is reported in minutes.
	        status = "🟡 Idle" if idle_seconds < timeout else "🔴 Auto-Pause Ready"
	        time_display = f"{idle_seconds / 60:.1f}m idle"
	    else:
	        status = "🟢 Active"
	        time_display = f"{idle_seconds:.0f}s idle"

	    return f"""
	### 🔋 TTS GPU Status: {status}
	Device: {tts_service.device.upper()}
	Idle Time: {time_display}
	Auto-Pause: {timeout/60:.0f} minutes
	Cost: $0.40/hour when active
	Billing: Stops when paused
	"""

	# Create Gradio Interface with unmute.sh-inspired design
	# The layout is a single centered column: title, a collapsed GPU status
	# panel, the text input, the action button and a result box that stays
	# hidden until there is something to show.
	with gr.Blocks(
	    title="TTS Service",
	    theme=gr.themes.Base(),
	    css="""
	.gradio-container {
	max-width: 800px !important;
	margin: 0 auto !important;
	background: #0a0a0a !important;
	color: #ffffff !important;
	}
	body {
	background: #0a0a0a !important;
	color: #ffffff !important;
	font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif !important;
	}
	.gr-textbox textarea, .gr-textbox input {
	background: #1a1a1a !important;
	border: 1px solid #333 !important;
	color: #ffffff !important;
	border-radius: 8px !important;
	font-size: 16px !important;
	}
	.gr-textbox textarea::placeholder, .gr-textbox input::placeholder {
	color: #666 !important;
	}
	.gr-button {
	background: #1a1a1a !important;
	border: 1px solid #333 !important;
	color: #ffffff !important;
	border-radius: 8px !important;
	padding: 12px 24px !important;
	font-weight: 500 !important;
	transition: all 0.2s ease !important;
	font-size: 16px !important;
	}
	.gr-button:hover {
	background: #2a2a2a !important;
	border-color: #555 !important;
	}
	.gr-button-primary {
	background: #ffffff !important;
	color: #000000 !important;
	border: none !important;
	}
	.gr-button-primary:hover {
	background: #f0f0f0 !important;
	}
	.main-content {
	text-align: center;
	padding: 60px 20px;
	min-height: 100vh;
	display: flex;
	flex-direction: column;
	justify-content: center;
	}
	.title {
	font-size: 2.5rem;
	font-weight: 600;
	margin-bottom: 1rem;
	color: #ffffff;
	}
	.subtitle {
	font-size: 1.1rem;
	color: #888;
	margin-bottom: 3rem;
	font-weight: 400;
	}
	"""
	) as demo:

	    with gr.Column(elem_classes="main-content"):
	        gr.HTML("""
	<div class="title">Text to Speech</div>
	<div class="subtitle">Convert your text into natural speech</div>
	""")

	        # GPU Status Display (collapsible)
	        with gr.Accordion("🔋 GPU Management", open=False):
	            # Rendered once when the UI is built; refreshed by the button
	            # below and after every synthesis request.
	            gpu_status = gr.Markdown(get_gpu_status())

	            with gr.Row():
	                refresh_status_btn = gr.Button("🔄 Refresh Status", size="sm")

	            refresh_status_btn.click(
	                fn=get_gpu_status,
	                outputs=gpu_status
	            )

	        text_input = gr.Textbox(
	            placeholder="Enter text to synthesize...",
	            lines=3,
	            max_lines=6,
	            show_label=False,
	            container=False,
	            scale=1
	        )

	        with gr.Row():
	            synthesize_btn = gr.Button("Generate Speech", variant="primary", size="lg", scale=1)

	        # Result area that only shows when there's output
	        tts_result = gr.Textbox(
	            lines=2,
	            show_label=False,
	            container=False,
	            visible=False,
	            interactive=False
	        )

	        def synthesize_and_show(text):
	            """Run synthesis for ``text`` and refresh the result and status.

	            Returns one update per output component: the result box
	            (hidden and cleared for empty input, otherwise shown with the
	            synthesis message) and the refreshed GPU status Markdown.
	            """
	            # Update activity and GPU status on interaction
	            tts_service.update_activity()

	            if not text:
	                return gr.update(visible=False, value=""), get_gpu_status()

	            result = synthesize_and_display(text)
	            return gr.update(visible=True, value=result), get_gpu_status()

	        # Each component is listed once; visibility and value of the result
	        # box travel together in a single gr.update.
	        synthesize_btn.click(
	            fn=synthesize_and_show,
	            inputs=[text_input],
	            outputs=[tts_result, gpu_status]
	        )

	        # Cost optimization note
	        # The timeout shown here follows the configured value instead of a
	        # hard-coded number, matching the status panel.
	        gr.HTML(f"""
	<div style="margin-top: 40px; color: #666; font-size: 14px;">
	💡 <strong>Cost Optimization:</strong> GPU automatically pauses after {tts_service.gpu_timeout / 60:.0f} minutes of inactivity to save on billing.<br>
	🎙️ <strong>TTS Service:</strong> Part of voiceCal.ai microservices architecture for calendar announcements.<br>
	📖 <strong>Manual Control:</strong> Use HuggingFace Spaces settings to pause/resume GPU billing.
	</div>
	""")

	# Launch interface
	# Start the Gradio app only when this file is run as a script, not when it
	# is imported as a module.
	if __name__ == "__main__":
	    demo.launch()