#!/usr/bin/env python3
"""
TTS GPU Service for HuggingFace Spaces - GPU Management with Auto-Pause
Text-to-Speech microservice for voiceCal.ai integration with cost optimization
"""

import os
import logging
import time
import threading
import gradio as gr

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class SimpleTTSService:
    """Simulated text-to-speech service with idle-time tracking.

    The service remembers when it last handled a request and runs a daemon
    thread that logs when the idle period exceeds ``gpu_timeout``. The monitor
    only logs the event; it does not pause or release any hardware itself.
    Synthesis is simulated: ``synthesize_text`` returns a status string, not
    audio data.
    """

    # Fallback idle threshold (seconds) when GPU_TIMEOUT is unset or invalid.
    DEFAULT_GPU_TIMEOUT = 600
    # Seconds between two idle checks of the background monitor.
    MONITOR_INTERVAL = 60

    def __init__(self):
        """Read configuration, detect the device and start the idle monitor."""
        # Device label reported in results; switched to "cuda" below when
        # PyTorch is importable and reports an available CUDA device.
        self.device = "cpu"

        # Idle threshold in seconds, configurable via the GPU_TIMEOUT env var.
        self.gpu_timeout = self._read_gpu_timeout(self.DEFAULT_GPU_TIMEOUT)
        # Wall-clock timestamp of the latest request; guarded by activity_lock
        # because the monitor thread reads it concurrently.
        self.last_activity = time.time()
        self.activity_lock = threading.Lock()

        logger.info("🎙️ Initializing TTS Service with GPU Auto-Pause")
        logger.info(f"🔋 GPU timeout set to {self.gpu_timeout} seconds")

        # PyTorch is optional: without it the service simply stays on CPU.
        try:
            import torch
            if torch.cuda.is_available():
                self.device = "cuda"
                logger.info("✅ GPU detected and available")
            else:
                logger.info("ℹ️ Running on CPU")
        except ImportError:
            logger.info("ℹ️ PyTorch not available, using CPU mode")

        self.start_activity_monitor()

    @staticmethod
    def _read_gpu_timeout(default):
        """Return GPU_TIMEOUT as an int, or ``default`` if it cannot be parsed.

        A malformed (or empty) value is logged and ignored so that a
        configuration typo does not crash the service at start-up.
        """
        raw = os.getenv("GPU_TIMEOUT", str(default))
        try:
            return int(raw)
        except ValueError:
            logger.warning(
                f"⚠️ Invalid GPU_TIMEOUT value {raw!r}; using default of {default} seconds"
            )
            return default

    def update_activity(self):
        """Record the current time as the moment of the latest activity."""
        with self.activity_lock:
            self.last_activity = time.time()
        # Logging happens outside the lock so the critical section stays short.
        logger.debug(f"🔋 Activity updated: {time.strftime('%H:%M:%S')}")

    def get_idle_time(self):
        """Return the number of seconds elapsed since the latest activity."""
        with self.activity_lock:
            return time.time() - self.last_activity

    def start_activity_monitor(self):
        """Start a daemon thread that logs when the idle timeout is exceeded.

        The thread checks the idle time every ``MONITOR_INTERVAL`` seconds.
        The auto-pause messages are logged once per idle period: they are
        emitted again only after the idle time has dropped back below the
        timeout (i.e. after new activity) and then exceeded it once more.
        """
        def monitor_loop():
            # True once the current idle period has been reported, so the
            # same period is not re-logged on every check.
            pause_logged = False
            while True:
                try:
                    idle_time = self.get_idle_time()

                    if idle_time > self.gpu_timeout:
                        if not pause_logged:
                            logger.info(f"🔋 GPU Auto-Pause triggered after {idle_time:.0f}s idle")
                            logger.info("💤 TTS Service entering sleep mode to save costs...")
                            # Note (from the original author): HuggingFace
                            # Spaces handles auto-pause itself; this only logs
                            # the event for monitoring.
                            pause_logged = True
                    else:
                        pause_logged = False
                except Exception as e:
                    # Top-level boundary of the thread: log with traceback and
                    # keep monitoring rather than letting the thread die.
                    logger.exception(f"Activity monitor error: {e}")

                time.sleep(self.MONITOR_INTERVAL)

        monitor_thread = threading.Thread(target=monitor_loop, daemon=True)
        monitor_thread.start()
        logger.info("🔋 TTS Activity monitor started")

    def synthesize_text(self, text: str, language: str = "en") -> str:
        """Simulate speech synthesis for ``text`` and return a status message.

        Args:
            text: Text to synthesize; surrounding whitespace is stripped.
            language: Accepted for interface compatibility; currently unused.

        Returns:
            An error message for empty/blank input, a demo message for very
            short input (fewer than 10 characters, or a known demo phrase),
            otherwise a success message including the simulated processing
            time, the device and a preview of at most 50 characters.
        """
        # Any request, valid or not, counts as activity.
        self.update_activity()

        if not text or not text.strip():
            return "❌ Please provide text to synthesize"

        text = text.strip()
        # Append an ellipsis only when the text was actually truncated, so the
        # log line and the returned message show the same preview.
        preview = f"{text[:50]}{'...' if len(text) > 50 else ''}"

        logger.info(f"🎙️ Processing TTS request: '{preview}'")

        # Handle test/demo cases
        if len(text) < 10 or text.lower() in ("test", "demo", "hello", "hello world"):
            return f"✅ Demo TTS: Generated speech for '{text}'"

        # Simulate processing; perf_counter is monotonic, so the measured
        # duration is not affected by wall-clock adjustments.
        processing_start = time.perf_counter()
        time.sleep(0.5)
        processing_time = time.perf_counter() - processing_start

        logger.info(f"✅ TTS completed in {processing_time:.2f}s")

        return f"✅ TTS Success ({processing_time:.2f}s on {self.device}): Speech synthesized for '{preview}'"

# Initialize the shared service instance used by the module-level helper
# functions and UI callbacks below. Constructing it detects the device and
# starts the background activity-monitor thread.
tts_service = SimpleTTSService()

def synthesize_and_display(text):
    """Record the interaction and return the synthesis result for ``text``.

    Activity is refreshed on every call, including empty submissions, which
    short-circuit to an empty string without reaching the service.
    """
    # Every interaction counts as activity, even one with no text.
    tts_service.update_activity()

    return tts_service.synthesize_text(text) if text else ""

def get_gpu_status():
    """Build the Markdown status panel for the module-level ``tts_service``.

    Returns:
        A Markdown string with the status label, the device name, the current
        idle time and the configured auto-pause timeout.
    """
    idle_time = tts_service.get_idle_time()
    timeout = tts_service.gpu_timeout

    # Check the timeout first: otherwise a timeout shorter than one minute is
    # masked by the 60-second "Active" window and never reported as reached.
    if idle_time >= timeout:
        status = "🔴 Auto-Pause Ready"
    elif idle_time < 60:
        status = "🟢 Active"
    else:
        status = "🟡 Idle"

    # Seconds below one minute, fractional minutes from there on.
    if idle_time < 60:
        time_display = f"{idle_time:.0f}s idle"
    else:
        time_display = f"{idle_time / 60:.1f}m idle"

    # Whole-minute timeouts print as before ("10"); others keep one decimal so
    # that e.g. 30 seconds is not shown as "0 minutes".
    timeout_minutes = timeout / 60
    if timeout % 60 == 0:
        auto_pause = f"{timeout_minutes:.0f}"
    else:
        auto_pause = f"{timeout_minutes:.1f}"

    # The two trailing spaces on each line are Markdown hard line breaks.
    return f"""
### 🔋 TTS GPU Status: {status}
**Device**: {tts_service.device.upper()}  
**Idle Time**: {time_display}  
**Auto-Pause**: {auto_pause} minutes  
**Cost**: $0.40/hour when active  
**Billing**: Stops when paused  
    """

# Create Gradio Interface with unmute.sh-inspired design.
# Layout: title, collapsible status panel, text input, generate button, a
# result box that stays hidden until there is output, and a footer note.
with gr.Blocks(
    title="TTS Service",
    theme=gr.themes.Base(),
    css="""
    .gradio-container {
        max-width: 800px !important;
        margin: 0 auto !important;
        background: #0a0a0a !important;
        color: #ffffff !important;
    }
    body {
        background: #0a0a0a !important;
        color: #ffffff !important;
        font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", system-ui, sans-serif !important;
    }
    .gr-textbox textarea, .gr-textbox input {
        background: #1a1a1a !important;
        border: 1px solid #333 !important;
        color: #ffffff !important;
        border-radius: 8px !important;
        font-size: 16px !important;
    }
    .gr-textbox textarea::placeholder, .gr-textbox input::placeholder {
        color: #666 !important;
    }
    .gr-button {
        background: #1a1a1a !important;
        border: 1px solid #333 !important;
        color: #ffffff !important;
        border-radius: 8px !important;
        padding: 12px 24px !important;
        font-weight: 500 !important;
        transition: all 0.2s ease !important;
        font-size: 16px !important;
    }
    .gr-button:hover {
        background: #2a2a2a !important;
        border-color: #555 !important;
    }
    .gr-button-primary {
        background: #ffffff !important;
        color: #000000 !important;
        border: none !important;
    }
    .gr-button-primary:hover {
        background: #f0f0f0 !important;
    }
    .main-content {
        text-align: center;
        padding: 60px 20px;
        min-height: 100vh;
        display: flex;
        flex-direction: column;
        justify-content: center;
    }
    .title {
        font-size: 2.5rem;
        font-weight: 600;
        margin-bottom: 1rem;
        color: #ffffff;
    }
    .subtitle {
        font-size: 1.1rem;
        color: #888;
        margin-bottom: 3rem;
        font-weight: 400;
    }
    """
) as demo:

    with gr.Column(elem_classes="main-content"):
        gr.HTML("""
        <div class="title">Text to Speech</div>
        <div class="subtitle">Convert your text into natural speech</div>
        """)

        # GPU Status Display (collapsible)
        with gr.Accordion("🔋 GPU Management", open=False):
            gpu_status = gr.Markdown(get_gpu_status())

            with gr.Row():
                refresh_status_btn = gr.Button("🔄 Refresh Status", size="sm")

            # The function takes no inputs, so it can be passed directly.
            refresh_status_btn.click(
                fn=get_gpu_status,
                outputs=gpu_status
            )

        text_input = gr.Textbox(
            placeholder="Enter text to synthesize...",
            lines=3,
            max_lines=6,
            show_label=False,
            container=False,
            scale=1
        )

        with gr.Row():
            synthesize_btn = gr.Button("Generate Speech", variant="primary", size="lg", scale=1)

        # Result area that only shows when there's output
        tts_result = gr.Textbox(
            lines=2,
            show_label=False,
            container=False,
            visible=False,
            interactive=False
        )

        def synthesize_and_show(text):
            """Run synthesis and return updates for the result box and status.

            Returns a pair: one update for ``tts_result`` (visibility and
            value together) and the refreshed status Markdown.
            """
            # Update activity and GPU status on interaction
            tts_service.update_activity()

            if not text:
                # Nothing to show: hide the result box and clear its content.
                return gr.update(visible=False, value=""), get_gpu_status()

            result = synthesize_and_display(text)
            return gr.update(visible=True, value=result), get_gpu_status()

        # Each output component is listed once; a single update carries both
        # the visibility and the value of the result box, so the outcome does
        # not depend on the order in which duplicate outputs would be applied.
        synthesize_btn.click(
            fn=synthesize_and_show,
            inputs=[text_input],
            outputs=[tts_result, gpu_status]
        )

        # Show the configured timeout instead of a hard-coded "10 minutes" so
        # the note stays accurate when GPU_TIMEOUT is overridden. Whole-minute
        # values print without decimals (the default renders as "10").
        timeout_minutes = tts_service.gpu_timeout / 60
        if timeout_minutes.is_integer():
            timeout_label = f"{timeout_minutes:.0f}"
        else:
            timeout_label = f"{timeout_minutes:.1f}"

        # Cost optimization note
        gr.HTML(f"""
        <div style="margin-top: 40px; color: #666; font-size: 14px;">
        💡 <strong>Cost Optimization:</strong> GPU automatically pauses after {timeout_label} minutes of inactivity to save on billing.<br>
        🎙️ <strong>TTS Service:</strong> Part of voiceCal.ai microservices architecture for calendar announcements.<br>
        📖 <strong>Manual Control:</strong> Use HuggingFace Spaces settings to pause/resume GPU billing.
        </div>
        """)

# Launch interface: start the Gradio app only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()