"""
Unified Entry Point - API Architecture

This file now uses a unified API-based architecture for all deployments.
Both local development and Hugging Face Spaces use the same API layer.

Architecture:
    1. Starts API server in background (subprocess)
    2. Starts Gradio UI that connects to the API
    3. Everything goes through HTTP/REST

Benefits:
    - Single code path to maintain
    - Consistent behavior everywhere
    - Easy to test and debug
    - Proper separation of concerns

Usage:
    python app.py
    
By default the script will:
    - Start the API server on http://localhost:8000 (port set by API_PORT)
    - Start the Gradio UI on http://localhost:7860 (port set by GRADIO_SERVER_PORT)
"""

import os
# Set before the remaining imports run, presumably so that PyTorch (if it is
# loaded by one of them) sees the flag at import time -- TODO confirm.
# The API server subprocess is started with a copy of os.environ, so it
# inherits this value as well.
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

import subprocess
import time
import sys
import signal
import requests
from functools import partial

# Use shared UI components
from ui.shared_interface import create_interface
from ui.detection_wrapper import detect_with_api


# Runtime configuration; every value can be overridden via the environment.

# Host and port handed to the API server subprocess.
API_HOST = os.environ.get("API_HOST", "0.0.0.0")
API_PORT = int(os.environ.get("API_PORT", "8000"))
# Address this process uses to reach the API server (always via localhost).
API_URL = "http://localhost:" + str(API_PORT)

# Bind address and port passed to the Gradio UI.
UI_HOST = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
UI_PORT = int(os.environ.get("GRADIO_SERVER_PORT", "7860"))


def _warmup_models():
    """Ask the API to warm up its models via ``POST /warmup`` (best effort).

    The request timeout is read from ``CU1_WARMUP_TIMEOUT`` (seconds,
    default 180). Any failure is reported and swallowed: the API is already
    healthy at this point, so a failed warmup must not abort startup.
    """
    default_timeout = 180  # 3 minutes
    raw_timeout = os.getenv("CU1_WARMUP_TIMEOUT", str(default_timeout))
    try:
        warmup_timeout = int(raw_timeout)
    except ValueError:
        # A malformed setting must not crash the launcher (which would also
        # orphan the already-running API server).
        print(f"⚠️  Invalid CU1_WARMUP_TIMEOUT={raw_timeout!r}, using {default_timeout} seconds.")
        warmup_timeout = default_timeout

    print("🔥 Warming up models (this may take 1-3 minutes on first run)...")
    try:
        warmup_response = requests.post(f"{API_URL}/warmup", timeout=warmup_timeout)
    except requests.exceptions.Timeout:
        print("⚠️  Warmup timed out, but API is ready. First request may be slower.")
        return
    except requests.exceptions.RequestException as e:
        print(f"⚠️  Warmup failed: {e}, but API is ready. First request may be slower.")
        return

    if warmup_response.status_code == 200:
        print("✅ Models warmed up successfully!")
    else:
        print(f"⚠️  Warmup returned status {warmup_response.status_code}, continuing anyway...")


def _stop_process(process, timeout=5):
    """Terminate *process*, escalating to kill if it does not exit in time."""
    if process.poll() is not None:
        return
    process.terminate()
    try:
        process.wait(timeout=timeout)
    except subprocess.TimeoutExpired:
        process.kill()
        process.wait()


def start_api_server():
    """Start the API server in a subprocess and wait until it is healthy.

    Launches ``app_api.py`` with the current interpreter, passing the
    configured host/port through ``UVICORN_HOST`` / ``UVICORN_PORT``, then
    polls ``{API_URL}/health`` until it answers with HTTP 200. Unless
    ``CU1_WARMUP_MODELS`` is set to a falsy value, a best-effort model
    warmup request is sent before returning.

    Returns:
        subprocess.Popen: Handle of the running API server process.

    Raises:
        SystemExit: With status 1 if the process exits during startup or
            does not become healthy within 60 seconds.
    """
    print("🚀 Starting API server...")

    # The child inherits this process's stdout/stderr. Redirecting them to a
    # pipe that nobody drains would block the server as soon as the OS pipe
    # buffer fills up, and inheriting also makes its logs visible live.
    api_process = subprocess.Popen(
        [sys.executable, "app_api.py"],
        env={**os.environ, "UVICORN_HOST": API_HOST, "UVICORN_PORT": str(API_PORT)},
    )

    max_wait = 60  # seconds
    wait_interval = 0.5
    # Use a wall-clock deadline so time spent inside the (up to 2 s) health
    # request counts towards max_wait, not only the sleep interval.
    deadline = time.monotonic() + max_wait

    print(f"⏳ Waiting for API server at {API_URL}...")

    while time.monotonic() < deadline:
        # Bail out early if the server process already died.
        exit_code = api_process.poll()
        if exit_code is not None:
            print("❌ API server failed to start!")
            print(f"   Exit code: {exit_code} (see the API server output above)")
            sys.exit(1)

        try:
            response = requests.get(f"{API_URL}/health", timeout=2)
        except requests.exceptions.RequestException:
            # Not accepting connections yet; keep polling.
            response = None

        if response is not None and response.status_code == 200:
            print(f"✅ API server ready at {API_URL}")

            # Optional: warm up models to avoid a timeout on the first request.
            warmup_enabled = os.getenv("CU1_WARMUP_MODELS", "true").lower() in {"1", "true", "yes", "y"}
            if warmup_enabled:
                _warmup_models()

            return api_process

        time.sleep(wait_interval)

    print(f"❌ API server did not start within {max_wait} seconds")
    # Wait for the child to exit so it is reaped before we quit.
    _stop_process(api_process)
    sys.exit(1)


def main():
    """Run the application: start the API server, then serve the Gradio UI.

    Prints a startup banner, starts the API server subprocess, installs
    SIGINT/SIGTERM handlers that stop it, and launches the Gradio interface
    on ``UI_PORT``. If Gradio reports that the port is unavailable, the
    launch is retried with an automatically selected port.

    The API server is stopped exactly once on every exit path.

    Raises:
        SystemExit: Always. Status 0 after a normal shutdown or a signal,
            status 1 if the UI failed with an unexpected exception.
    """
    print("=" * 70)
    print("🎯 CU-1 UI Element Detector - Unified API Mode")
    print("=" * 70)
    print("\n📡 Architecture: All traffic goes through API layer")
    print(f"   - API Server: {API_URL}")
    print(f"   - Gradio UI: http://localhost:{UI_PORT}")
    print("\n🏗️  Benefits:")
    print("   - Single code path (easier to maintain)")
    print("   - Consistent behavior everywhere")
    print("   - Proper microservices architecture")
    print("=" * 70 + "\n")

    # Start API server in background (exits the program itself on failure)
    api_process = start_api_server()

    shutdown_started = False

    def stop_api_server():
        """Stop the API server subprocess; safe to call more than once."""
        nonlocal shutdown_started
        if shutdown_started:
            return
        shutdown_started = True

        print("\n\n🛑 Shutting down...")
        if api_process is not None and api_process.poll() is None:
            print("   Stopping API server...")
            api_process.terminate()
            try:
                api_process.wait(timeout=5)
            except subprocess.TimeoutExpired:
                api_process.kill()
        print("   Goodbye! 👋")

    def cleanup(signum=None, frame=None):
        """Signal handler: stop the API server and exit with status 0."""
        if shutdown_started:
            # A shutdown is already in progress; let it finish undisturbed.
            return
        stop_api_server()
        sys.exit(0)

    signal.signal(signal.SIGINT, cleanup)
    signal.signal(signal.SIGTERM, cleanup)

    exit_code = 0
    try:
        # Create Gradio interface with API detection function
        detection_fn = partial(detect_with_api, api_url=API_URL)

        demo = create_interface(
            detection_fn=detection_fn,
            title_suffix="Unified API Mode",
            show_api_info=True,
            api_url=API_URL
        )

        def launch_ui(server_port):
            """Configure the request queue and launch Gradio on *server_port*."""
            demo.queue(
                max_size=10,  # Allow up to 10 queued requests
                default_concurrency_limit=1  # Process one at a time to avoid memory issues
            ).launch(
                server_name=UI_HOST,
                server_port=server_port,
                share=False,
                max_threads=1  # Single thread to avoid memory issues
            )

        print(f"\n🎨 Starting Gradio UI on http://localhost:{UI_PORT}...\n")

        try:
            launch_ui(UI_PORT)
        except OSError as e:
            # Only the "port unavailable" error triggers the fallback.
            if "Cannot find empty port" not in str(e):
                raise
            print(f"⚠️  Port {UI_PORT} is busy, trying to find a free port...")
            launch_ui(None)  # Auto-select free port
    except KeyboardInterrupt:
        # Treated as a normal, user-requested shutdown.
        pass
    except Exception as e:
        print(f"\n❌ Error: {e}")
        # Report the failure through the exit status instead of masking it.
        exit_code = 1
    finally:
        # Single shutdown point, so the goodbye messages are printed once.
        stop_api_server()

    sys.exit(exit_code)


# Run the launcher only when this file is executed as a script, so that
# importing the module does not start the API server or the UI.
if __name__ == "__main__":
    main()