"""
Unified Entry Point - API Architecture

This file now uses a unified API-based architecture for all deployments.
Both local development and Hugging Face Spaces use the same API layer.

Architecture:
1. Starts API server in background (subprocess)
2. Starts Gradio UI that connects to the API
3. Everything goes through HTTP/REST

Benefits:
- Single code path to maintain
- Consistent behavior everywhere
- Easy to test and debug
- Proper separation of concerns

Usage:
    python app.py

The script will automatically:
- Start the API server on http://localhost:8000
- Start the Gradio UI on http://localhost:7860
"""

import os

# Exported before the remaining imports so the flag is already in the
# environment when they load (presumably the ui.* modules pull in PyTorch --
# TODO confirm).
os.environ['PYTORCH_ENABLE_MPS_FALLBACK'] = '1'

import collections
import signal
import subprocess
import sys
import threading
import time
from functools import partial

import requests

# Use shared UI components
from ui.shared_interface import create_interface
from ui.detection_wrapper import detect_with_api

# Configuration
API_HOST = os.getenv("API_HOST", "0.0.0.0")
API_PORT = int(os.getenv("API_PORT", "8000"))
API_URL = f"http://localhost:{API_PORT}"
UI_HOST = os.getenv("GRADIO_SERVER_NAME", "0.0.0.0")
UI_PORT = int(os.getenv("GRADIO_SERVER_PORT", "7860"))

# Environment-variable spellings that count as "enabled".
_TRUTHY_VALUES = {"1", "true", "yes", "y"}
# How many trailing lines of API server output are kept for error reports.
_API_OUTPUT_TAIL_LINES = 500
# Seconds to wait for the API server to exit after terminate() before kill().
_STOP_TIMEOUT = 5


def _drain_output(stream, tail):
    """Consume *stream* line by line, keeping the most recent lines in *tail*.

    Runs in a background thread for the lifetime of the API server. Without a
    reader the child would block once the OS pipe buffer fills up.
    """
    with stream:
        for line in stream:
            tail.append(line)


def _stop_process(process):
    """Terminate *process*, escalating to kill() if it does not exit in time."""
    process.terminate()
    try:
        process.wait(timeout=_STOP_TIMEOUT)
    except subprocess.TimeoutExpired:
        process.kill()


def _warmup_models():
    """Optionally ask the API to warm up its models (best effort).

    Controlled by CU1_WARMUP_MODELS (default: enabled) and
    CU1_WARMUP_TIMEOUT (seconds, default 180). Request failures are reported
    but never abort startup.
    """
    warmup_enabled = os.getenv("CU1_WARMUP_MODELS", "true").lower() in _TRUTHY_VALUES
    if not warmup_enabled:
        return

    print("🔄 Warming up models (this may take 1-3 minutes on first run)...")
    try:
        warmup_timeout = int(os.getenv("CU1_WARMUP_TIMEOUT", "180"))  # 3 minutes default
        warmup_response = requests.post(f"{API_URL}/warmup", timeout=warmup_timeout)
    except requests.exceptions.Timeout:
        print("⚠️ Warmup timed out, but API is ready. First request may be slower.")
    except requests.exceptions.RequestException as e:
        print(f"⚠️ Warmup failed: {e}, but API is ready. First request may be slower.")
    else:
        if warmup_response.status_code == 200:
            print("✅ Models warmed up successfully!")
        else:
            print(f"⚠️ Warmup returned status {warmup_response.status_code}, continuing anyway...")


def start_api_server():
    """Start the API server in a subprocess and wait until it is healthy.

    Returns:
        The running ``subprocess.Popen`` object for the API server.

    Exits the interpreter with status 1 if the server process dies during
    startup or does not answer ``/health`` with 200 within 60 seconds.
    """
    print("🚀 Starting API server...")

    # Start API server as subprocess
    api_process = subprocess.Popen(
        [sys.executable, "app_api.py"],
        env={**os.environ, "UVICORN_HOST": API_HOST, "UVICORN_PORT": str(API_PORT)},
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        errors="replace",  # undecodable bytes must not kill the drain thread
        bufsize=1,
    )

    # Keep reading the child's output for as long as it runs; only the tail is
    # retained so it can be shown if startup fails.
    output_tail = collections.deque(maxlen=_API_OUTPUT_TAIL_LINES)
    drain_thread = threading.Thread(
        target=_drain_output,
        args=(api_process.stdout, output_tail),
        daemon=True,
    )
    drain_thread.start()

    # Wait for API to be ready
    max_wait = 60  # seconds
    wait_interval = 0.5
    # A wall-clock deadline, so time spent inside slow health requests counts
    # toward the limit as well.
    deadline = time.monotonic() + max_wait

    print(f"⏳ Waiting for API server at {API_URL}...")

    while time.monotonic() < deadline:
        try:
            response = requests.get(f"{API_URL}/health", timeout=2)
        except requests.exceptions.RequestException:
            response = None  # not accepting connections yet

        if response is not None and response.status_code == 200:
            print(f"✅ API server ready at {API_URL}")
            # Optional: Warmup models to avoid timeout on first request.
            # This is especially useful for CPU-only environments.
            _warmup_models()
            return api_process

        # Check if process died
        if api_process.poll() is not None:
            drain_thread.join(timeout=2)  # let the reader collect the last lines
            print("❌ API server failed to start!")
            print("\nAPI server output:")
            print("".join(output_tail))
            sys.exit(1)

        time.sleep(wait_interval)

    print(f"❌ API server did not start within {max_wait} seconds")
    _stop_process(api_process)
    sys.exit(1)


def _launch_ui(demo, port):
    """Enable the request queue on *demo* and launch it on *port* (None = auto)."""
    demo.queue(
        max_size=10,  # Allow up to 10 queued requests
        default_concurrency_limit=1,  # Process one at a time to avoid memory issues
    ).launch(
        server_name=UI_HOST,
        server_port=port,
        share=False,
        max_threads=1,  # Single thread to avoid memory issues
    )


def main():
    """Main entry point - Unified API architecture.

    Starts the API server, then serves the Gradio UI in the foreground. The
    API server is stopped on exit, SIGINT or SIGTERM. The process exits with
    status 0 after a normal shutdown and 1 if the UI failed with an error.
    """
    print("=" * 70)
    print("🎯 CU-1 UI Element Detector - Unified API Mode")
    print("=" * 70)
    print("\n📡 Architecture: All traffic goes through API layer")
    print(f" - API Server: {API_URL}")
    print(f" - Gradio UI: http://localhost:{UI_PORT}")
    print("\n🏗️ Benefits:")
    print(" - Single code path (easier to maintain)")
    print(" - Consistent behavior everywhere")
    print(" - Proper microservices architecture")
    print("=" * 70 + "\n")

    # Start API server in background
    api_process = start_api_server()

    shutdown_started = False

    def shutdown():
        """Stop the API server; safe to call more than once."""
        nonlocal shutdown_started
        if shutdown_started:
            return
        shutdown_started = True

        print("\n\n🛑 Shutting down...")
        if api_process and api_process.poll() is None:
            print(" Stopping API server...")
            _stop_process(api_process)
        print(" Goodbye! 👋")

    def handle_signal(signum, frame):
        """Signal handler: clean up, then leave with a success status."""
        shutdown()
        sys.exit(0)

    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)

    exit_code = 0
    try:
        # Create Gradio interface with API detection function
        detection_fn = partial(detect_with_api, api_url=API_URL)
        demo = create_interface(
            detection_fn=detection_fn,
            title_suffix="Unified API Mode",
            show_api_info=True,
            api_url=API_URL,
        )

        print(f"\n🎨 Starting Gradio UI on http://localhost:{UI_PORT}...\n")

        # Launch Gradio with automatic port fallback.
        # API is automatically exposed at /api/predict for HF Spaces.
        try:
            _launch_ui(demo, UI_PORT)
        except OSError as e:
            if "Cannot find empty port" not in str(e):
                raise
            print(f"⚠️ Port {UI_PORT} is busy, trying to find a free port...")
            _launch_ui(demo, None)  # Auto-select free port
    except KeyboardInterrupt:
        pass  # normal interactive stop; cleanup happens in ``finally``
    except Exception as e:
        print(f"\n❌ Error: {e}")
        exit_code = 1
    finally:
        # Runs on every path, including SystemExit raised by handle_signal;
        # shutdown() is idempotent so the banner is printed only once.
        shutdown()

    sys.exit(exit_code)


if __name__ == "__main__":
    main()