| |
| """ |
| Service Health Monitor API |
| Real-time monitoring of all API services and data providers |
| Shows status, response times, success rates, and health metrics |
| """ |
|
|
| from fastapi import APIRouter, HTTPException |
| from pydantic import BaseModel |
| from typing import Dict, Any, List, Optional |
| from datetime import datetime |
| import logging |
| import asyncio |
| import httpx |
| import time |
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Router carrying the health-monitor endpoints; every route registered on it
# is served under the /api/health prefix.
router = APIRouter(prefix="/api/health", tags=["Health Monitor"])
|
|
|
|
class ServiceStatus(BaseModel):
    """Result of probing one monitored service.

    Instances are built by ``check_service_health`` and returned in the
    ``services`` list of ``HealthMonitorResponse``.
    """
    # Display name, taken from the service's "name" config entry.
    name: str
    # "online", "rate_limited", "degraded" or "offline", as assigned by
    # check_service_health.
    status: str
    # Milliseconds: measured elapsed time when a response arrived, the
    # configured timeout on the timeout path, None for other failures.
    response_time_ms: Optional[float] = None
    # UTC time of the probe as an ISO-8601 string with a trailing "Z".
    last_check: str
    # None when the service is online; otherwise "HTTP <code>" or a short
    # error description.
    last_error: Optional[str] = None
    # Derived from this single probe only (100.0 online, 50.0 degraded,
    # 0.0 otherwise); it is not a historical average.
    success_rate: Optional[float] = None
    # Extra context: "category", "sub_services" and either "http_status"
    # or "error_type".
    details: Optional[Dict[str, Any]] = None
|
|
|
|
class HealthMonitorResponse(BaseModel):
    """Aggregated payload returned by the ``/monitor`` endpoint."""
    # UTC time the summary was assembled, ISO-8601 with a trailing "Z".
    timestamp: str
    # Number of services that were probed.
    total_services: int
    # Count of services per status value.
    online: int
    offline: int
    rate_limited: int
    degraded: int
    # One entry per probed service.
    services: List[ServiceStatus]
    # "healthy" when every service is online, "degraded" when at least 70%
    # are online, "critical" otherwise.
    overall_health: str
|
|
|
|
| |
# Registry of the services probed by the health monitor, keyed by service id.
#
# Keys used per entry:
#   name          display name reported in the status output
#   category      grouping label surfaced in the status details
#   endpoint      URL requested with GET; for "internal" entries this is a
#                 path that is appended to get_base_url()
#   timeout       request timeout in seconds (the checker falls back to 5)
#   sub_services  informational list surfaced in the status details
#   internal      True when the endpoint is served by this application
#   description / priority
#                 present on some entries only; not read by the code in
#                 this section
SERVICES_CONFIG = {
    "crypto_api_clean": {
        "name": "Crypto API Clean",
        "category": "Resource Database",
        "endpoint": "https://really-amin-crypto-api-clean-fixed.hf.space/api/resources/stats",
        "timeout": 10,
        "sub_services": ["rpc_nodes (24)", "block_explorers (33)", "market_data_apis (33)", "news_apis (17)", "sentiment_apis (14)"],
        "description": "281+ cryptocurrency resources across 12 categories",
        "priority": 2
    },
    "crypto_dt_source": {
        "name": "Crypto DT Source",
        "category": "Unified Data API",
        "endpoint": "https://crypto-dt-source.onrender.com/api/v1/status",
        "timeout": 15,
        "sub_services": ["prices", "klines", "sentiment", "models", "datasets"],
        "description": "Unified API v2.0.0 with 4 AI models and 5 datasets",
        "priority": 2
    },
    "coingecko": {
        "name": "CoinGecko",
        "category": "Data Provider",
        "endpoint": "https://api.coingecko.com/api/v3/ping",
        "timeout": 5,
        "sub_services": ["prices", "market_data", "ohlcv"]
    },
    "binance": {
        "name": "Binance",
        "category": "Exchange",
        "endpoint": "https://api.binance.com/api/v3/ping",
        "timeout": 5,
        "sub_services": ["spot", "futures", "websocket"]
    },
    "coincap": {
        "name": "CoinCap",
        "category": "Data Provider",
        "endpoint": "https://api.coincap.io/v2/assets/bitcoin",
        "timeout": 5,
        "sub_services": ["assets", "markets", "rates"]
    },
    "cryptocompare": {
        "name": "CryptoCompare",
        "category": "Data Provider",
        "endpoint": "https://min-api.cryptocompare.com/data/price?fsym=BTC&tsyms=USD",
        "timeout": 5,
        "sub_services": ["price", "historical", "social"]
    },
    # Internal entries: the endpoint is a path on this application itself.
    "huggingface": {
        "name": "HuggingFace Space",
        "category": "Internal",
        # Resolves to the "/self" route registered on this module's router.
        "endpoint": "/api/health/self",
        "timeout": 3,
        "sub_services": ["api", "websocket", "database"],
        "internal": True
    },
    "backend_indicators": {
        "name": "Technical Indicators",
        "category": "Internal",
        "endpoint": "/api/indicators/services",
        "timeout": 3,
        "sub_services": ["rsi", "macd", "bollinger_bands", "comprehensive"],
        "internal": True
    },
    "backend_market": {
        "name": "Market Data API",
        "category": "Internal",
        "endpoint": "/api/market/crypto/list",
        "timeout": 3,
        "sub_services": ["prices", "ohlcv", "tickers"],
        "internal": True
    }
}
|
|
|
|
def get_base_url() -> str:
    """Return the base URL used to reach this application's own endpoints.

    Resolution order:

    1. ``SPACE_ID`` unset or empty: not running as a Hugging Face Space, so
       ``http://localhost:7860`` is returned.
    2. ``SPACE_HOST`` set: used verbatim as the host name.
    3. Otherwise the host is derived from ``SPACE_ID``.

    ``SPACE_ID`` has the form ``owner/space-name``. Interpolating it directly
    into ``https://{SPACE_ID}.hf.space`` would put a ``/`` inside the host
    part and yield an unusable URL, so the slash is replaced with ``-``
    (matching hosts such as ``really-amin-crypto-api-clean-fixed.hf.space``).

    Returns:
        Base URL without a trailing slash.
    """
    import os  # kept function-local, as in the original implementation

    space_id = os.getenv("SPACE_ID")
    if not space_id:
        return "http://localhost:7860"

    # Prefer the platform-provided host name when it is available.
    space_host = os.getenv("SPACE_HOST")
    if space_host:
        return f"https://{space_host}"

    # Host names are case-insensitive; lower-casing only normalises the form.
    # NOTE(review): other characters in the repo name (e.g. "_" or ".") may
    # also need mapping to "-" - confirm against the Spaces documentation.
    subdomain = space_id.replace("/", "-").lower()
    return f"https://{subdomain}.hf.space"
|
|
|
|
def _classify_http_status(status_code: int) -> str:
    """Map an HTTP status code onto the monitor's status vocabulary."""
    if status_code == 200:
        return "online"
    if status_code == 429:
        return "rate_limited"
    if 500 <= status_code < 600:
        return "degraded"
    return "offline"


def _build_service_status(
    config: Dict[str, Any],
    status: str,
    *,
    response_time_ms: Optional[float] = None,
    last_error: Optional[str] = None,
    success_rate: float = 0.0,
    **extra_details: Any,
) -> ServiceStatus:
    """Assemble a ServiceStatus with the fields shared by every outcome."""
    return ServiceStatus(
        name=config["name"],
        status=status,
        response_time_ms=response_time_ms,
        last_check=datetime.utcnow().isoformat() + "Z",
        last_error=last_error,
        success_rate=success_rate,
        details={
            "category": config.get("category", "Unknown"),
            "sub_services": config.get("sub_services", []),
            **extra_details,
        },
    )


async def check_service_health(service_id: str, config: Dict[str, Any]) -> ServiceStatus:
    """Probe one service with a GET request and report its health.

    Args:
        service_id: Registry key of the service; used only in log output.
        config: Service settings. ``name`` and ``endpoint`` are required;
            ``timeout`` (seconds, default 5), ``internal``, ``category`` and
            ``sub_services`` are optional.

    Returns:
        A ServiceStatus whose ``status`` is "online" (HTTP 200),
        "rate_limited" (429), "degraded" (5xx) or "offline" (any other
        status code, a timeout, a connection failure or any other error).
        Request failures are reported in the result rather than raised.
    """
    timeout = config.get("timeout", 5)
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # wall-clock adjustments.
    started = time.perf_counter()

    try:
        endpoint = config["endpoint"]
        # Internal services are configured as paths relative to this app.
        if config.get("internal", False) and not endpoint.startswith("http"):
            endpoint = f"{get_base_url()}{endpoint}"

        async with httpx.AsyncClient() as client:
            response = await client.get(
                endpoint,
                timeout=timeout,
                follow_redirects=True,
            )
        elapsed_ms = (time.perf_counter() - started) * 1000
    except (httpx.TimeoutException, asyncio.TimeoutError):
        # httpx signals timeouts with its own exception hierarchy, not with
        # asyncio.TimeoutError; both are handled so timeouts are labelled as
        # such instead of falling through to the generic handler.
        return _build_service_status(
            config,
            "offline",
            response_time_ms=timeout * 1000,
            last_error="Request timeout",
            error_type="timeout",
        )
    except httpx.ConnectError as e:
        return _build_service_status(
            config,
            "offline",
            last_error=f"Connection failed: {str(e)[:100]}",
            error_type="connection_error",
        )
    except Exception as e:
        logger.error("Error checking %s: %s", service_id, e)
        return _build_service_status(
            config,
            "offline",
            last_error=str(e)[:100],
            error_type="unknown_error",
        )

    status = _classify_http_status(response.status_code)
    # Single-probe heuristic, not a historical rate.
    success_rate = {"online": 100.0, "degraded": 50.0}.get(status, 0.0)

    return _build_service_status(
        config,
        status,
        response_time_ms=round(elapsed_ms, 2),
        last_error=None if status == "online" else f"HTTP {response.status_code}",
        success_rate=success_rate,
        http_status=response.status_code,
    )
|
|
|
|
@router.get("/monitor", response_model=HealthMonitorResponse)
async def get_service_health():
    """
    Probe every configured service concurrently and summarise the results.

    Returns per-status counts, the individual service results and an overall
    verdict: "healthy" when all services are online, "degraded" when at least
    70% are online, "critical" otherwise. Any unexpected failure is reported
    as an HTTP 500.
    """
    try:
        results = await asyncio.gather(
            *(
                check_service_health(key, entry)
                for key, entry in SERVICES_CONFIG.items()
            )
        )

        # Single pass over the results instead of one scan per status.
        tally = {"online": 0, "offline": 0, "rate_limited": 0, "degraded": 0}
        for result in results:
            if result.status in tally:
                tally[result.status] += 1

        service_count = len(results)
        up = tally["online"]

        if up == service_count:
            verdict = "healthy"
        elif up >= service_count * 0.7:
            verdict = "degraded"
        else:
            verdict = "critical"

        return HealthMonitorResponse(
            timestamp=datetime.utcnow().isoformat() + "Z",
            total_services=service_count,
            online=up,
            offline=tally["offline"],
            rate_limited=tally["rate_limited"],
            degraded=tally["degraded"],
            services=results,
            overall_health=verdict,
        )

    except Exception as e:
        logger.error(f"Health monitor error: {e}")
        raise HTTPException(status_code=500, detail=f"Failed to check service health: {str(e)}")
|
|
|
|
@router.get("/self")
async def health_check():
    """
    Liveness endpoint for this application.

    Always reports "healthy" together with the service name, the current UTC
    time and a version string.
    """
    checked_at = datetime.utcnow().isoformat() + "Z"
    payload = {
        "status": "healthy",
        "service": "crypto-intelligence-hub",
        "timestamp": checked_at,
        "version": "1.0.0",
    }
    return payload
|
|
|
|
@router.get("/services")
async def list_monitored_services():
    """
    Describe every service in the monitoring registry.

    No service is probed here; the response only exposes each entry's id,
    name, category and sub-services, plus the entry count and a UTC timestamp.
    """
    catalog = []
    for key, entry in SERVICES_CONFIG.items():
        catalog.append(
            {
                "id": key,
                "name": entry["name"],
                "category": entry.get("category", "Unknown"),
                "sub_services": entry.get("sub_services", []),
            }
        )

    return {
        "success": True,
        "total_services": len(SERVICES_CONFIG),
        "services": catalog,
        "timestamp": datetime.utcnow().isoformat() + "Z",
    }
|
|