Spaces:
Running
Running
| """ | |
| Monitoring and health check endpoints. | |
| """ | |
| from flask import Blueprint, jsonify | |
| import os | |
| from datetime import datetime | |
| from utils.metrics import metrics | |
| from utils.request_logger import request_logger | |
| from utils.cache import cache_manager | |
| from utils.circuit_breaker import circuit_breaker_manager | |
| from config import config | |
| monitoring_bp = Blueprint('monitoring', __name__, url_prefix='/api') | |
| def health_check(): | |
| """ | |
| Basic health check endpoint. | |
| Returns 200 if service is running. | |
| """ | |
| return jsonify({ | |
| "status": "healthy", | |
| "timestamp": datetime.now().isoformat(), | |
| "version": config.VERSION, | |
| "environment": config.ENVIRONMENT.value | |
| }), 200 | |
| def readiness_check(): | |
| """ | |
| Readiness check - validates critical dependencies. | |
| Returns 200 if all dependencies are available. | |
| """ | |
| checks = {} | |
| overall_ready = True | |
| # Check vector store | |
| try: | |
| vector_store_path = config.VECTOR_STORE_PATH | |
| if os.path.exists(vector_store_path): | |
| checks["vector_store"] = "ready" | |
| else: | |
| checks["vector_store"] = "not_found" | |
| overall_ready = False | |
| except Exception as e: | |
| checks["vector_store"] = f"error: {str(e)}" | |
| overall_ready = False | |
| # Check LLM API key | |
| if config.GROQ_API_KEY: | |
| checks["llm_api"] = "configured" | |
| else: | |
| checks["llm_api"] = "missing_api_key" | |
| overall_ready = False | |
| # Check circuit breakers | |
| breaker_states = circuit_breaker_manager.get_all_states() | |
| open_breakers = [name for name, state in breaker_states.items() if state["state"] == "open"] | |
| if open_breakers: | |
| checks["circuit_breakers"] = f"open: {', '.join(open_breakers)}" | |
| overall_ready = False | |
| else: | |
| checks["circuit_breakers"] = "all_closed" | |
| status_code = 200 if overall_ready else 503 | |
| return jsonify({ | |
| "ready": overall_ready, | |
| "checks": checks, | |
| "timestamp": datetime.now().isoformat() | |
| }), status_code | |
| def get_metrics(): | |
| """ | |
| Get application metrics in JSON format. | |
| """ | |
| if not config.ENABLE_METRICS: | |
| return jsonify({"error": "Metrics disabled"}), 403 | |
| app_metrics = metrics.get_metrics() | |
| cache_stats = cache_manager.get_all_stats() | |
| circuit_states = circuit_breaker_manager.get_all_states() | |
| return jsonify({ | |
| "application": app_metrics, | |
| "cache": cache_stats, | |
| "circuit_breakers": circuit_states | |
| }), 200 | |
| def get_stats(): | |
| """ | |
| Get human-readable statistics. | |
| """ | |
| app_metrics = metrics.get_metrics() | |
| # Get additional stats from request logger | |
| recent_requests = request_logger.get_recent_requests(limit=10) | |
| intent_dist = request_logger.get_intent_distribution(hours=24) | |
| error_rate_24h = request_logger.get_error_rate(hours=24) | |
| return jsonify({ | |
| "summary": { | |
| "total_requests": app_metrics["requests"]["total"], | |
| "active_requests": app_metrics["requests"]["active"], | |
| "error_rate": app_metrics["requests"]["error_rate"], | |
| "avg_latency_ms": app_metrics["requests"]["latency_ms"]["p50"], | |
| "uptime_hours": app_metrics["uptime_seconds"] / 3600, | |
| }, | |
| "intent_distribution": intent_dist, | |
| "recent_requests": recent_requests, | |
| "error_rate_24h": error_rate_24h, | |
| "cache_performance": { | |
| "llm_cache_hit_rate": app_metrics["llm"]["cache_hit_rate"], | |
| "app_cache_hit_rate": app_metrics["cache"]["hit_rate"], | |
| } | |
| }), 200 | |
| def get_recent_logs(): | |
| """ | |
| Get recent request logs. | |
| """ | |
| limit = int(request.args.get('limit', 50)) | |
| limit = min(limit, 500) # Cap at 500 | |
| recent = request_logger.get_recent_requests(limit=limit) | |
| return jsonify({ | |
| "count": len(recent), | |
| "requests": recent | |
| }), 200 | |