Insurance-RAG / api /monitoring.py
DeltaVenom's picture
Update app code and initialize runtime databases
72bff80
"""
Monitoring and health check endpoints.
"""
import os
from datetime import datetime

from flask import Blueprint, jsonify, request

from utils.metrics import metrics
from utils.request_logger import request_logger
from utils.cache import cache_manager
from utils.circuit_breaker import circuit_breaker_manager
from config import config
# Blueprint that groups the monitoring endpoints; routes registered on it
# are served under the /api URL prefix.
monitoring_bp = Blueprint('monitoring', __name__, url_prefix='/api')
@monitoring_bp.route('/health', methods=['GET'])
def health_check():
    """
    Liveness probe.

    Performs no dependency checks: it always answers HTTP 200 with a JSON
    body holding a fixed "healthy" status, the current local timestamp in
    ISO-8601 form, and the version and environment read from ``config``.
    """
    payload = {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "version": config.VERSION,
        "environment": config.ENVIRONMENT.value,
    }
    return jsonify(payload), 200
@monitoring_bp.route('/ready', methods=['GET'])
def readiness_check():
    """
    Readiness probe covering the service's critical dependencies.

    Three checks are run and reported under "checks":

    * ``vector_store``     - the path in ``config.VECTOR_STORE_PATH`` exists.
    * ``llm_api``          - ``config.GROQ_API_KEY`` is set (truthy).
    * ``circuit_breakers`` - no breaker reports the state ``"open"``.

    Responds with HTTP 200 when every check passes and 503 otherwise.
    """
    results = {}

    # Vector store: any exception while probing the path counts as not ready.
    try:
        store_ok = os.path.exists(config.VECTOR_STORE_PATH)
    except Exception as exc:
        results["vector_store"] = f"error: {exc!s}"
        store_ok = False
    else:
        results["vector_store"] = "ready" if store_ok else "not_found"

    # LLM API key: only its presence is verified, no call is made.
    key_ok = bool(config.GROQ_API_KEY)
    results["llm_api"] = "configured" if key_ok else "missing_api_key"

    # Circuit breakers: collect the names of all breakers currently open.
    tripped = [
        breaker
        for breaker, info in circuit_breaker_manager.get_all_states().items()
        if info["state"] == "open"
    ]
    if tripped:
        results["circuit_breakers"] = f"open: {', '.join(tripped)}"
    else:
        results["circuit_breakers"] = "all_closed"

    is_ready = store_ok and key_ok and not tripped
    http_status = 200 if is_ready else 503

    body = {
        "ready": is_ready,
        "checks": results,
        "timestamp": datetime.now().isoformat(),
    }
    return jsonify(body), http_status
@monitoring_bp.route('/metrics', methods=['GET'])
def get_metrics():
    """
    Return application metrics as JSON.

    When ``config.ENABLE_METRICS`` is falsy the endpoint answers 403 with an
    error body. Otherwise it answers 200 with three sections: application
    metrics, cache statistics and circuit breaker states.
    """
    if config.ENABLE_METRICS:
        snapshot = {
            "application": metrics.get_metrics(),
            "cache": cache_manager.get_all_stats(),
            "circuit_breakers": circuit_breaker_manager.get_all_states(),
        }
        return jsonify(snapshot), 200

    return jsonify({"error": "Metrics disabled"}), 403
@monitoring_bp.route('/stats', methods=['GET'])
def get_stats():
    """
    Return a condensed statistics overview as JSON (always HTTP 200).

    Combines the in-process metrics snapshot with data from the request
    logger: the 10 most recent requests, plus the intent distribution and
    error rate queried with ``hours=24``.
    """
    snapshot = metrics.get_metrics()

    # Request-logger derived figures.
    latest = request_logger.get_recent_requests(limit=10)
    intents = request_logger.get_intent_distribution(hours=24)
    errors_24h = request_logger.get_error_rate(hours=24)

    req = snapshot["requests"]
    summary = {
        "total_requests": req["total"],
        "active_requests": req["active"],
        "error_rate": req["error_rate"],
        # Reported under "avg_latency_ms" but read from the p50 latency entry.
        "avg_latency_ms": req["latency_ms"]["p50"],
        "uptime_hours": snapshot["uptime_seconds"] / 3600,
    }
    cache_performance = {
        "llm_cache_hit_rate": snapshot["llm"]["cache_hit_rate"],
        "app_cache_hit_rate": snapshot["cache"]["hit_rate"],
    }

    return jsonify({
        "summary": summary,
        "intent_distribution": intents,
        "recent_requests": latest,
        "error_rate_24h": errors_24h,
        "cache_performance": cache_performance,
    }), 200
@monitoring_bp.route('/logs/recent', methods=['GET'])
def get_recent_logs():
    """
    Get recent request logs.

    Query parameters:
        limit: Maximum number of log entries to return. Falls back to 50
            when absent or not a valid integer, and is clamped to the
            range 0-500.

    Returns:
        A JSON body with "count" (number of entries returned) and
        "requests" (the entries from the request logger), with HTTP 200.
    """
    default_limit = 50
    max_limit = 500

    # type=int makes the lookup return the default instead of raising
    # ValueError (which surfaced as a 500) for input such as ?limit=abc.
    limit = request.args.get('limit', default_limit, type=int)

    # Clamp on both sides: the upper bound keeps responses small, the lower
    # bound stops negative values from reaching the logger unchecked.
    limit = max(0, min(limit, max_limit))

    recent = request_logger.get_recent_requests(limit=limit)
    return jsonify({
        "count": len(recent),
        "requests": recent
    }), 200