File size: 4,171 Bytes
72bff80
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
"""
Monitoring and health check endpoints.
"""
import os
from datetime import datetime

from flask import Blueprint, jsonify, request

from utils.metrics import metrics
from utils.request_logger import request_logger
from utils.cache import cache_manager
from utils.circuit_breaker import circuit_breaker_manager
from config import config

# Blueprint that groups the monitoring routes; all of them are mounted under /api.
monitoring_bp = Blueprint(name='monitoring', import_name=__name__, url_prefix='/api')


@monitoring_bp.route('/health', methods=['GET'])
def health_check():
    """
    Liveness probe.

    Responds with HTTP 200 and a JSON body holding a fixed "healthy" status,
    the current timestamp in ISO 8601 form, the configured version and the
    configured environment value.
    """
    payload = {
        "status": "healthy",
        "timestamp": datetime.now().isoformat(),
        "version": config.VERSION,
        "environment": config.ENVIRONMENT.value,
    }
    return jsonify(payload), 200


@monitoring_bp.route('/ready', methods=['GET'])
def readiness_check():
    """
    Readiness probe covering the service's critical dependencies.

    Three checks are run and reported under "checks":
      - vector_store: whether config.VECTOR_STORE_PATH exists on disk
      - llm_api: whether config.GROQ_API_KEY is set
      - circuit_breakers: whether any breaker reports the "open" state

    Responds with 200 when every check passes and 503 otherwise.
    """
    results = {}
    failed = False

    # Vector store: the path must exist; an exception also counts as a failure.
    try:
        store_found = os.path.exists(config.VECTOR_STORE_PATH)
    except Exception as exc:
        results["vector_store"] = f"error: {exc!s}"
        failed = True
    else:
        results["vector_store"] = "ready" if store_found else "not_found"
        failed = failed or not store_found

    # LLM API key: only its presence is verified, not its validity.
    has_key = bool(config.GROQ_API_KEY)
    results["llm_api"] = "configured" if has_key else "missing_api_key"
    failed = failed or not has_key

    # Circuit breakers: collect the names of every breaker currently open.
    tripped = []
    for breaker_name, info in circuit_breaker_manager.get_all_states().items():
        if info["state"] == "open":
            tripped.append(breaker_name)

    if tripped:
        results["circuit_breakers"] = "open: " + ', '.join(tripped)
        failed = True
    else:
        results["circuit_breakers"] = "all_closed"

    body = {
        "ready": not failed,
        "checks": results,
        "timestamp": datetime.now().isoformat(),
    }
    return jsonify(body), (503 if failed else 200)


@monitoring_bp.route('/metrics', methods=['GET'])
def get_metrics():
    """
    Expose application metrics as JSON.

    Responds with 403 when config.ENABLE_METRICS is falsy. Otherwise responds
    with 200 and a body combining the application metrics, the cache
    statistics and the circuit breaker states.
    """
    if config.ENABLE_METRICS:
        payload = {
            "application": metrics.get_metrics(),
            "cache": cache_manager.get_all_stats(),
            "circuit_breakers": circuit_breaker_manager.get_all_states(),
        }
        return jsonify(payload), 200

    return jsonify({"error": "Metrics disabled"}), 403


@monitoring_bp.route('/stats', methods=['GET'])
def get_stats():
    """
    Summarise metrics and request-log data in a human-readable JSON document.

    The response combines a summary taken from the metrics snapshot, the
    intent distribution and error rate over the last 24 hours, the 10 most
    recent requests, and the LLM/application cache hit rates.

    NOTE(review): "avg_latency_ms" is filled from the "p50" latency value.
    """
    snapshot = metrics.get_metrics()

    # Request-logger data is fetched before the snapshot is unpacked.
    latest = request_logger.get_recent_requests(limit=10)
    intents = request_logger.get_intent_distribution(hours=24)
    errors_24h = request_logger.get_error_rate(hours=24)

    request_stats = snapshot["requests"]
    summary = {
        "total_requests": request_stats["total"],
        "active_requests": request_stats["active"],
        "error_rate": request_stats["error_rate"],
        "avg_latency_ms": request_stats["latency_ms"]["p50"],
        "uptime_hours": snapshot["uptime_seconds"] / 3600,
    }
    cache_performance = {
        "llm_cache_hit_rate": snapshot["llm"]["cache_hit_rate"],
        "app_cache_hit_rate": snapshot["cache"]["hit_rate"],
    }

    body = {
        "summary": summary,
        "intent_distribution": intents,
        "recent_requests": latest,
        "error_rate_24h": errors_24h,
        "cache_performance": cache_performance,
    }
    return jsonify(body), 200


@monitoring_bp.route('/logs/recent', methods=['GET'])
def get_recent_logs():
    """
    Get recent request logs.

    Query parameters:
        limit: Maximum number of log entries to return. Defaults to 50 and
            is clamped to the range 1-500.

    Responds with 400 when ``limit`` is not an integer. Otherwise responds
    with 200 and a JSON body holding the entry count and the entries.
    """
    raw_limit = request.args.get('limit', 50)
    try:
        limit = int(raw_limit)
    except (TypeError, ValueError):
        # A malformed query string is a client error, not a server failure.
        return jsonify({"error": "limit must be an integer"}), 400

    # Cap at 500 to bound the response size, and keep the value positive so
    # a zero or negative limit never reaches the request logger.
    limit = max(1, min(limit, 500))

    recent = request_logger.get_recent_requests(limit=limit)

    return jsonify({
        "count": len(recent),
        "requests": recent
    }), 200