File size: 2,310 Bytes
409c17a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
"""

Core - Prometheus Metrics



Application metrics for monitoring.

"""
from prometheus_client import Counter, Gauge, Histogram

# Request metrics
# Counter of HTTP requests, partitioned by the method / endpoint / status labels.
http_requests_total = Counter(
    name="http_requests_total",
    documentation="Total HTTP requests",
    labelnames=("method", "endpoint", "status"),
)

# Histogram of HTTP request durations in seconds, labelled by method and
# endpoint. Explicit bucket bounds run from 0.01 s up to 10 s.
http_request_duration_seconds = Histogram(
    name="http_request_duration_seconds",
    documentation="HTTP request duration in seconds",
    labelnames=("method", "endpoint"),
    buckets=(0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0),
)

# RAG Pipeline metrics
# Histogram of the RAG retrieval phase duration, labelled by strategy.
# Bucket bounds span 0.01 to 2.0 (seconds, going by the metric name).
rag_retrieval_duration_seconds = Histogram(
    name="rag_retrieval_duration_seconds",
    documentation="RAG retrieval phase duration",
    labelnames=("strategy",),
    buckets=(0.01, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0),
)

# Histogram of the RAG reranking phase duration; declared without labels.
# Bucket bounds span 0.01 to 1.0 (seconds, going by the metric name).
rag_reranking_duration_seconds = Histogram(
    name="rag_reranking_duration_seconds",
    documentation="RAG reranking phase duration",
    buckets=(0.01, 0.05, 0.1, 0.2, 0.5, 1.0),
)

# Histogram of LLM generation duration, labelled by model.
# Bucket bounds span 0.5 to 20.0 (seconds, going by the metric name).
llm_generation_duration_seconds = Histogram(
    name="llm_generation_duration_seconds",
    documentation="LLM generation duration",
    labelnames=("model",),
    buckets=(0.5, 1.0, 2.0, 5.0, 10.0, 20.0),
)

# Counter of LLM tokens used, labelled by model and by token kind.
# Values noted for the "type" label: prompt, completion.
# NOTE: this comment deliberately does not begin with "# type:" -- tools that
# parse PEP 484 type comments treat that prefix as a (misplaced) type comment
# and can reject the file.
llm_tokens_used_total = Counter(
    name="llm_tokens_used_total",
    documentation="Total LLM tokens used",
    labelnames=("model", "type"),
)

# Cache metrics
# Counter of cache hits, labelled by cache_type.
# cache_type values noted by the original author: embedding, retrieval, generation.
cache_hits_total = Counter(
    name="cache_hits_total",
    documentation="Total cache hits",
    labelnames=("cache_type",),
)

# Counter of cache misses, labelled by cache_type.
cache_misses_total = Counter(
    name="cache_misses_total",
    documentation="Total cache misses",
    labelnames=("cache_type",),
)

# Business metrics
# Counter of processed queries, labelled by department and status.
queries_total = Counter(
    name="queries_total",
    documentation="Total queries processed",
    labelnames=("department", "status"),
)

# Counter of indexed documents, labelled by department and file_type.
documents_indexed_total = Counter(
    name="documents_indexed_total",
    documentation="Total documents indexed",
    labelnames=("department", "file_type"),
)

# Histogram of query confidence scores, labelled by department.
# Bucket bounds step by 0.1 from 0.1 to 1.0; presumably scores lie in [0, 1]
# -- confirm against whatever code observes into this metric.
query_confidence_score = Histogram(
    name="query_confidence_score",
    documentation="Query confidence scores",
    labelnames=("department",),
    buckets=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0),
)

# System metrics
# Gauge for the number of active requests; declared without labels.
active_requests = Gauge(
    name="active_requests",
    documentation="Number of active requests",
)

# Gauge for active database connections; declared without labels.
database_connections_active = Gauge(
    name="database_connections_active",
    documentation="Active database connections",
)

# Error metrics
# Counter of errors, labelled by error_type and the component reporting it.
errors_total = Counter(
    name="errors_total",
    documentation="Total errors",
    labelnames=("error_type", "component"),
)