Spaces:
Sleeping
Sleeping
File size: 2,310 Bytes
409c17a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
"""
Core - Prometheus Metrics
Application metrics for monitoring.
"""
from prometheus_client import Counter, Gauge, Histogram
# ---------------------------------------------------------------------------
# HTTP request metrics
# ---------------------------------------------------------------------------

# Running count of HTTP requests, split by the "method", "endpoint" and
# "status" labels.
# NOTE(review): presumably "endpoint" carries a route template rather than a
# raw URL path (to keep label cardinality bounded) - confirm at the call sites.
http_requests_total = Counter(
    name="http_requests_total",
    documentation="Total HTTP requests",
    labelnames=("method", "endpoint", "status"),
)

# Distribution of HTTP request durations, split by "method" and "endpoint".
# Explicit bucket upper bounds run from 0.01 s up to 10 s.
http_request_duration_seconds = Histogram(
    name="http_request_duration_seconds",
    documentation="HTTP request duration in seconds",
    labelnames=("method", "endpoint"),
    buckets=(0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0),
)
# ---------------------------------------------------------------------------
# RAG pipeline metrics
# ---------------------------------------------------------------------------

# Duration of the retrieval phase, split by the "strategy" label.
# Explicit bucket upper bounds run from 0.01 up to 2.0.
rag_retrieval_duration_seconds = Histogram(
    name="rag_retrieval_duration_seconds",
    documentation="RAG retrieval phase duration",
    labelnames=("strategy",),
    buckets=(0.01, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0),
)

# Duration of the reranking phase. This histogram is declared without labels,
# so it is observed directly rather than through .labels(...).
rag_reranking_duration_seconds = Histogram(
    name="rag_reranking_duration_seconds",
    documentation="RAG reranking phase duration",
    buckets=(0.01, 0.05, 0.1, 0.2, 0.5, 1.0),
)

# Duration of LLM generation, split by the "model" label.
# Buckets start at 0.5 and top out at 20.0.
llm_generation_duration_seconds = Histogram(
    name="llm_generation_duration_seconds",
    documentation="LLM generation duration",
    labelnames=("model",),
    buckets=(0.5, 1.0, 2.0, 5.0, 10.0, 20.0),
)

# Running count of LLM tokens, split by "model" and "type".
# Values of the "type" label: prompt, completion.
# (Worded so the comment cannot be mistaken for a PEP 484 "# type:" comment.)
llm_tokens_used_total = Counter(
    name="llm_tokens_used_total",
    documentation="Total LLM tokens used",
    labelnames=("model", "type"),
)
# ---------------------------------------------------------------------------
# Cache metrics
# ---------------------------------------------------------------------------

# Running count of cache hits, split by the "cache_type" label.
# Values of "cache_type": embedding, retrieval, generation.
cache_hits_total = Counter(
    name="cache_hits_total",
    documentation="Total cache hits",
    labelnames=("cache_type",),
)

# Running count of cache misses; uses the same "cache_type" label name as
# cache_hits_total.
cache_misses_total = Counter(
    name="cache_misses_total",
    documentation="Total cache misses",
    labelnames=("cache_type",),
)
# ---------------------------------------------------------------------------
# Business metrics
# ---------------------------------------------------------------------------

# Running count of processed queries, split by "department" and "status".
queries_total = Counter(
    name="queries_total",
    documentation="Total queries processed",
    labelnames=("department", "status"),
)

# Running count of indexed documents, split by "department" and "file_type".
documents_indexed_total = Counter(
    name="documents_indexed_total",
    documentation="Total documents indexed",
    labelnames=("department", "file_type"),
)

# Distribution of query confidence scores, split by "department".
# Bucket upper bounds step by 0.1 from 0.1 to 1.0.
# NOTE(review): this layout suggests scores are expected in [0, 1] - confirm
# against the code that records observations.
query_confidence_score = Histogram(
    name="query_confidence_score",
    documentation="Query confidence scores",
    labelnames=("department",),
    buckets=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0),
)
# ---------------------------------------------------------------------------
# System metrics
# ---------------------------------------------------------------------------

# Gauge for the number of active requests; declared without labels.
active_requests = Gauge(
    name="active_requests",
    documentation="Number of active requests",
)

# Gauge for the number of active database connections; declared without labels.
database_connections_active = Gauge(
    name="database_connections_active",
    documentation="Active database connections",
)
# ---------------------------------------------------------------------------
# Error metrics
# ---------------------------------------------------------------------------

# Running count of errors, split by the "error_type" and "component" labels.
errors_total = Counter(
    name="errors_total",
    documentation="Total errors",
    labelnames=("error_type", "component"),
)
|