Baktabek's picture
Upload folder using huggingface_hub
409c17a verified
raw
history blame
2.31 kB
"""
Core - Prometheus Metrics
Application metrics for monitoring.
"""
from prometheus_client import Counter, Gauge, Histogram
# Request metrics
# Counter of HTTP requests, labelled by method, endpoint and status.
http_requests_total = Counter(
    name="http_requests_total",
    documentation="Total HTTP requests",
    labelnames=("method", "endpoint", "status"),
)
# Histogram of HTTP request durations, labelled by method and endpoint.
# Explicit bucket upper bounds run from 0.01 to 10.0 (seconds, per the name).
http_request_duration_seconds = Histogram(
    name="http_request_duration_seconds",
    documentation="HTTP request duration in seconds",
    labelnames=("method", "endpoint"),
    buckets=(0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0),
)
# RAG Pipeline metrics
# Histogram of the RAG retrieval phase duration, labelled by strategy.
# Explicit bucket upper bounds run from 0.01 to 2.0.
rag_retrieval_duration_seconds = Histogram(
    name="rag_retrieval_duration_seconds",
    documentation="RAG retrieval phase duration",
    labelnames=("strategy",),
    buckets=(0.01, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0),
)
# Histogram of the RAG reranking phase duration; declared without labels.
# Explicit bucket upper bounds run from 0.01 to 1.0.
rag_reranking_duration_seconds = Histogram(
    name="rag_reranking_duration_seconds",
    documentation="RAG reranking phase duration",
    buckets=(0.01, 0.05, 0.1, 0.2, 0.5, 1.0),
)
# Histogram of LLM generation duration, labelled by model.
# Explicit bucket upper bounds run from 0.5 to 20.0.
llm_generation_duration_seconds = Histogram(
    name="llm_generation_duration_seconds",
    documentation="LLM generation duration",
    labelnames=("model",),
    buckets=(0.5, 1.0, 2.0, 5.0, 10.0, 20.0),
)
# Counter of LLM tokens used, labelled by model and token type.
# Values noted for the "type" label: prompt, completion.
# NOTE: this note is intentionally not a trailing comment beginning with the
# word "type" followed by a colon; tools that parse PEP 484 type comments
# (e.g. mypy) treat such a comment as a misplaced type annotation and fail.
llm_tokens_used_total = Counter(
    "llm_tokens_used_total",
    "Total LLM tokens used",
    ["model", "type"],
)
# Cache metrics
# Counter of cache hits, labelled by cache type.
# Values noted for the "cache_type" label: embedding, retrieval, generation.
cache_hits_total = Counter(
    name="cache_hits_total",
    documentation="Total cache hits",
    labelnames=("cache_type",),
)
# Counter of cache misses, labelled by cache type.
cache_misses_total = Counter(
    name="cache_misses_total",
    documentation="Total cache misses",
    labelnames=("cache_type",),
)
# Business metrics
# Counter of processed queries, labelled by department and status.
queries_total = Counter(
    name="queries_total",
    documentation="Total queries processed",
    labelnames=("department", "status"),
)
# Counter of indexed documents, labelled by department and file type.
documents_indexed_total = Counter(
    name="documents_indexed_total",
    documentation="Total documents indexed",
    labelnames=("department", "file_type"),
)
# Histogram of query confidence scores, labelled by department.
# Bucket upper bounds step by 0.1 from 0.1 to 1.0
# (presumably scores lie in [0, 1] -- confirm against the caller).
query_confidence_score = Histogram(
    name="query_confidence_score",
    documentation="Query confidence scores",
    labelnames=("department",),
    buckets=(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0),
)
# System metrics
# Gauge for the number of active requests; declared without labels.
active_requests = Gauge(
    name="active_requests",
    documentation="Number of active requests",
)
# Gauge for active database connections; declared without labels.
database_connections_active = Gauge(
    name="database_connections_active",
    documentation="Active database connections",
)
# Error metrics
# Counter of errors, labelled by error type and originating component.
errors_total = Counter(
    name="errors_total",
    documentation="Total errors",
    labelnames=("error_type", "component"),
)