Spaces:
Sleeping
Sleeping
| """ | |
| Core - Prometheus Metrics | |
| Application metrics for monitoring. | |
| """ | |
| from prometheus_client import Counter, Gauge, Histogram | |
# Request metrics

# Counter of HTTP requests, partitioned by the "method", "endpoint" and
# "status" labels.
http_requests_total = Counter(
    name="http_requests_total",
    documentation="Total HTTP requests",
    labelnames=["method", "endpoint", "status"],
)
# Histogram of HTTP request durations (seconds), partitioned by the
# "method" and "endpoint" labels, with explicit bucket upper bounds.
http_request_duration_seconds = Histogram(
    name="http_request_duration_seconds",
    documentation="HTTP request duration in seconds",
    labelnames=["method", "endpoint"],
    buckets=[0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0, 10.0],
)
# RAG Pipeline metrics

# Histogram of the retrieval phase duration, partitioned by the
# "strategy" label.
rag_retrieval_duration_seconds = Histogram(
    name="rag_retrieval_duration_seconds",
    documentation="RAG retrieval phase duration",
    labelnames=["strategy"],
    buckets=[0.01, 0.05, 0.1, 0.2, 0.5, 1.0, 2.0],
)
# Histogram of the reranking phase duration; declared without labels.
rag_reranking_duration_seconds = Histogram(
    name="rag_reranking_duration_seconds",
    documentation="RAG reranking phase duration",
    buckets=[0.01, 0.05, 0.1, 0.2, 0.5, 1.0],
)
# Histogram of LLM generation duration, partitioned by the "model" label.
llm_generation_duration_seconds = Histogram(
    name="llm_generation_duration_seconds",
    documentation="LLM generation duration",
    labelnames=["model"],
    buckets=[0.5, 1.0, 2.0, 5.0, 10.0, 20.0],
)
# Counter of LLM tokens consumed, partitioned by the "model" and "type"
# labels.
# Values of the "type" label: prompt, completion.
# NOTE: this note is deliberately not written as an inline comment that
# begins with the PEP 484 type-comment prefix; in the middle of a call
# expression such a comment is a misplaced type comment, which parsers
# running with type comments enabled report as a syntax error.
llm_tokens_used_total = Counter(
    name="llm_tokens_used_total",
    documentation="Total LLM tokens used",
    labelnames=["model", "type"],
)
# Cache metrics

# Counter of cache hits, partitioned by the "cache_type" label.
# Label values noted by the original author: embedding, retrieval, generation.
cache_hits_total = Counter(
    name="cache_hits_total",
    documentation="Total cache hits",
    labelnames=["cache_type"],
)
# Counter of cache misses, partitioned by the "cache_type" label.
cache_misses_total = Counter(
    name="cache_misses_total",
    documentation="Total cache misses",
    labelnames=["cache_type"],
)
# Business metrics

# Counter of processed queries, partitioned by the "department" and
# "status" labels.
queries_total = Counter(
    name="queries_total",
    documentation="Total queries processed",
    labelnames=["department", "status"],
)
# Counter of indexed documents, partitioned by the "department" and
# "file_type" labels.
documents_indexed_total = Counter(
    name="documents_indexed_total",
    documentation="Total documents indexed",
    labelnames=["department", "file_type"],
)
# Histogram of query confidence scores, partitioned by the "department"
# label, with bucket upper bounds in steps of 0.1 from 0.1 to 1.0.
query_confidence_score = Histogram(
    name="query_confidence_score",
    documentation="Query confidence scores",
    labelnames=["department"],
    buckets=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0],
)
# System metrics

# Gauge for the number of active requests; declared without labels.
active_requests = Gauge(
    name="active_requests",
    documentation="Number of active requests",
)
# Gauge for active database connections; declared without labels.
database_connections_active = Gauge(
    name="database_connections_active",
    documentation="Active database connections",
)
# Error metrics

# Counter of errors, partitioned by the "error_type" and "component" labels.
errors_total = Counter(
    name="errors_total",
    documentation="Total errors",
    labelnames=["error_type", "component"],
)