File size: 1,606 Bytes
9a3b3da
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from prometheus_client import Counter, Histogram, Gauge


"""
    This code implements Observability, using Prometheus. It monitors the health, cost, and performance of the LLM application in real-time through Grafana dashboard. 
Counter (Requests/Tokens): Tracks total volume. It helps calculate Tokens per Minute (TPM) or Requests per Minute (RPM) to monitor API costs and usage spikes.
Histogram (Latency): Tracks how long the AI takes to respond. This is critical for identifying if the model provider is slowing down.
Gauge (Active Sessions): Tracks a value that goes up and down, showing how many users are currently interacting with the agent.
Labels: By using labels(model=model), you can compare different models side-by-side in your charts.
"""


# ---------------------------------------------------------------------------
# Prometheus metric definitions
# ---------------------------------------------------------------------------

# Request volume, partitioned by model and outcome; drives RPM and error-rate panels.
AGENT_REQUESTS = Counter(
    name='agent_requests_total',
    documentation='Total requests to agent',
    labelnames=['model', 'status'],
)

# End-to-end response time per model, in seconds (observed via .observe()).
AGENT_LATENCY = Histogram(
    name='agent_response_latency_seconds',
    documentation='Response latency',
    labelnames=['model'],
)

# Token consumption per model; 'type' distinguishes input vs. output tokens.
TOKEN_USAGE = Counter(
    name='agent_tokens_total',
    documentation='Total tokens used',
    labelnames=['model', 'type'],
)

# User-submitted ratings, partitioned by the rating value.
USER_FEEDBACK = Counter(
    name='user_feedback_total',
    documentation='User feedback ratings',
    labelnames=['rating'],
)

# Current number of in-progress user sessions (can go up and down).
ACTIVE_SESSIONS = Gauge(name='active_sessions', documentation='Number of active sessions')

def record_agent_metrics(model: str, latency: float, tokens_in: int, tokens_out: int, status: str):
    """Record one completed agent request across all Prometheus metrics.

    Args:
        model: Identifier of the LLM that served the request.
        latency: End-to-end response time in seconds.
        tokens_in: Prompt (input) token count for the request.
        tokens_out: Completion (output) token count for the request.
        status: Outcome label for the request (e.g. success/error).
    """
    model_label = {'model': model}
    # Count the request and its latency under the serving model.
    AGENT_REQUESTS.labels(status=status, **model_label).inc()
    AGENT_LATENCY.labels(**model_label).observe(latency)
    # Accumulate token usage, split into input and output directions.
    TOKEN_USAGE.labels(type='input', **model_label).inc(tokens_in)
    TOKEN_USAGE.labels(type='output', **model_label).inc(tokens_out)