Agentic-RagBot / monitoring /grafana-dashboard.json
MediGuard AI
feat: Initial release of MediGuard AI v2.0
c4f5f25
{
"dashboard": {
"id": null,
"title": "MediGuard AI Monitoring",
"tags": ["mediguard", "ai", "medical"],
"timezone": "browser",
"panels": [
{
"id": 1,
"title": "API Request Rate",
"type": "graph",
"targets": [
{
"expr": "rate(http_requests_total[5m])",
"legendFormat": "{{method}} {{endpoint}}"
}
],
"yAxes": [
{
"label": "Requests/sec"
}
]
},
{
"id": 2,
"title": "Response Time",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))",
"legendFormat": "95th percentile"
},
{
"expr": "histogram_quantile(0.50, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))",
"legendFormat": "50th percentile"
}
],
"yAxes": [
{
"label": "Seconds"
}
]
},
{
"id": 3,
"title": "Error Rate",
"type": "singlestat",
"targets": [
{
"expr": "sum(rate(http_requests_total{status=~\"5..\"}[5m])) / sum(rate(http_requests_total[5m]))",
"legendFormat": "Error Rate"
}
],
"valueMaps": [
{
"value": "null",
"text": "N/A"
}
],
"thresholds": "0.01,0.05,0.1",
"unit": "percentunit"
},
{
"id": 4,
"title": "Active Users",
"type": "singlestat",
"targets": [
{
"expr": "active_users_total",
"legendFormat": "Active Users"
}
]
},
{
"id": 5,
"title": "Workflow Execution Time",
"type": "graph",
"targets": [
{
"expr": "histogram_quantile(0.95, sum by (le) (rate(workflow_duration_seconds_bucket[5m])))",
"legendFormat": "95th percentile"
}
],
"yAxes": [
{
"label": "Seconds"
}
]
},
{
"id": 6,
"title": "Database Connections",
"type": "graph",
"targets": [
{
"expr": "opensearch_connections_active",
"legendFormat": "OpenSearch"
},
{
"expr": "redis_connections_active",
"legendFormat": "Redis"
}
]
},
{
"id": 7,
"title": "Memory Usage",
"type": "graph",
"targets": [
{
"expr": "process_resident_memory_bytes",
"legendFormat": "RSS"
}
],
"yAxes": [
{
"label": "Bytes"
}
]
},
{
"id": 8,
"title": "CPU Usage",
"type": "graph",
"targets": [
{
"expr": "rate(process_cpu_seconds_total[5m])",
"legendFormat": "CPU"
}
],
"yAxes": [
{
"label": "Cores"
}
]
},
{
"id": 9,
"title": "LLM Request Rate",
"type": "graph",
"targets": [
{
"expr": "rate(llm_requests_total[5m])",
"legendFormat": "{{provider}}"
}
],
"yAxes": [
{
"label": "Requests/sec"
}
]
},
{
"id": 10,
"title": "Cache Hit Rate",
"type": "singlestat",
"targets": [
{
"expr": "sum(rate(cache_hits_total[5m])) / (sum(rate(cache_hits_total[5m])) + sum(rate(cache_misses_total[5m])))",
"legendFormat": "Hit Rate"
}
],
"unit": "percentunit",
"thresholds": "0.8,0.9,0.95"
},
{
"id": 11,
"title": "Agent Performance",
"type": "table",
"targets": [
{
"expr": "agent_execution_duration_seconds",
"legendFormat": "{{agent_name}}",
"format": "table"
}
],
"columns": [
{
"text": "Agent",
"value": "agent_name"
},
{
"text": "Avg Duration",
"value": "avg"
},
{
"text": "Success Rate",
"value": "success_rate"
}
]
},
{
"id": 12,
"title": "System Health",
"type": "row"
},
{
"id": 13,
"title": "Service Status",
"type": "stat",
"targets": [
{
"expr": "up{job=\"mediguard\"}",
"legendFormat": "{{instance}}"
}
]
}
],
"time": {
"from": "now-1h",
"to": "now"
},
"refresh": "30s"
}
}