# Prometheus configuration for Cyber-LLM monitoring global: scrape_interval: 15s evaluation_interval: 15s external_labels: cluster: 'cyber-llm-production' environment: 'production' rule_files: - "/etc/prometheus/rules/*.yml" alerting: alertmanagers: - static_configs: - targets: - alertmanager:9093 scrape_configs: # Cyber-LLM API metrics - job_name: 'cyber-llm-api' metrics_path: '/metrics' scrape_interval: 30s static_configs: - targets: ['cyber-llm-api-service:8000'] relabel_configs: - source_labels: [__address__] target_label: __param_target - source_labels: [__param_target] target_label: instance - target_label: __address__ replacement: cyber-llm-api-service:8000 # Kubernetes cluster metrics - job_name: 'kubernetes-apiservers' kubernetes_sd_configs: - role: endpoints scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token relabel_configs: - source_labels: [__meta_kubernetes_namespace, __meta_kubernetes_service_name, __meta_kubernetes_endpoint_port_name] action: keep regex: default;kubernetes;https # Kubernetes node metrics - job_name: 'kubernetes-nodes' scheme: https tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token kubernetes_sd_configs: - role: node relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) - target_label: __address__ replacement: kubernetes.default.svc:443 - source_labels: [__meta_kubernetes_node_name] regex: (.+) target_label: __metrics_path__ replacement: /api/v1/nodes/${1}/proxy/metrics # Kubernetes pods - job_name: 'kubernetes-pods' kubernetes_sd_configs: - role: pod relabel_configs: - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_scrape] action: keep regex: true - source_labels: [__meta_kubernetes_pod_annotation_prometheus_io_path] action: replace target_label: __metrics_path__ regex: (.+) - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port] action: replace regex: ([^:]+)(?::\d+)?;(\d+) replacement: $1:$2 target_label: __address__ - action: labelmap regex: __meta_kubernetes_pod_label_(.+) - source_labels: [__meta_kubernetes_namespace] action: replace target_label: kubernetes_namespace - source_labels: [__meta_kubernetes_pod_name] action: replace target_label: kubernetes_pod_name # GPU metrics (if nvidia-gpu-exporter is deployed) - job_name: 'gpu-metrics' static_configs: - targets: ['nvidia-gpu-exporter:9835'] # Custom business metrics - job_name: 'cyber-llm-business-metrics' metrics_path: '/business-metrics' scrape_interval: 60s static_configs: - targets: ['cyber-llm-api-service:8000']