---
# Prometheus alerting rules for the fraud-detection service.
#
# NOTE(review): this file also embeds an Alertmanager configuration under the
# non-standard `alertmanager_config` key. Prometheus itself only reads
# `groups`; confirm which deploy tool extracts `alertmanager_config` before
# splitting it into its own file.
groups:
  - name: fraud_detection_alerts
    interval: 30s  # evaluate every rule in this group every 30 seconds
    rules:
      # ---- System health alerts ----
      - alert: HighMemoryUsage
        # Available memory below 20% of total (i.e. usage above 80%).
        expr: (node_memory_MemAvailable_bytes / node_memory_MemTotal_bytes) * 100 < 20
        for: 5m
        labels:
          severity: warning
          component: system
        annotations:
          summary: "High memory usage detected"
          description: "Memory usage is above 80% for more than 5 minutes"

      - alert: HighCPUUsage
        # Non-idle CPU above 80%, averaged per instance over 5 minutes.
        expr: 100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100) > 80
        for: 5m
        labels:
          severity: warning
          component: system
        annotations:
          summary: "High CPU usage detected"
          description: "CPU usage is above 80% for more than 5 minutes"

      - alert: DiskSpaceLow
        expr: (node_filesystem_avail_bytes / node_filesystem_size_bytes) * 100 < 10
        for: 5m
        labels:
          severity: critical
          component: system
        annotations:
          summary: "Disk space critically low"
          description: "Less than 10% disk space available"

      # ---- Application alerts ----
      - alert: HighErrorRate
        # Fraction of requests answered 5xx, over a 5-minute window.
        expr: rate(http_requests_total{status=~"5.."}[5m]) > 0.05
        for: 2m
        labels:
          severity: critical
          component: application
        annotations:
          summary: "High error rate detected"
          description: "More than 5% of requests are failing with 5xx errors"

      - alert: SlowResponseTime
        # p95 latency from the request-duration histogram.
        expr: histogram_quantile(0.95, rate(http_request_duration_seconds_bucket[5m])) > 2
        for: 5m
        labels:
          severity: warning
          component: application
        annotations:
          summary: "Slow API response times"
          description: "95th percentile response time is above 2 seconds"

      - alert: HighFraudDetectionRate
        expr: rate(fraud_detections_total[10m]) > 100
        for: 5m
        labels:
          severity: warning
          component: fraud_engine
        annotations:
          summary: "Unusually high fraud detection rate"
          description: "More than 100 fraud cases detected per 10 minutes"

      # ---- Database alerts ----
      - alert: DatabaseConnectionPoolExhausted
        expr: db_pool_size - db_pool_available < 2
        for: 2m
        labels:
          severity: critical
          component: database
        annotations:
          summary: "Database connection pool nearly exhausted"
          description: "Less than 2 database connections available"

      - alert: SlowDatabaseQueries
        # Mean query duration: sum of durations / count of queries.
        expr: rate(db_query_duration_seconds_sum[5m]) / rate(db_query_duration_seconds_count[5m]) > 1
        for: 5m
        labels:
          severity: warning
          component: database
        annotations:
          summary: "Slow database queries detected"
          description: "Average query time is above 1 second"

      # ---- Service availability ----
      - alert: ServiceDown
        expr: up{job="fraud-detection-backend"} == 0
        for: 1m
        labels:
          severity: critical
          component: application
        annotations:
          summary: "Service is down"
          description: "Fraud detection backend service is not responding"

      - alert: DatabaseDown
        expr: up{job="postgres"} == 0
        for: 1m
        labels:
          severity: critical
          component: database
        annotations:
          summary: "Database is down"
          description: "PostgreSQL database is not responding"

# Alertmanager configuration, embedded as a literal block scalar (a plain
# string to this file's parser). `${...}` placeholders are left verbatim for
# environment substitution at deploy time; never inline real secrets here.
alertmanager_config: |
  global:
    resolve_timeout: 5m
  route:
    group_by: ['alertname', 'component']
    group_wait: 10s
    group_interval: 10s
    repeat_interval: 12h
    receiver: 'default'
    routes:
      # `continue: true` lets critical alerts also fall through to later
      # routes (so they are not swallowed by the pagerduty match alone).
      - match:
          severity: critical
        receiver: 'pagerduty'
        continue: true
      - match:
          severity: warning
        receiver: 'slack'
  receivers:
    - name: 'default'
      email_configs:
        - to: 'ops@example.com'
          from: 'alertmanager@example.com'
          smarthost: 'smtp.example.com:587'
          auth_username: 'alertmanager@example.com'
          auth_password: '${SMTP_PASSWORD}'
    - name: 'slack'
      slack_configs:
        - api_url: '${SLACK_WEBHOOK_URL}'
          channel: '#fraud-detection-alerts'
          title: '{{ .GroupLabels.alertname }}'
          text: '{{ range .Alerts }}{{ .Annotations.description }}{{ end }}'
    - name: 'pagerduty'
      pagerduty_configs:
        - service_key: '${PAGERDUTY_SERVICE_KEY}'
          description: '{{ .GroupLabels.alertname }}: {{ .GroupLabels.component }}'