Spaces:

DaCrow13
/

Hopcroft-Skill-Classification

Sleeping

App Files Files Community

giuto committed on Dec 22, 2025

Commit

1396866

1 Parent(s): 8ed9c1d

Update Grafana and Prometheus configurations, enhance drift detection scripts, and add monitoring dashboard

Browse files

Files changed (7) hide show

docker-compose.yml +2 -3
monitoring/drift/scripts/prepare_baseline.py +20 -8
monitoring/drift/scripts/run_drift_check.py +23 -17
monitoring/grafana/dashboards/hopcroft_dashboard.json +358 -0
monitoring/grafana/provisioning/datasources/prometheus.yml +2 -0
monitoring/prometheus/prometheus.yml +3 -6
requirements.txt +1 -3

docker-compose.yml CHANGED Viewed

@@ -72,7 +72,7 @@ services:
       - hopcroft-net
     restart: unless-stopped
-grafana:
     image: grafana/grafana:latest
     container_name: grafana
     ports:
@@ -82,7 +82,6 @@ grafana:
       - GF_SECURITY_ADMIN_PASSWORD=admin
       - GF_USERS_ALLOW_SIGN_UP=false
       - GF_SERVER_ROOT_URL=http://localhost:3000
-      - GF_INSTALL_PLUGINS=grafana-piechart-panel
     volumes:
       # Provisioning: auto-configure datasources and dashboards
       - ./monitoring/grafana/provisioning/datasources:/etc/grafana/provisioning/datasources
@@ -101,7 +100,7 @@ grafana:
       timeout: 10s
       retries: 3
-pushgateway:
     image: prom/pushgateway:latest
     container_name: pushgateway
     ports:

       - hopcroft-net
     restart: unless-stopped
+  grafana:
     image: grafana/grafana:latest
     container_name: grafana
     ports:
       - GF_SECURITY_ADMIN_PASSWORD=admin
       - GF_USERS_ALLOW_SIGN_UP=false
       - GF_SERVER_ROOT_URL=http://localhost:3000
     volumes:
       # Provisioning: auto-configure datasources and dashboards
       - ./monitoring/grafana/provisioning/datasources:/etc/grafana/provisioning/datasources
       timeout: 10s
       retries: 3
+  pushgateway:
     image: prom/pushgateway:latest
     container_name: pushgateway
     ports:

monitoring/drift/scripts/prepare_baseline.py CHANGED Viewed

@@ -6,6 +6,7 @@ This script samples representative data from the training set.
 import pickle
 import pandas as pd
 import numpy as np
 from pathlib import Path
 from sklearn.model_selection import train_test_split
@@ -16,14 +17,21 @@ BASELINE_DIR.mkdir(parents=True, exist_ok=True)
 def load_training_data():
-    """Load the original training dataset."""
-    # Adjust path to your actual data
-    data_path = PROJECT_ROOT / "data" / "train.csv"
-    if not data_path.exists():
-        raise FileNotFoundError(f"Training data not found at {data_path}")
-    df = pd.read_csv(data_path)
     print(f"Loaded {len(df)} training samples")
     return df
@@ -61,10 +69,14 @@ def extract_features(df):
     Should match the features used by your model.
     """
-    feature_columns = [col for col in df.columns if col not in ['label', 'id', 'timestamp']]
     X = df[feature_columns].values
-    print(f"Extracted {X.shape[1]} features from {X.shape[0]} samples")
     return X

 import pickle
 import pandas as pd
 import numpy as np
+import sqlite3
 from pathlib import Path
 from sklearn.model_selection import train_test_split
 def load_training_data():
+    """Load the original training dataset from SQLite database."""
+    # Load from SQLite database
+    db_path = PROJECT_ROOT / "data" / "raw" / "skillscope_data.db"
+    if not db_path.exists():
+        raise FileNotFoundError(f"Database not found at {db_path}")
+    print(f"Loading data from database: {db_path}")
+    conn = sqlite3.connect(db_path)
+    # Load from the main table
+    query = "SELECT * FROM nlbse_tool_competition_data_by_issue LIMIT 10000"
+    df = pd.read_sql_query(query, conn)
+    conn.close()
     print(f"Loaded {len(df)} training samples")
     return df
     Should match the features used by your model.
     """
+    # Select only numeric columns, exclude labels and IDs
+    numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
+    exclude_cols = ['label', 'id', 'timestamp', 'issue_id', 'file_id', 'method_id', 'class_id']
+    feature_columns = [col for col in numeric_cols if col not in exclude_cols]
     X = df[feature_columns].values
+    print(f"Extracted {X.shape[1]} numeric features from {X.shape[0]} samples")
     return X

monitoring/drift/scripts/run_drift_check.py CHANGED Viewed

@@ -1,5 +1,5 @@
 """
-Data Drift Detection using Alibi Detect.
 Detects distribution shifts between baseline and new data.
 """
@@ -10,7 +10,7 @@ import numpy as np
 import pandas as pd
 from pathlib import Path
 from datetime import datetime
-from alibi_detect.cd import KSDrift
 from typing import Dict, Tuple
 # Configuration
@@ -73,7 +73,7 @@ def load_new_data() -> np.ndarray:
 def run_drift_detection(X_baseline: np.ndarray, X_new: np.ndarray) -> Dict:
     """
-    Run Kolmogorov-Smirnov drift detection.
     Args:
         X_baseline: Reference data
@@ -86,24 +86,30 @@ def run_drift_detection(X_baseline: np.ndarray, X_new: np.ndarray) -> Dict:
     print("Running Drift Detection (Kolmogorov-Smirnov Test)")
     print("=" * 60)
-    # Initialize detector
-    cd = KSDrift(
-        X_baseline,
-        p_val=P_VALUE_THRESHOLD,
-        alternative='two-sided',
-        correction='bonferroni'  # Multiple testing correction
-    )
-    # Run detection
-    preds = cd.predict(X_new)
     # Extract results
     results = {
         "timestamp": datetime.now().isoformat(),
-        "drift_detected": int(preds['data']['is_drift']),
-        "p_value": float(preds['data']['p_val']),
-        "threshold": P_VALUE_THRESHOLD,
-        "distance": float(preds['data']['distance']),
         "baseline_samples": X_baseline.shape[0],
         "new_samples": X_new.shape[0],
         "num_features": X_baseline.shape[1]
@@ -112,7 +118,7 @@ def run_drift_detection(X_baseline: np.ndarray, X_new: np.ndarray) -> Dict:
     # Print results
     print(f"\nResults:")
     print(f"   Drift Detected: {'YES' if results['drift_detected'] else 'NO'}")
-    print(f"   P-Value: {results['p_value']:.6f} (threshold: {P_VALUE_THRESHOLD})")
     print(f"   Distance: {results['distance']:.6f}")
     print(f"   Baseline: {X_baseline.shape[0]} samples")
     print(f"   New Data: {X_new.shape[0]} samples")

 """
+Data Drift Detection using Scipy KS Test.
 Detects distribution shifts between baseline and new data.
 """
 import pandas as pd
 from pathlib import Path
 from datetime import datetime
+from scipy.stats import ks_2samp
 from typing import Dict, Tuple
 # Configuration
 def run_drift_detection(X_baseline: np.ndarray, X_new: np.ndarray) -> Dict:
     """
+    Run Kolmogorov-Smirnov drift detection using scipy.
     Args:
         X_baseline: Reference data
     print("Running Drift Detection (Kolmogorov-Smirnov Test)")
     print("=" * 60)
+    # Run KS test for each feature
+    p_values = []
+    distances = []
+    for i in range(X_baseline.shape[1]):
+        statistic, p_value = ks_2samp(X_baseline[:, i], X_new[:, i])
+        p_values.append(p_value)
+        distances.append(statistic)
+    # Aggregate results
+    min_p_value = np.min(p_values)
+    max_distance = np.max(distances)
+    # Apply Bonferroni correction for multiple testing
+    adjusted_threshold = P_VALUE_THRESHOLD / X_baseline.shape[1]
+    drift_detected = min_p_value < adjusted_threshold
     # Extract results
     results = {
         "timestamp": datetime.now().isoformat(),
+        "drift_detected": int(drift_detected),
+        "p_value": float(min_p_value),
+        "threshold": adjusted_threshold,
+        "distance": float(max_distance),
         "baseline_samples": X_baseline.shape[0],
         "new_samples": X_new.shape[0],
         "num_features": X_baseline.shape[1]
     # Print results
     print(f"\nResults:")
     print(f"   Drift Detected: {'YES' if results['drift_detected'] else 'NO'}")
+    print(f"   P-Value: {results['p_value']:.6f} (adjusted threshold: {adjusted_threshold:.6f})")
     print(f"   Distance: {results['distance']:.6f}")
     print(f"   Baseline: {X_baseline.shape[0]} samples")
     print(f"   New Data: {X_new.shape[0]} samples")

monitoring/grafana/dashboards/hopcroft_dashboard.json ADDED Viewed

	@@ -0,0 +1,358 @@

+{
+  "annotations": {
+    "list": [
+      {
+        "builtIn": 1,
+        "datasource": "-- Grafana --",
+        "enable": true,
+        "hide": true,
+        "iconColor": "rgba(0, 211, 255, 1)",
+        "name": "Annotations & Alerts",
+        "type": "dashboard"
+      }
+    ]
+  },
+  "editable": true,
+  "gnetId": null,
+  "graphTooltip": 1,
+  "id": null,
+  "links": [],
+  "panels": [
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "red",
+                "value": 80
+              }
+            ]
+          },
+          "unit": "reqps"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 6,
+        "x": 0,
+        "y": 0
+      },
+      "id": 1,
+      "options": {
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "showThresholdLabels": false,
+        "showThresholdMarkers": true
+      },
+      "pluginVersion": "9.0.0",
+      "targets": [
+        {
+          "expr": "rate(fastapi_requests_total[1m])",
+          "refId": "A"
+        }
+      ],
+      "title": "Request Rate",
+      "type": "gauge",
+      "description": "Number of requests per second handled by the API"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "never",
+            "spanNulls": true
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "ms"
+        }
+      },
+      "gridPos": {
+        "h": 8,
+        "w": 18,
+        "x": 6,
+        "y": 0
+      },
+      "id": 2,
+      "options": {
+        "legend": {
+          "calcs": ["mean", "max"],
+          "displayMode": "table",
+          "placement": "right"
+        },
+        "tooltip": {
+          "mode": "multi"
+        }
+      },
+      "pluginVersion": "9.0.0",
+      "targets": [
+        {
+          "expr": "histogram_quantile(0.95, rate(fastapi_request_duration_seconds_bucket[5m])) * 1000",
+          "legendFormat": "p95",
+          "refId": "A"
+        },
+        {
+          "expr": "histogram_quantile(0.50, rate(fastapi_request_duration_seconds_bucket[5m])) * 1000",
+          "legendFormat": "p50 (median)",
+          "refId": "B"
+        }
+      ],
+      "title": "Request Latency (p50, p95)",
+      "type": "timeseries",
+      "description": "API response time percentiles over time"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "mappings": [
+            {
+              "options": {
+                "0": {
+                  "color": "green",
+                  "index": 1,
+                  "text": "No Drift"
+                },
+                "1": {
+                  "color": "red",
+                  "index": 0,
+                  "text": "Drift Detected"
+                }
+              },
+              "type": "value"
+            }
+          ],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          }
+        }
+      },
+      "gridPos": {
+        "h": 6,
+        "w": 6,
+        "x": 0,
+        "y": 8
+      },
+      "id": 3,
+      "options": {
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "showThresholdLabels": false,
+        "showThresholdMarkers": true,
+        "text": {}
+      },
+      "pluginVersion": "9.0.0",
+      "targets": [
+        {
+          "expr": "drift_detected",
+          "refId": "A"
+        }
+      ],
+      "title": "Data Drift Status",
+      "type": "stat",
+      "description": "Current data drift detection status (1 = drift detected, 0 = no drift)"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "thresholds"
+          },
+          "decimals": 4,
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              },
+              {
+                "color": "yellow",
+                "value": 0.01
+              },
+              {
+                "color": "red",
+                "value": 0.05
+              }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": {
+        "h": 6,
+        "w": 6,
+        "x": 6,
+        "y": 8
+      },
+      "id": 4,
+      "options": {
+        "orientation": "auto",
+        "reduceOptions": {
+          "calcs": ["lastNotNull"],
+          "fields": "",
+          "values": false
+        },
+        "showThresholdLabels": false,
+        "showThresholdMarkers": true,
+        "text": {}
+      },
+      "pluginVersion": "9.0.0",
+      "targets": [
+        {
+          "expr": "drift_p_value",
+          "refId": "A"
+        }
+      ],
+      "title": "Drift P-Value",
+      "type": "stat",
+      "description": "Statistical significance of detected drift (lower = more significant)"
+    },
+    {
+      "datasource": "Prometheus",
+      "fieldConfig": {
+        "defaults": {
+          "color": {
+            "mode": "palette-classic"
+          },
+          "custom": {
+            "axisLabel": "",
+            "axisPlacement": "auto",
+            "barAlignment": 0,
+            "drawStyle": "line",
+            "fillOpacity": 10,
+            "gradientMode": "none",
+            "hideFrom": {
+              "tooltip": false,
+              "viz": false,
+              "legend": false
+            },
+            "lineInterpolation": "linear",
+            "lineWidth": 1,
+            "pointSize": 5,
+            "scaleDistribution": {
+              "type": "linear"
+            },
+            "showPoints": "auto",
+            "spanNulls": false
+          },
+          "mappings": [],
+          "thresholds": {
+            "mode": "absolute",
+            "steps": [
+              {
+                "color": "green",
+                "value": null
+              }
+            ]
+          },
+          "unit": "short"
+        }
+      },
+      "gridPos": {
+        "h": 6,
+        "w": 12,
+        "x": 12,
+        "y": 8
+      },
+      "id": 5,
+      "options": {
+        "legend": {
+          "calcs": ["mean", "lastNotNull"],
+          "displayMode": "table",
+          "placement": "right"
+        },
+        "tooltip": {
+          "mode": "multi"
+        }
+      },
+      "pluginVersion": "9.0.0",
+      "targets": [
+        {
+          "expr": "drift_distance",
+          "legendFormat": "Distance",
+          "refId": "A"
+        }
+      ],
+      "title": "Drift Distance Over Time",
+      "type": "timeseries",
+      "description": "Statistical distance between baseline and current data distribution"
+    }
+  ],
+  "refresh": "10s",
+  "schemaVersion": 36,
+  "style": "dark",
+  "tags": ["hopcroft", "ml", "monitoring"],
+  "templating": {
+    "list": []
+  },
+  "time": {
+    "from": "now-1h",
+    "to": "now"
+  },
+  "timepicker": {},
+  "timezone": "",
+  "title": "Hopcroft ML Model Monitoring",
+  "uid": "hopcroft-ml-dashboard",
+  "version": 1,
+  "weekStart": ""
+}

monitoring/grafana/provisioning/datasources/prometheus.yml CHANGED Viewed

@@ -4,6 +4,8 @@ datasources:
   - name: Prometheus
     type: prometheus
     access: proxy
     url: http://prometheus:9090
     isDefault: true
     editable: true

   - name: Prometheus
     type: prometheus
     access: proxy
+    uid: prometheus
+    orgId: 1
     url: http://prometheus:9090
     isDefault: true
     editable: true

monitoring/prometheus/prometheus.yml CHANGED Viewed

@@ -16,18 +16,15 @@ alerting:
 scrape_configs:
   - job_name: 'hopcroft-api'
     static_configs:
       - targets: ['hopcroft-api:8080']
   - job_name: 'prometheus'
     static_configs:
       - targets: ['localhost:9090']
-  - job_name: 'hopcroft-api'
-    metrics_path: '/metrics'
-    static_configs:
-      - targets: ['hopcroft-api:8080']
-    scrape_interval: 10s
   - job_name: 'pushgateway'
     honor_labels: true
     static_configs:

 scrape_configs:
   - job_name: 'hopcroft-api'
+    metrics_path: '/metrics'
     static_configs:
       - targets: ['hopcroft-api:8080']
+    scrape_interval: 10s
   - job_name: 'prometheus'
     static_configs:
       - targets: ['localhost:9090']
   - job_name: 'pushgateway'
     honor_labels: true
     static_configs:

requirements.txt CHANGED Viewed

@@ -65,6 +65,4 @@ pytest-html
 pytest-json-report
 # GUI
-streamlit>=1.28.0
-alibi-detect>=0.11.4

 pytest-json-report
 # GUI
+streamlit>=1.28.0