Spaces:

LeonardoMdSA
/

ML-Inference-Service-with-Drift-Detection

Running

App Files Files Community

LeonardoMdSA committed on Jan 12

Commit

e105368

1 Parent(s): 91a9dcd

logs and test drift complete

Browse files

Files changed (11) hide show

app/api/routes.py +19 -11
app/core/config.py +8 -1
app/monitoring/drift.py +18 -24
app/monitoring/governance.py +57 -15
app.db → database/app.db +0 -0
open_drift.py +3 -0
reports/evidently/drift_report.html +0 -0
tests/test_drift.py +0 -0
tests/test_governance.json +13 -0
tests/test_governance.py +15 -0
tests/test_run_drift.py +35 -0

app/api/routes.py CHANGED Viewed

@@ -1,14 +1,15 @@
-# /predict, /health, /dashboard
-from fastapi import APIRouter
 from app.api.schemas import PredictionRequest, PredictionResponse
 from app.inference.predictor import Predictor
 from app.core.logging import log_prediction
 from app.monitoring.data_loader import load_production_data
 from app.monitoring.drift import run_drift_check
 import pandas as pd
 router = APIRouter()
 predictor = Predictor()
@@ -30,21 +31,28 @@ def predict(request: PredictionRequest):
 def health():
     return {"status": "ok"}
 @router.get("/run-drift")
 def run_drift():
     current_df = load_production_data()
     report_path = run_drift_check(current_df)
     return {
         "status": "drift_check_completed",
         "report_path": report_path
     }
-@router.get("/monitoring/run")
-def monitoring_run():
-    # Example: load some data
-    current_data = pd.read_csv("data/current.csv")
-    reference_data = pd.read_csv("data/reference.csv")
-    alerts = run_drift_check(current_data, reference_data, model_version="v1")
-    return {"alerts": alerts}

+# app/api/routes.py
+# /predict, /health, /dashboard, /monitoring/run
+from fastapi import APIRouter, BackgroundTasks
 from app.api.schemas import PredictionRequest, PredictionResponse
 from app.inference.predictor import Predictor
 from app.core.logging import log_prediction
 from app.monitoring.data_loader import load_production_data
 from app.monitoring.drift import run_drift_check
+from app.monitoring.governance import run_governance_checks
 import pandas as pd
 router = APIRouter()
 predictor = Predictor()
 def health():
     return {"status": "ok"}
 @router.get("/run-drift")
 def run_drift():
     """Run a drift check on the loaded production data and report its result."""
     # NOTE(review): run_drift_check in app/monitoring/drift.py is now declared as
     # (current_data, reference_data, model_version="v1") and returns
     # (alerts, report_metrics). This call supplies no reference data and the
     # result is exposed as "report_path" -- confirm and update this endpoint.
     current_df = load_production_data()
     report_path = run_drift_check(current_df)
     return {
         "status": "drift_check_completed",
         "report_path": report_path
     }
+@router.get("/monitoring/run")
+def monitoring_run(background_tasks: BackgroundTasks, model_version: str = "v1"):
+    """
+    Step 6: Trigger production monitoring (drift + governance checks) in background.
+
+    Loads the current and reference datasets, schedules the drift check as a
+    background task and returns immediately.
+
+    Args:
+        background_tasks: FastAPI background task queue for this request.
+        model_version: Version label forwarded to the drift/governance checks.
+
+    Returns:
+        A dict with the trigger status and the model version used.
+    """
+    # Load current and reference data
+    current_data = pd.read_csv("data/processed/current_data.csv")
+    reference_data = pd.read_csv("data/processed/credit_default_clean.csv")  # reference
+    # run_drift_check builds the Evidently report and then runs the governance
+    # checks on its metrics itself, so a single task covers both steps.
+    # run_governance_checks expects report metrics (dict or list), not a
+    # DataFrame; scheduling it with current_data silently produced no alerts.
+    background_tasks.add_task(run_drift_check, current_data, reference_data, model_version=model_version)
+    return {"status": "monitoring triggered", "model_version": model_version}

app/core/config.py CHANGED Viewed

@@ -1,6 +1,13 @@
 # env vars, paths, thresholds
 MODEL_VERSION = "v1"
 MODEL_PATH = "models/v1/model.pkl"
 FEATURES_PATH = "models/v1/features.json"
-DB_PATH = "app.db"

 # env vars, paths, thresholds
+import os
 MODEL_VERSION = "v1"
 MODEL_PATH = "models/v1/model.pkl"
 FEATURES_PATH = "models/v1/features.json"
+# Database file path (relative path)
+DB_PATH = "database/app.db"
+# Governance logs path
+# PROJECT_ROOT: three dirname() hops up from this file's absolute path
+# (app/core/config.py -> app/core -> app -> repository root).
+PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+# The LOGS_PATH environment variable wins when set; otherwise logs go to
+# <PROJECT_ROOT>/logs.
+LOGS_PATH = os.environ.get("LOGS_PATH", os.path.join(PROJECT_ROOT, "logs"))
+# Import-time side effect: create the logs directory (no error if it exists).
+os.makedirs(LOGS_PATH, exist_ok=True)

app/monitoring/drift.py CHANGED Viewed

@@ -1,5 +1,5 @@
 # Evidently logic
 import os
 import pandas as pd
 from evidently.report import Report
@@ -10,25 +10,6 @@ REFERENCE_DATA_PATH = "models/v1/reference_data.csv"
 REPORT_DIR = "reports/evidently"
 REPORT_PATH = os.path.join(REPORT_DIR, "drift_report.html")
-def run_drift_check(current_df: pd.DataFrame):
-    reference_df = pd.read_csv(REFERENCE_DATA_PATH)
-    os.makedirs(REPORT_DIR, exist_ok=True)
-    report = Report(metrics=[
-        DataDriftPreset()
-    ])
-    report.run(
-        reference_data=reference_df.drop(columns=["target"]),
-        current_data=current_df
-    )
-    report.save_html(REPORT_PATH)
-    return REPORT_PATH
 # Thresholds configuration
 thresholds = {
     "psi": 0.2,
@@ -38,10 +19,23 @@ thresholds = {
 governance = Governance(thresholds=thresholds)
-def run_drift_check(current_data, reference_data, model_version="v1"):
     report = Report(metrics=[DataDriftPreset()])
     report.run(current_data=current_data, reference_data=reference_data)
-    # Governance check
-    alerts = governance.check_metrics(report.as_dict(), model_version=model_version)
-    return alerts

 # Evidently logic
+# app/monitoring/drift.py
 import os
 import pandas as pd
 from evidently.report import Report
 REPORT_DIR = "reports/evidently"
 REPORT_PATH = os.path.join(REPORT_DIR, "drift_report.html")
 # Thresholds configuration
 thresholds = {
     "psi": 0.2,
 governance = Governance(thresholds=thresholds)
+def run_drift_check(current_data: pd.DataFrame, reference_data: pd.DataFrame, model_version="v1"):
+    """
+    Run Evidently DataDriftPreset on current vs reference data,
+    save HTML report, and run governance checks.
+
+    Args:
+        current_data: Data to be checked for drift.
+        reference_data: Baseline data the current data is compared against.
+        model_version: Version label forwarded to the governance check.
+
+    Returns a tuple: (alerts, report_metrics)
+        alerts: result of governance.check_metrics for this report.
+        report_metrics: the report's metrics as passed to the governance check.
+
+    Side effects: creates REPORT_DIR if needed and writes the HTML report
+    to REPORT_PATH.
+    """
+    os.makedirs(REPORT_DIR, exist_ok=True)
     report = Report(metrics=[DataDriftPreset()])
     report.run(current_data=current_data, reference_data=reference_data)
+    report.save_html(REPORT_PATH)
+    # Prefer report.as_dict(); list(report) is only a fallback for Report
+    # objects without as_dict.
+    # NOTE(review): confirm which Evidently versions need the fallback and
+    # that Report is iterable there.
+    report_metrics = report.as_dict() if hasattr(report, "as_dict") else list(report)
+    # Run governance checks on the extracted metrics
+    alerts = governance.check_metrics(report_metrics, model_version=model_version)
+    return alerts, report_metrics

app/monitoring/governance.py CHANGED Viewed

@@ -1,15 +1,23 @@
-# This file implements threshold checking, governance signals logging, and notifications.
 import json
 import logging
 from datetime import datetime
-from app.utils.alerts import send_email_alert, send_slack_alert
 import os
-os.makedirs("logs", exist_ok=True)
 logger = logging.getLogger("governance")
 logger.setLevel(logging.INFO)
-handler = logging.FileHandler("logs/governance_alerts.log")
 formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
 handler.setFormatter(formatter)
 logger.addHandler(handler)
@@ -30,27 +38,52 @@ class Governance:
     def check_metrics(self, report_dict: dict, model_version: str):
         alerts = []
-        # Example: data drift
-        psi = report_dict.get("metrics", {}).get("DataDriftPreset", {}).get("result", {}).get("dataset_drift", 0)
         if psi > self.thresholds.get("psi", 0.2):
             alerts.append(f"Data drift detected (PSI={psi})")
-        # Example: classification performance
-        f1 = report_dict.get("metrics", {}).get("ClassificationPreset", {}).get("result", {}).get("f1_score", 1.0)
         if f1 < self.thresholds.get("f1", 0.7):
             alerts.append(f"F1 drop detected (F1={f1})")
-        # Example: regression accuracy
-        accuracy_drop = report_dict.get("metrics", {}).get("RegressionPreset", {}).get("result", {}).get("accuracy_drop", 0)
         if accuracy_drop > self.thresholds.get("accuracy_drop", 0.05):
             alerts.append(f"Accuracy drop detected ({accuracy_drop})")
-        # Log alerts
         for alert in alerts:
             self.log_alert(alert, model_version)
-        # Optional notifications
-        for alert in alerts:
             send_email_alert(alert)
             send_slack_alert(alert)
@@ -64,3 +97,12 @@ class Governance:
             "alert": message
         }
         logger.info(json.dumps(log_entry))

+# app/monitoring/governance.py
 import json
 import logging
 from datetime import datetime
 import os
+from app.utils.alerts import send_email_alert, send_slack_alert
+from app.core.config import LOGS_PATH  # configurable logs folder
+# ensure logs folder exists before the FileHandler below opens a file in it
+os.makedirs(LOGS_PATH, exist_ok=True)
+# setup the module-wide "governance" logger (INFO and above)
 logger = logging.getLogger("governance")
 logger.setLevel(logging.INFO)
+# Remove all handlers already attached to this logger before adding ours
+# (presumably to avoid duplicate log lines when this module is set up more
+# than once). hasHandlers() is also True when only an ancestor logger has
+# handlers; handlers.clear() empties this logger's own list only.
+if logger.hasHandlers():
+    logger.handlers.clear()
+# Alerts are appended to <LOGS_PATH>/governance_alerts.log
+handler = logging.FileHandler(os.path.join(LOGS_PATH, "governance_alerts.log"))
 formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
 handler.setFormatter(formatter)
 logger.addHandler(handler)
     def check_metrics(self, report_dict: dict, model_version: str):
         alerts = []
+        # Normalize report_dict to a metrics dict
+        metrics = {}
+        if isinstance(report_dict, dict):
+            raw_metrics = report_dict.get("metrics")
+            if isinstance(raw_metrics, list):
+                for item in raw_metrics:
+                    metric_name = item.get("metric")
+                    result = item.get("result", {})
+                    if metric_name:
+                        metrics[metric_name] = result
+            else:
+                metrics = raw_metrics or {}
+        elif isinstance(report_dict, list):
+            for item in report_dict:
+                metric_name = item.get("metric")
+                result = item.get("result", {})
+                if metric_name:
+                    metrics[metric_name] = result
+        # Data drift (project-level)
+        psi_metric = metrics.get("DatasetDriftMetric", {})
+        psi = psi_metric.get("share_of_drifted_columns", 0)
         if psi > self.thresholds.get("psi", 0.2):
             alerts.append(f"Data drift detected (PSI={psi})")
+        # Column-level drift alerts
+        data_drift_table = metrics.get("DataDriftTable", {}).get("drift_by_columns", {})
+        if data_drift_table:
+            for col, info in data_drift_table.items():
+                if isinstance(info, dict) and info.get("drift_detected", False):
+                    alert_msg = f"Drift detected in column {col} (score={info.get('drift_score')})"
+                    alerts.append(alert_msg)
+        # Classification performance
+        f1 = metrics.get("ClassificationPreset", {}).get("f1_score", 1.0)
         if f1 < self.thresholds.get("f1", 0.7):
             alerts.append(f"F1 drop detected (F1={f1})")
+        # Regression accuracy
+        accuracy_drop = metrics.get("RegressionPreset", {}).get("accuracy_drop", 0)
         if accuracy_drop > self.thresholds.get("accuracy_drop", 0.05):
             alerts.append(f"Accuracy drop detected ({accuracy_drop})")
+        # Log and send alerts
         for alert in alerts:
             self.log_alert(alert, model_version)
             send_email_alert(alert)
             send_slack_alert(alert)
             "alert": message
         }
         logger.info(json.dumps(log_entry))
+def run_governance_checks(report_dict: dict, model_version: str = "v1", thresholds: dict = None):
+    """
+    Build a one-off Governance instance and run its metric checks on a report.
+
+    Args:
+        report_dict: Report metrics, handed unchanged to Governance.check_metrics.
+        model_version: Version label passed along to the check.
+        thresholds: Optional threshold mapping; when missing or empty, the
+            defaults psi=0.2, accuracy_drop=0.05, f1=0.7 are used.
+
+    Returns:
+        Whatever Governance.check_metrics returns for this report.
+    """
+    if not thresholds:
+        thresholds = {"psi": 0.2, "accuracy_drop": 0.05, "f1": 0.7}
+    return Governance(thresholds).check_metrics(report_dict, model_version)

app.db → database/app.db RENAMED Viewed

Binary files a/app.db and b/database/app.db differ

open_drift.py ADDED Viewed

	@@ -0,0 +1,3 @@

+import webbrowser
+from pathlib import Path
+
+# Open the generated Evidently drift report in the default web browser.
+# The report is located relative to this script (kept at the repository root)
+# instead of a user-specific absolute path, so it works on any checkout.
+report_path = Path(__file__).resolve().parent / "reports" / "evidently" / "drift_report.html"
+# as_uri() yields a well-formed file:// URL (correct slashes, escaped spaces),
+# which plain string concatenation does not.
+webbrowser.open(report_path.as_uri())

reports/evidently/drift_report.html CHANGED Viewed

The diff for this file is too large to render. See raw diff

tests/test_drift.py DELETED Viewed

File without changes

tests/test_governance.json ADDED Viewed

	@@ -0,0 +1,13 @@

+{
+  "metrics": {
+    "DataDriftPreset": {
+      "result": {"dataset_drift": 0.25}
+    },
+    "ClassificationPreset": {
+      "result": {"f1_score": 0.65}
+    },
+    "RegressionPreset": {
+      "result": {"accuracy_drop": 0.06}
+    }
+  }
+}

tests/test_governance.py ADDED Viewed

	@@ -0,0 +1,15 @@

+import json
+import sys
+from pathlib import Path
+
+# Manual smoke check: run the governance checks on a sample report and print
+# the resulting alerts.
+repo_root = Path(__file__).parent.parent.resolve()
+# Make the `app` package importable when this file is run directly.
+sys.path.insert(0, str(repo_root))
+from app.monitoring.governance import run_governance_checks
+
+# Load the sample report JSON. The path is anchored at the repository root so
+# the script works from any working directory, not only from the repo root.
+fixture_path = repo_root / "tests" / "test_governance.json"
+with open(fixture_path, "r", encoding="utf-8") as f:
+    report = json.load(f)
+alerts = run_governance_checks(report, model_version="v1")
+print("Governance alerts:", alerts)

tests/test_run_drift.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import sys
+import os
+import pandas as pd
+# Ensure project root is in sys.path
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from app.monitoring.drift import run_drift_check
+from app.monitoring.governance import run_governance_checks
+def main():
+    """
+    Run the drift check on the processed current data against the v1
+    reference data and print the governance alerts and report metrics.
+
+    Raises:
+        FileNotFoundError: if either input CSV is missing.
+    """
+    # abspath first, matching the sys.path setup at the top of this file;
+    # this stays correct where __file__ is a relative path.
+    root_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+    # Load current and reference data
+    current_path = os.path.join(root_dir, "data", "processed", "current_data.csv")
+    reference_path = os.path.join(root_dir, "models", "v1", "reference_data.csv")
+    for path in (current_path, reference_path):
+        if not os.path.exists(path):
+            raise FileNotFoundError(f"{path} does not exist.")
+    current_df = pd.read_csv(current_path)
+    reference_df = pd.read_csv(reference_path)
+    # run_drift_check returns a tuple: (alerts, report_metrics)
+    alerts, report_metrics = run_drift_check(current_df, reference_df, model_version="v1")
+    print("Governance alerts:", alerts)
+    print("Metrics from Evidently report:", report_metrics)
+if __name__ == "__main__":
+    main()