LeonardoMdSA commited on
Commit
e105368
·
1 Parent(s): 91a9dcd

logs and test drift complete

Browse files
app/api/routes.py CHANGED
@@ -1,14 +1,15 @@
1
- # /predict, /health, /dashboard
 
2
 
3
- from fastapi import APIRouter
4
  from app.api.schemas import PredictionRequest, PredictionResponse
5
  from app.inference.predictor import Predictor
6
  from app.core.logging import log_prediction
7
  from app.monitoring.data_loader import load_production_data
8
  from app.monitoring.drift import run_drift_check
 
9
  import pandas as pd
10
 
11
-
12
  router = APIRouter()
13
  predictor = Predictor()
14
 
@@ -30,21 +31,28 @@ def predict(request: PredictionRequest):
30
  def health():
31
  return {"status": "ok"}
32
 
 
33
  @router.get("/run-drift")
34
  def run_drift():
35
  current_df = load_production_data()
36
  report_path = run_drift_check(current_df)
37
-
38
  return {
39
  "status": "drift_check_completed",
40
  "report_path": report_path
41
  }
42
 
43
- @router.get("/monitoring/run")
44
- def monitoring_run():
45
- # Example: load some data
46
- current_data = pd.read_csv("data/current.csv")
47
- reference_data = pd.read_csv("data/reference.csv")
48
 
49
- alerts = run_drift_check(current_data, reference_data, model_version="v1")
50
- return {"alerts": alerts}
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/api/routes.py
2
+ # /predict, /health, /dashboard, /monitoring/run
3
 
4
+ from fastapi import APIRouter, BackgroundTasks
5
  from app.api.schemas import PredictionRequest, PredictionResponse
6
  from app.inference.predictor import Predictor
7
  from app.core.logging import log_prediction
8
  from app.monitoring.data_loader import load_production_data
9
  from app.monitoring.drift import run_drift_check
10
+ from app.monitoring.governance import run_governance_checks
11
  import pandas as pd
12
 
 
13
  router = APIRouter()
14
  predictor = Predictor()
15
 
 
31
  def health():
32
  return {"status": "ok"}
33
 
34
+
35
@router.get("/run-drift")
def run_drift():
    """
    Run a drift check on current production data and return the report location.

    run_drift_check now requires reference data and returns
    (alerts, report_metrics), so the old body — which passed a single
    argument and treated the return value as a path — would raise a
    TypeError / report a tuple. Load the reference set explicitly and
    report the fixed HTML report path instead.
    """
    # Project-local import keeps this fix self-contained in routes.py.
    from app.monitoring.drift import REFERENCE_DATA_PATH, REPORT_PATH

    current_df = load_production_data()
    reference_df = pd.read_csv(REFERENCE_DATA_PATH)

    # The HTML report is always written to REPORT_PATH by run_drift_check.
    alerts, _report_metrics = run_drift_check(current_df, reference_df)

    return {
        "status": "drift_check_completed",
        "report_path": REPORT_PATH,
        "alerts": alerts,  # backward-compatible addition
    }
43
 
 
 
 
 
 
44
 
45
@router.get("/monitoring/run")
def monitoring_run(background_tasks: BackgroundTasks, model_version: str = "v1"):
    """
    Step 6: Run production monitoring (data drift + governance checks) in background.

    run_drift_check already runs governance checks on the Evidently report
    internally, so a single background task suffices. The previous version
    additionally scheduled run_governance_checks(current_data, ...), but that
    function expects an Evidently report dict, not a raw DataFrame, and would
    have failed at runtime — that task is removed.
    """
    # Load current and reference data
    current_data = pd.read_csv("data/processed/current_data.csv")
    reference_data = pd.read_csv("data/processed/credit_default_clean.csv")  # reference

    # Schedule drift + governance as one background task
    background_tasks.add_task(
        run_drift_check, current_data, reference_data, model_version=model_version
    )

    return {"status": "monitoring triggered", "model_version": model_version}
app/core/config.py CHANGED
@@ -1,6 +1,13 @@
1
  # env vars, paths, thresholds
 
2
 
3
  MODEL_VERSION = "v1"
4
  MODEL_PATH = "models/v1/model.pkl"
5
  FEATURES_PATH = "models/v1/features.json"
6
- DB_PATH = "app.db"
 
 
 
 
 
 
 
1
# env vars, paths, thresholds
import os

# Model artifacts for the currently deployed version.
MODEL_VERSION = "v1"
MODEL_PATH = "models/v1/model.pkl"
FEATURES_PATH = "models/v1/features.json"

# SQLite database location.
DB_PATH = "database/app.db"

# Governance logs path: three dirname() hops climb from app/core/config.py
# up to the project root; LOGS_PATH may be overridden via the environment.
PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
LOGS_PATH = os.environ.get("LOGS_PATH", os.path.join(PROJECT_ROOT, "logs"))
os.makedirs(LOGS_PATH, exist_ok=True)
+
app/monitoring/drift.py CHANGED
@@ -1,5 +1,5 @@
1
  # Evidently logic
2
-
3
  import os
4
  import pandas as pd
5
  from evidently.report import Report
@@ -10,25 +10,6 @@ REFERENCE_DATA_PATH = "models/v1/reference_data.csv"
10
  REPORT_DIR = "reports/evidently"
11
  REPORT_PATH = os.path.join(REPORT_DIR, "drift_report.html")
12
 
13
-
14
- def run_drift_check(current_df: pd.DataFrame):
15
- reference_df = pd.read_csv(REFERENCE_DATA_PATH)
16
-
17
- os.makedirs(REPORT_DIR, exist_ok=True)
18
-
19
- report = Report(metrics=[
20
- DataDriftPreset()
21
- ])
22
-
23
- report.run(
24
- reference_data=reference_df.drop(columns=["target"]),
25
- current_data=current_df
26
- )
27
-
28
- report.save_html(REPORT_PATH)
29
-
30
- return REPORT_PATH
31
-
32
  # Thresholds configuration
33
  thresholds = {
34
  "psi": 0.2,
@@ -38,10 +19,23 @@ thresholds = {
38
 
39
  governance = Governance(thresholds=thresholds)
40
 
41
- def run_drift_check(current_data, reference_data, model_version="v1"):
 
 
 
 
 
 
 
 
42
  report = Report(metrics=[DataDriftPreset()])
43
  report.run(current_data=current_data, reference_data=reference_data)
 
 
 
 
 
 
 
44
 
45
- # Governance check
46
- alerts = governance.check_metrics(report.as_dict(), model_version=model_version)
47
- return alerts
 
1
  # Evidently logic
2
+ # app/monitoring/drift.py
3
  import os
4
  import pandas as pd
5
  from evidently.report import Report
 
10
  REPORT_DIR = "reports/evidently"
11
  REPORT_PATH = os.path.join(REPORT_DIR, "drift_report.html")
12
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  # Thresholds configuration
14
  thresholds = {
15
  "psi": 0.2,
 
19
 
20
  governance = Governance(thresholds=thresholds)
21
 
22
+
23
def run_drift_check(current_data: pd.DataFrame, reference_data: pd.DataFrame = None, model_version="v1"):
    """
    Run Evidently DataDriftPreset on current vs reference data,
    save HTML report, and run governance checks.

    reference_data defaults to None, in which case the reference set is
    loaded from REFERENCE_DATA_PATH. This keeps the older single-argument
    call style (used by the /run-drift endpoint) working — backward
    compatible with both calling conventions.

    Returns a tuple: (alerts, report_metrics)
    """
    if reference_data is None:
        # Backward compatibility: callers like /run-drift pass only current data.
        reference_data = pd.read_csv(REFERENCE_DATA_PATH)

    os.makedirs(REPORT_DIR, exist_ok=True)

    report = Report(metrics=[DataDriftPreset()])
    report.run(current_data=current_data, reference_data=reference_data)
    report.save_html(REPORT_PATH)

    # report.as_dict() returns a dict; in newer Evidently versions it can be a list
    report_metrics = report.as_dict() if hasattr(report, "as_dict") else list(report)

    # Run governance checks
    alerts = governance.check_metrics(report_metrics, model_version=model_version)

    return alerts, report_metrics
 
 
app/monitoring/governance.py CHANGED
@@ -1,15 +1,23 @@
1
- # This file implements threshold checking, governance signals logging, and notifications.
2
-
3
  import json
4
  import logging
5
  from datetime import datetime
6
- from app.utils.alerts import send_email_alert, send_slack_alert
7
  import os
 
 
8
 
9
- os.makedirs("logs", exist_ok=True)
 
 
 
10
  logger = logging.getLogger("governance")
11
  logger.setLevel(logging.INFO)
12
- handler = logging.FileHandler("logs/governance_alerts.log")
 
 
 
 
 
13
  formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
14
  handler.setFormatter(formatter)
15
  logger.addHandler(handler)
@@ -30,27 +38,52 @@ class Governance:
30
  def check_metrics(self, report_dict: dict, model_version: str):
31
  alerts = []
32
 
33
- # Example: data drift
34
- psi = report_dict.get("metrics", {}).get("DataDriftPreset", {}).get("result", {}).get("dataset_drift", 0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  if psi > self.thresholds.get("psi", 0.2):
36
  alerts.append(f"Data drift detected (PSI={psi})")
37
 
38
- # Example: classification performance
39
- f1 = report_dict.get("metrics", {}).get("ClassificationPreset", {}).get("result", {}).get("f1_score", 1.0)
 
 
 
 
 
 
 
 
40
  if f1 < self.thresholds.get("f1", 0.7):
41
  alerts.append(f"F1 drop detected (F1={f1})")
42
 
43
- # Example: regression accuracy
44
- accuracy_drop = report_dict.get("metrics", {}).get("RegressionPreset", {}).get("result", {}).get("accuracy_drop", 0)
45
  if accuracy_drop > self.thresholds.get("accuracy_drop", 0.05):
46
  alerts.append(f"Accuracy drop detected ({accuracy_drop})")
47
 
48
- # Log alerts
49
  for alert in alerts:
50
  self.log_alert(alert, model_version)
51
-
52
- # Optional notifications
53
- for alert in alerts:
54
  send_email_alert(alert)
55
  send_slack_alert(alert)
56
 
@@ -64,3 +97,12 @@ class Governance:
64
  "alert": message
65
  }
66
  logger.info(json.dumps(log_entry))
 
 
 
 
 
 
 
 
 
 
1
# app/monitoring/governance.py
import json
import logging
from datetime import datetime
import os
from app.utils.alerts import send_email_alert, send_slack_alert
from app.core.config import LOGS_PATH  # configurable logs folder

# Make sure the configured logs directory exists before attaching a file handler.
os.makedirs(LOGS_PATH, exist_ok=True)

# Module-level logger dedicated to governance alerts.
logger = logging.getLogger("governance")
logger.setLevel(logging.INFO)

# Drop handlers left over from earlier imports/reloads so alert lines
# are not written multiple times.
if logger.hasHandlers():
    logger.handlers.clear()

handler = logging.FileHandler(os.path.join(LOGS_PATH, "governance_alerts.log"))
formatter = logging.Formatter('%(asctime)s | %(levelname)s | %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
 
38
  def check_metrics(self, report_dict: dict, model_version: str):
39
  alerts = []
40
 
41
+ # Normalize report_dict to a metrics dict
42
+ metrics = {}
43
+ if isinstance(report_dict, dict):
44
+ raw_metrics = report_dict.get("metrics")
45
+ if isinstance(raw_metrics, list):
46
+ for item in raw_metrics:
47
+ metric_name = item.get("metric")
48
+ result = item.get("result", {})
49
+ if metric_name:
50
+ metrics[metric_name] = result
51
+ else:
52
+ metrics = raw_metrics or {}
53
+ elif isinstance(report_dict, list):
54
+ for item in report_dict:
55
+ metric_name = item.get("metric")
56
+ result = item.get("result", {})
57
+ if metric_name:
58
+ metrics[metric_name] = result
59
+
60
+ # Data drift (project-level)
61
+ psi_metric = metrics.get("DatasetDriftMetric", {})
62
+ psi = psi_metric.get("share_of_drifted_columns", 0)
63
  if psi > self.thresholds.get("psi", 0.2):
64
  alerts.append(f"Data drift detected (PSI={psi})")
65
 
66
+ # Column-level drift alerts
67
+ data_drift_table = metrics.get("DataDriftTable", {}).get("drift_by_columns", {})
68
+ if data_drift_table:
69
+ for col, info in data_drift_table.items():
70
+ if isinstance(info, dict) and info.get("drift_detected", False):
71
+ alert_msg = f"Drift detected in column {col} (score={info.get('drift_score')})"
72
+ alerts.append(alert_msg)
73
+
74
+ # Classification performance
75
+ f1 = metrics.get("ClassificationPreset", {}).get("f1_score", 1.0)
76
  if f1 < self.thresholds.get("f1", 0.7):
77
  alerts.append(f"F1 drop detected (F1={f1})")
78
 
79
+ # Regression accuracy
80
+ accuracy_drop = metrics.get("RegressionPreset", {}).get("accuracy_drop", 0)
81
  if accuracy_drop > self.thresholds.get("accuracy_drop", 0.05):
82
  alerts.append(f"Accuracy drop detected ({accuracy_drop})")
83
 
84
+ # Log and send alerts
85
  for alert in alerts:
86
  self.log_alert(alert, model_version)
 
 
 
87
  send_email_alert(alert)
88
  send_slack_alert(alert)
89
 
 
97
  "alert": message
98
  }
99
  logger.info(json.dumps(log_entry))
100
+
101
+
102
def run_governance_checks(report_dict: dict, model_version: str = "v1", thresholds: dict = None):
    """
    Convenience wrapper to run governance checks using default thresholds.
    """
    # Fall back to the project defaults when no (or an empty) mapping is given.
    effective_thresholds = thresholds or {"psi": 0.2, "accuracy_drop": 0.05, "f1": 0.7}
    checker = Governance(effective_thresholds)
    return checker.check_metrics(report_dict, model_version)
app.db → database/app.db RENAMED
Binary files a/app.db and b/database/app.db differ
 
open_drift.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
import webbrowser
from pathlib import Path

# Resolve the report relative to this script so the opener works on any
# machine/checkout instead of the previous hard-coded Windows user path.
report_path = Path(__file__).resolve().parent / "reports" / "evidently" / "drift_report.html"
webbrowser.open(report_path.as_uri())
reports/evidently/drift_report.html CHANGED
The diff for this file is too large to render. See raw diff
 
tests/test_drift.py DELETED
File without changes
tests/test_governance.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metrics": {
3
+ "DataDriftPreset": {
4
+ "result": {"dataset_drift": 0.25}
5
+ },
6
+ "ClassificationPreset": {
7
+ "result": {"f1_score": 0.65}
8
+ },
9
+ "RegressionPreset": {
10
+ "result": {"accuracy_drop": 0.06}
11
+ }
12
+ }
13
+ }
tests/test_governance.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import sys
from pathlib import Path
import json

# Make the repo root importable when this file is run directly.
repo_root = Path(__file__).parent.parent.resolve()
sys.path.insert(0, str(repo_root))

from app.monitoring.governance import run_governance_checks

# Load the sample report fixture. Resolve it next to this script so the
# path works regardless of the current working directory (the previous
# relative 'tests/...' path broke when run from inside tests/).
fixture_path = Path(__file__).parent / "test_governance.json"
with open(fixture_path, "r") as f:
    report = json.load(f)

alerts = run_governance_checks(report, model_version="v1")
print("Governance alerts:", alerts)
tests/test_run_drift.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ import os
3
+ import pandas as pd
4
+
5
+ # Ensure project root is in sys.path
6
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
7
+
8
+ from app.monitoring.drift import run_drift_check
9
+ from app.monitoring.governance import run_governance_checks
10
+
11
def main():
    """Run a drift check against local CSV data and print the results."""
    root_dir = os.path.dirname(os.path.dirname(__file__))

    # Load current and reference data
    current_path = os.path.join(root_dir, "data", "processed", "current_data.csv")
    reference_path = os.path.join(root_dir, "models", "v1", "reference_data.csv")

    if not os.path.exists(current_path):
        raise FileNotFoundError(f"{current_path} does not exist.")
    if not os.path.exists(reference_path):
        raise FileNotFoundError(f"{reference_path} does not exist.")

    current_df = pd.read_csv(current_path)
    reference_df = pd.read_csv(reference_path)

    # Run drift check; run_drift_check returns (alerts, report_metrics).
    # (The previous version printed the whole tuple mislabeled as "Metrics"
    # under a duplicated "# Run drift check" comment.)
    alerts, report_metrics = run_drift_check(current_df, reference_df, model_version="v1")

    print("Governance alerts:", alerts)
    print("Metrics from Evidently report:", report_metrics)


if __name__ == "__main__":
    main()