Commit ·
88260af
1
Parent(s): 2c53c82
Removed 0.01 drift on frontend
Browse files- app/api/routes.py +18 -76
- app/inference/predictor.py +0 -1
- app/monitoring/drift.py +10 -6
- reports/evidently/drift_report.html +0 -0
app/api/routes.py
CHANGED
|
@@ -1,6 +1,4 @@
|
|
| 1 |
# app/api/routes.py
|
| 2 |
-
# /predict, /health, /dashboard, /monitoring/run
|
| 3 |
-
|
| 4 |
from fastapi import APIRouter, BackgroundTasks, UploadFile, File, Request, HTTPException
|
| 5 |
from fastapi.responses import JSONResponse
|
| 6 |
from fastapi.templating import Jinja2Templates
|
|
@@ -11,7 +9,7 @@ from app.monitoring.drift import run_drift_check
|
|
| 11 |
from app.monitoring.governance import run_governance_checks
|
| 12 |
|
| 13 |
import pandas as pd
|
| 14 |
-
import numpy as np #
|
| 15 |
|
| 16 |
templates = Jinja2Templates(directory="app/templates")
|
| 17 |
|
|
@@ -21,83 +19,45 @@ predictor = Predictor()
|
|
| 21 |
|
| 22 |
# CSV upload & prediction
|
| 23 |
@router.post("/predict")
|
| 24 |
-
async def predict_file(
|
| 25 |
-
background_tasks: BackgroundTasks,
|
| 26 |
-
file: UploadFile = File(...)
|
| 27 |
-
):
|
| 28 |
df = pd.read_csv(file.file)
|
| 29 |
|
| 30 |
# ---- STRICT MODE: schema enforcement ----
|
| 31 |
missing = set(predictor.features) - set(df.columns)
|
| 32 |
if missing:
|
| 33 |
-
raise HTTPException(
|
| 34 |
-
status_code=400,
|
| 35 |
-
detail=f"Invalid schema. Missing required columns: {sorted(missing)}"
|
| 36 |
-
)
|
| 37 |
|
| 38 |
# ---- Model inference ----
|
| 39 |
preds, probas = predictor.predict(df)
|
| 40 |
-
|
| 41 |
results = []
|
| 42 |
for i, (pred, proba) in enumerate(zip(preds, probas)):
|
| 43 |
results.append({
|
| 44 |
"row": i,
|
| 45 |
"probability": round(float(proba), 4),
|
| 46 |
"prediction": "Default" if pred == 1 else "No Default",
|
| 47 |
-
"risk_level":
|
| 48 |
-
"High" if proba >= 0.75 else
|
| 49 |
-
"Medium" if proba >= 0.5 else
|
| 50 |
-
"Low"
|
| 51 |
-
)
|
| 52 |
})
|
| 53 |
|
| 54 |
# ---- Drift: run once immediately to return chart data ----
|
| 55 |
reference_df = pd.read_csv("models/v1/reference_data.csv")
|
| 56 |
|
| 57 |
-
#
|
| 58 |
_, drift_dict = run_drift_check(df[predictor.features], reference_df[predictor.features], "v1")
|
| 59 |
|
| 60 |
-
#
|
| 61 |
drift_for_chart = []
|
| 62 |
for col, score in drift_dict.items():
|
| 63 |
-
|
| 64 |
-
if isinstance(score, dict):
|
| 65 |
-
numeric_values = []
|
| 66 |
-
|
| 67 |
-
def extract_numbers(d):
|
| 68 |
-
for v in d.values():
|
| 69 |
-
if isinstance(v, (int, float)):
|
| 70 |
-
numeric_values.append(v)
|
| 71 |
-
elif isinstance(v, dict):
|
| 72 |
-
extract_numbers(v)
|
| 73 |
-
elif isinstance(v, (list, np.ndarray)):
|
| 74 |
-
numeric_values.extend([float(x) for x in v if isinstance(x, (int, float))])
|
| 75 |
-
|
| 76 |
-
extract_numbers(score)
|
| 77 |
-
if numeric_values:
|
| 78 |
-
score_value = float(np.mean(numeric_values))
|
| 79 |
-
elif isinstance(score, (list, np.ndarray)):
|
| 80 |
-
score_value = float(np.mean([s for s in score if isinstance(s, (int, float))]))
|
| 81 |
-
elif isinstance(score, (int, float)):
|
| 82 |
score_value = float(score)
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
drift_for_chart.append({"column": col, "score":
|
| 87 |
|
| 88 |
# Schedule full drift in background as before
|
| 89 |
-
background_tasks.add_task(
|
| 90 |
-
run_drift_check,
|
| 91 |
-
df[predictor.features],
|
| 92 |
-
reference_df[predictor.features],
|
| 93 |
-
"v1"
|
| 94 |
-
)
|
| 95 |
|
| 96 |
-
return JSONResponse({
|
| 97 |
-
"n_rows": len(results),
|
| 98 |
-
"results": results,
|
| 99 |
-
"drift": drift_for_chart
|
| 100 |
-
})
|
| 101 |
|
| 102 |
|
| 103 |
# Health
|
|
@@ -111,10 +71,7 @@ def health():
|
|
| 111 |
def run_drift():
|
| 112 |
current_df = load_production_data()
|
| 113 |
report_path = run_drift_check(current_df)
|
| 114 |
-
return {
|
| 115 |
-
"status": "drift_check_completed",
|
| 116 |
-
"report_path": report_path
|
| 117 |
-
}
|
| 118 |
|
| 119 |
|
| 120 |
# Monitoring pipeline
|
|
@@ -123,28 +80,13 @@ def monitoring_run(background_tasks: BackgroundTasks, model_version: str = "v1")
|
|
| 123 |
current_data = pd.read_csv("data/processed/current_data.csv")
|
| 124 |
reference_data = pd.read_csv("data/processed/credit_default_clean.csv")
|
| 125 |
|
| 126 |
-
background_tasks.add_task(
|
| 127 |
-
|
| 128 |
-
current_data[predictor.features],
|
| 129 |
-
reference_data[predictor.features],
|
| 130 |
-
model_version
|
| 131 |
-
)
|
| 132 |
-
background_tasks.add_task(
|
| 133 |
-
run_governance_checks,
|
| 134 |
-
current_data,
|
| 135 |
-
model_version=model_version
|
| 136 |
-
)
|
| 137 |
|
| 138 |
-
return {
|
| 139 |
-
"status": "monitoring triggered",
|
| 140 |
-
"model_version": model_version
|
| 141 |
-
}
|
| 142 |
|
| 143 |
|
| 144 |
# Dashboard
|
| 145 |
@router.get("/dashboard")
|
| 146 |
def dashboard(request: Request):
|
| 147 |
-
return templates.TemplateResponse(
|
| 148 |
-
"dashboard.html",
|
| 149 |
-
{"request": request}
|
| 150 |
-
)
|
|
|
|
| 1 |
# app/api/routes.py
|
|
|
|
|
|
|
| 2 |
from fastapi import APIRouter, BackgroundTasks, UploadFile, File, Request, HTTPException
|
| 3 |
from fastapi.responses import JSONResponse
|
| 4 |
from fastapi.templating import Jinja2Templates
|
|
|
|
| 9 |
from app.monitoring.governance import run_governance_checks
|
| 10 |
|
| 11 |
import pandas as pd
|
| 12 |
+
import numpy as np # for numeric handling
|
| 13 |
|
| 14 |
templates = Jinja2Templates(directory="app/templates")
|
| 15 |
|
|
|
|
| 19 |
|
| 20 |
# CSV upload & prediction
|
| 21 |
@router.post("/predict")
|
| 22 |
+
async def predict_file(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
|
|
|
|
|
|
|
|
|
|
| 23 |
df = pd.read_csv(file.file)
|
| 24 |
|
| 25 |
# ---- STRICT MODE: schema enforcement ----
|
| 26 |
missing = set(predictor.features) - set(df.columns)
|
| 27 |
if missing:
|
| 28 |
+
raise HTTPException(status_code=400, detail=f"Invalid schema. Missing required columns: {sorted(missing)}")
|
|
|
|
|
|
|
|
|
|
| 29 |
|
| 30 |
# ---- Model inference ----
|
| 31 |
preds, probas = predictor.predict(df)
|
|
|
|
| 32 |
results = []
|
| 33 |
for i, (pred, proba) in enumerate(zip(preds, probas)):
|
| 34 |
results.append({
|
| 35 |
"row": i,
|
| 36 |
"probability": round(float(proba), 4),
|
| 37 |
"prediction": "Default" if pred == 1 else "No Default",
|
| 38 |
+
"risk_level": "High" if proba >= 0.75 else "Medium" if proba >= 0.5 else "Low"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
})
|
| 40 |
|
| 41 |
# ---- Drift: run once immediately to return chart data ----
|
| 42 |
reference_df = pd.read_csv("models/v1/reference_data.csv")
|
| 43 |
|
| 44 |
+
# Correctly get numeric drift scores per column
|
| 45 |
_, drift_dict = run_drift_check(df[predictor.features], reference_df[predictor.features], "v1")
|
| 46 |
|
| 47 |
+
# Ensure minimal values for chart and safe numeric handling
|
| 48 |
drift_for_chart = []
|
| 49 |
for col, score in drift_dict.items():
|
| 50 |
+
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
score_value = float(score)
|
| 52 |
+
score_value = max(score_value, 0.01)
|
| 53 |
+
except Exception:
|
| 54 |
+
score_value = 0.01
|
| 55 |
+
drift_for_chart.append({"column": col, "score": score_value})
|
| 56 |
|
| 57 |
# Schedule full drift in background as before
|
| 58 |
+
background_tasks.add_task(run_drift_check, df[predictor.features], reference_df[predictor.features], "v1")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
+
return JSONResponse({"n_rows": len(results), "results": results, "drift": drift_for_chart})
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
| 62 |
|
| 63 |
# Health
|
|
|
|
| 71 |
def run_drift():
|
| 72 |
current_df = load_production_data()
|
| 73 |
report_path = run_drift_check(current_df)
|
| 74 |
+
return {"status": "drift_check_completed", "report_path": report_path}
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
|
| 77 |
# Monitoring pipeline
|
|
|
|
| 80 |
current_data = pd.read_csv("data/processed/current_data.csv")
|
| 81 |
reference_data = pd.read_csv("data/processed/credit_default_clean.csv")
|
| 82 |
|
| 83 |
+
background_tasks.add_task(run_drift_check, current_data[predictor.features], reference_data[predictor.features], model_version)
|
| 84 |
+
background_tasks.add_task(run_governance_checks, current_data, model_version=model_version)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
+
return {"status": "monitoring triggered", "model_version": model_version}
|
|
|
|
|
|
|
|
|
|
| 87 |
|
| 88 |
|
| 89 |
# Dashboard
|
| 90 |
@router.get("/dashboard")
|
| 91 |
def dashboard(request: Request):
|
| 92 |
+
return templates.TemplateResponse("dashboard.html", {"request": request})
|
|
|
|
|
|
|
|
|
app/inference/predictor.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
# model.predict wrapper
|
| 2 |
-
|
| 3 |
import json
|
| 4 |
import joblib
|
| 5 |
import numpy as np
|
|
|
|
| 1 |
# model.predict wrapper
|
|
|
|
| 2 |
import json
|
| 3 |
import joblib
|
| 4 |
import numpy as np
|
app/monitoring/drift.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
| 1 |
# Evidently logic
|
| 2 |
-
# app/monitoring/drift.py
|
| 3 |
import os
|
| 4 |
import pandas as pd
|
| 5 |
from evidently.report import Report
|
|
@@ -32,10 +31,15 @@ def run_drift_check(current_data: pd.DataFrame, reference_data: pd.DataFrame, mo
|
|
| 32 |
report.run(current_data=current_data, reference_data=reference_data)
|
| 33 |
report.save_html(REPORT_PATH)
|
| 34 |
|
| 35 |
-
#
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
| 38 |
-
# Run governance checks
|
| 39 |
-
alerts = governance.check_metrics(
|
| 40 |
|
| 41 |
-
return alerts,
|
|
|
|
| 1 |
# Evidently logic
|
|
|
|
| 2 |
import os
|
| 3 |
import pandas as pd
|
| 4 |
from evidently.report import Report
|
|
|
|
| 31 |
report.run(current_data=current_data, reference_data=reference_data)
|
| 32 |
report.save_html(REPORT_PATH)
|
| 33 |
|
| 34 |
+
# Extract numeric drift scores per column
|
| 35 |
+
report_dict = report.as_dict() if hasattr(report, "as_dict") else {}
|
| 36 |
+
drift_scores = {}
|
| 37 |
+
for metric in report_dict.get("metrics", []):
|
| 38 |
+
if metric["metric"] == "DataDriftMetric":
|
| 39 |
+
for col_name, col_data in metric["result"].get("dataset_drift", {}).items():
|
| 40 |
+
drift_scores[col_name] = col_data.get("drift_score", 0.0)
|
| 41 |
|
| 42 |
+
# Run governance checks (keeps existing alerts)
|
| 43 |
+
alerts = governance.check_metrics(report_dict, model_version=model_version)
|
| 44 |
|
| 45 |
+
return alerts, drift_scores
|
reports/evidently/drift_report.html
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|