Commit ·
61ee9c4
1
Parent(s): 79b961c
auto drift
Browse files- app/api/background_drift.py +40 -26
- app/api/routes.py +1 -0
- app/templates/dashboard.html +34 -48
- data/production/predictions_log.csv +0 -0
- reports/evidently/drift_report.html +0 -0
- reports/evidently/drift_report.json +0 -0
app/api/background_drift.py
CHANGED
|
@@ -3,55 +3,69 @@ import asyncio
|
|
| 3 |
import pandas as pd
|
| 4 |
import os
|
| 5 |
import json
|
| 6 |
-
import numpy as np
|
| 7 |
|
| 8 |
from app.monitoring.drift import run_drift_check
|
| 9 |
from app.inference.predictor import Predictor
|
| 10 |
|
| 11 |
predictor = Predictor()
|
|
|
|
| 12 |
REFERENCE_PATH = "models/v1/reference_data.csv"
|
| 13 |
-
|
| 14 |
DASHBOARD_JSON = "reports/evidently/drift_report.json"
|
| 15 |
|
| 16 |
-
#
|
|
|
|
|
|
|
| 17 |
os.makedirs(os.path.dirname(DASHBOARD_JSON), exist_ok=True)
|
| 18 |
-
if not os.path.exists(DASHBOARD_JSON):
|
| 19 |
-
with open(DASHBOARD_JSON, "w") as f:
|
| 20 |
-
json.dump({"n_rows": 0, "results": [], "drift": [{"column": feat, "score": 0.0} for feat in predictor.features]}, f, indent=2)
|
| 21 |
|
| 22 |
-
|
|
|
|
| 23 |
"""
|
| 24 |
-
Continuously
|
| 25 |
"""
|
| 26 |
while True:
|
| 27 |
try:
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
reference_df = pd.read_csv(REFERENCE_PATH)
|
| 30 |
|
| 31 |
_, drift_dict = run_drift_check(
|
| 32 |
-
|
| 33 |
reference_df[predictor.features],
|
| 34 |
-
"v1"
|
| 35 |
)
|
| 36 |
|
| 37 |
-
# Ensure numeric safe drift values
|
| 38 |
-
drift_for_chart = []
|
| 39 |
-
for col, score in drift_dict.items():
|
| 40 |
-
try:
|
| 41 |
-
val = float(score)
|
| 42 |
-
if not np.isfinite(val):
|
| 43 |
-
val = 0.0
|
| 44 |
-
except Exception:
|
| 45 |
-
val = 0.0
|
| 46 |
-
drift_for_chart.append({"column": col, "score": val})
|
| 47 |
-
|
| 48 |
dashboard_payload = {
|
| 49 |
-
"n_rows": len(
|
| 50 |
-
"results": [],
|
| 51 |
-
"drift":
|
|
|
|
|
|
|
|
|
|
| 52 |
}
|
| 53 |
|
| 54 |
-
# Atomic write
|
| 55 |
tmp_path = DASHBOARD_JSON + ".tmp"
|
| 56 |
with open(tmp_path, "w") as f:
|
| 57 |
json.dump(dashboard_payload, f, indent=2)
|
|
|
|
| 3 |
import pandas as pd
|
| 4 |
import os
|
| 5 |
import json
|
|
|
|
| 6 |
|
| 7 |
from app.monitoring.drift import run_drift_check
|
| 8 |
from app.inference.predictor import Predictor
|
| 9 |
|
| 10 |
predictor = Predictor()
|
| 11 |
+
|
| 12 |
REFERENCE_PATH = "models/v1/reference_data.csv"
|
| 13 |
+
PROD_LOG_PATH = "data/production/predictions_log.csv"
|
| 14 |
DASHBOARD_JSON = "reports/evidently/drift_report.json"
|
| 15 |
|
| 16 |
+
# Retention policy (VERY IMPORTANT for HF Spaces)
|
| 17 |
+
MAX_ROWS = 5000 # rolling window
|
| 18 |
+
|
| 19 |
os.makedirs(os.path.dirname(DASHBOARD_JSON), exist_ok=True)
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
+
|
| 22 |
+
async def drift_loop(interval_seconds: int = 10):
|
| 23 |
"""
|
| 24 |
+
Continuously compute drift from production inference data.
|
| 25 |
"""
|
| 26 |
while True:
|
| 27 |
try:
|
| 28 |
+
if not os.path.exists(PROD_LOG_PATH):
|
| 29 |
+
await asyncio.sleep(interval_seconds)
|
| 30 |
+
continue
|
| 31 |
+
|
| 32 |
+
prod_df = pd.read_csv(PROD_LOG_PATH)
|
| 33 |
+
|
| 34 |
+
# ---- Retention window (prevents infinite growth) ----
|
| 35 |
+
if len(prod_df) > MAX_ROWS:
|
| 36 |
+
prod_df = prod_df.tail(MAX_ROWS)
|
| 37 |
+
prod_df.to_csv(PROD_LOG_PATH, index=False)
|
| 38 |
+
|
| 39 |
+
# ---- Keep only rows with all required features ----
|
| 40 |
+
missing_features = set(predictor.features) - set(prod_df.columns)
|
| 41 |
+
if missing_features:
|
| 42 |
+
print(f"Skipping drift check, missing features: {missing_features}")
|
| 43 |
+
await asyncio.sleep(interval_seconds)
|
| 44 |
+
continue
|
| 45 |
+
|
| 46 |
+
prod_df = prod_df.dropna(subset=predictor.features)
|
| 47 |
+
if prod_df.empty:
|
| 48 |
+
await asyncio.sleep(interval_seconds)
|
| 49 |
+
continue
|
| 50 |
+
|
| 51 |
reference_df = pd.read_csv(REFERENCE_PATH)
|
| 52 |
|
| 53 |
_, drift_dict = run_drift_check(
|
| 54 |
+
prod_df[predictor.features],
|
| 55 |
reference_df[predictor.features],
|
| 56 |
+
model_version="v1",
|
| 57 |
)
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
dashboard_payload = {
|
| 60 |
+
"n_rows": len(prod_df),
|
| 61 |
+
"results": [],
|
| 62 |
+
"drift": [
|
| 63 |
+
{"column": col, "score": float(score)}
|
| 64 |
+
for col, score in drift_dict.items()
|
| 65 |
+
],
|
| 66 |
}
|
| 67 |
|
| 68 |
+
# Atomic write (prevents frontend race conditions)
|
| 69 |
tmp_path = DASHBOARD_JSON + ".tmp"
|
| 70 |
with open(tmp_path, "w") as f:
|
| 71 |
json.dump(dashboard_payload, f, indent=2)
|
app/api/routes.py
CHANGED
|
@@ -60,6 +60,7 @@ async def predict_file(background_tasks: BackgroundTasks, file: UploadFile = Fil
|
|
| 60 |
df_log = df.copy()
|
| 61 |
df_log["prediction"] = preds
|
| 62 |
df_log["probability"] = probas
|
|
|
|
| 63 |
df_log["model_version"] = predictor.model_version
|
| 64 |
df_log["timestamp"] = pd.Timestamp.utcnow()
|
| 65 |
|
|
|
|
| 60 |
df_log = df.copy()
|
| 61 |
df_log["prediction"] = preds
|
| 62 |
df_log["probability"] = probas
|
| 63 |
+
df_log["risk_level"] = ["High" if p >= 0.75 else "Medium" if p >= 0.5 else "Low" for p in probas]
|
| 64 |
df_log["model_version"] = predictor.model_version
|
| 65 |
df_log["timestamp"] = pd.Timestamp.utcnow()
|
| 66 |
|
app/templates/dashboard.html
CHANGED
|
@@ -14,65 +14,34 @@
|
|
| 14 |
<button type="submit">Run Prediction</button>
|
| 15 |
</form>
|
| 16 |
|
| 17 |
-
<h2>Predictions</h2>
|
| 18 |
<div id="predictions"></div>
|
| 19 |
|
| 20 |
<h2>Drift Metrics</h2>
|
| 21 |
<div id="drift-chart"></div>
|
| 22 |
|
| 23 |
-
<script>
|
| 24 |
-
async function fetchResults(csvFile) {
|
| 25 |
-
const formData = new FormData();
|
| 26 |
-
formData.append("file", csvFile);
|
| 27 |
-
|
| 28 |
-
const response = await fetch("/predict", { method: "POST", body: formData });
|
| 29 |
-
const data = await response.json();
|
| 30 |
-
|
| 31 |
-
document.getElementById("predictions").innerHTML =
|
| 32 |
-
`<pre>${JSON.stringify(data.results, null, 2)}</pre>`;
|
| 33 |
-
|
| 34 |
-
const driftContainer = document.getElementById("drift-chart");
|
| 35 |
-
driftContainer.innerHTML = "";
|
| 36 |
-
|
| 37 |
-
if (Array.isArray(data.drift)) {
|
| 38 |
-
const cols = data.drift.map(d => d.column);
|
| 39 |
-
const scores = data.drift.map(d => {
|
| 40 |
-
let val = Number(d.score);
|
| 41 |
-
if (!Number.isFinite(val)) val = 0;
|
| 42 |
-
return val;
|
| 43 |
-
});
|
| 44 |
-
|
| 45 |
-
Plotly.newPlot(driftContainer, [{
|
| 46 |
-
x: cols,
|
| 47 |
-
y: scores,
|
| 48 |
-
type: "bar"
|
| 49 |
-
}]);
|
| 50 |
-
} else {
|
| 51 |
-
driftContainer.innerHTML =
|
| 52 |
-
"<p>Drift report scheduled. Open the Evidently HTML report.</p>";
|
| 53 |
-
}
|
| 54 |
-
}
|
| 55 |
-
|
| 56 |
-
document.getElementById("upload-form").addEventListener("submit", async (e) => {
|
| 57 |
-
e.preventDefault();
|
| 58 |
-
const fileInput = e.target.file.files[0];
|
| 59 |
-
if (fileInput) {
|
| 60 |
-
await fetchResults(fileInput);
|
| 61 |
-
}
|
| 62 |
-
});
|
| 63 |
-
</script>
|
| 64 |
-
<div id="drift-chart"></div>
|
| 65 |
-
|
| 66 |
-
<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
|
| 67 |
<script>
|
| 68 |
async function fetchDashboardData() {
|
| 69 |
try {
|
| 70 |
const resp = await fetch("/dashboard/data");
|
| 71 |
const json = await resp.json();
|
|
|
|
| 72 |
if(json.status === "ok") {
|
| 73 |
-
const
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
const columns = driftData.map(d => d.column);
|
| 75 |
-
const scores = driftData.map(d => d.score);
|
| 76 |
|
| 77 |
const trace = {
|
| 78 |
x: columns,
|
|
@@ -87,10 +56,12 @@ async function fetchDashboardData() {
|
|
| 87 |
xaxis: {title: 'Column'}
|
| 88 |
};
|
| 89 |
|
| 90 |
-
Plotly.newPlot(
|
|
|
|
| 91 |
} else {
|
| 92 |
console.warn("Dashboard data not available:", json.message);
|
| 93 |
}
|
|
|
|
| 94 |
} catch(err) {
|
| 95 |
console.error("Failed to fetch dashboard data:", err);
|
| 96 |
}
|
|
@@ -99,6 +70,21 @@ async function fetchDashboardData() {
|
|
| 99 |
// Fetch every 10 seconds
|
| 100 |
fetchDashboardData();
|
| 101 |
setInterval(fetchDashboardData, 10000);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
</script>
|
| 103 |
</body>
|
| 104 |
</html>
|
|
|
|
| 14 |
<button type="submit">Run Prediction</button>
|
| 15 |
</form>
|
| 16 |
|
| 17 |
+
<h2>Recent Predictions</h2>
|
| 18 |
<div id="predictions"></div>
|
| 19 |
|
| 20 |
<h2>Drift Metrics</h2>
|
| 21 |
<div id="drift-chart"></div>
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
<script>
|
| 24 |
async function fetchDashboardData() {
|
| 25 |
try {
|
| 26 |
const resp = await fetch("/dashboard/data");
|
| 27 |
const json = await resp.json();
|
| 28 |
+
|
| 29 |
if(json.status === "ok") {
|
| 30 |
+
const data = json.data;
|
| 31 |
+
|
| 32 |
+
// Update predictions table
|
| 33 |
+
const predDiv = document.getElementById("predictions");
|
| 34 |
+
if(Array.isArray(data.results) && data.results.length > 0){
|
| 35 |
+
predDiv.innerHTML = "<pre>" + JSON.stringify(data.results, null, 2) + "</pre>";
|
| 36 |
+
} else {
|
| 37 |
+
predDiv.innerHTML = "<p>No recent predictions.</p>";
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
// Update drift chart
|
| 41 |
+
const driftContainer = document.getElementById("drift-chart");
|
| 42 |
+
const driftData = data.drift || [];
|
| 43 |
const columns = driftData.map(d => d.column);
|
| 44 |
+
const scores = driftData.map(d => Number(d.score));
|
| 45 |
|
| 46 |
const trace = {
|
| 47 |
x: columns,
|
|
|
|
| 56 |
xaxis: {title: 'Column'}
|
| 57 |
};
|
| 58 |
|
| 59 |
+
Plotly.newPlot(driftContainer, [trace], layout);
|
| 60 |
+
|
| 61 |
} else {
|
| 62 |
console.warn("Dashboard data not available:", json.message);
|
| 63 |
}
|
| 64 |
+
|
| 65 |
} catch(err) {
|
| 66 |
console.error("Failed to fetch dashboard data:", err);
|
| 67 |
}
|
|
|
|
| 70 |
// Fetch every 10 seconds
|
| 71 |
fetchDashboardData();
|
| 72 |
setInterval(fetchDashboardData, 10000);
|
| 73 |
+
|
| 74 |
+
// File upload handler
|
| 75 |
+
document.getElementById("upload-form").addEventListener("submit", async (e) => {
|
| 76 |
+
e.preventDefault();
|
| 77 |
+
const fileInput = e.target.file.files[0];
|
| 78 |
+
if (fileInput) {
|
| 79 |
+
const formData = new FormData();
|
| 80 |
+
formData.append("file", fileInput);
|
| 81 |
+
|
| 82 |
+
const response = await fetch("/predict", { method: "POST", body: formData });
|
| 83 |
+
const data = await response.json();
|
| 84 |
+
document.getElementById("predictions").innerHTML =
|
| 85 |
+
`<pre>${JSON.stringify(data.results, null, 2)}</pre>`;
|
| 86 |
+
}
|
| 87 |
+
});
|
| 88 |
</script>
|
| 89 |
</body>
|
| 90 |
</html>
|
data/production/predictions_log.csv
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
reports/evidently/drift_report.html
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
reports/evidently/drift_report.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|