Commit ·
8e3bbfe
1
Parent(s): eafdbbf
turn the batch-inference workflow into an MLOps service (prediction logging, drift reporting, inference simulation)
Browse files- README.md +1 -1
- app/api/routes.py +21 -12
- app/inference/predictor.py +4 -0
- data/production/predictions_log.csv +1 -0
- reports/evidently/drift_report.html +0 -0
- scripts/simulate_inference.py +25 -0
README.md
CHANGED
|
@@ -11,7 +11,7 @@ license: mit
|
|
| 11 |
|
| 12 |
# Under Construction
|
| 13 |
|
| 14 |
-
|
| 15 |
|
| 16 |
py -3.9 -m venv .venv
|
| 17 |
|
|
|
|
| 11 |
|
| 12 |
# Under Construction
|
| 13 |
|
| 14 |
+
Building a production-ready ML inference service with post-deployment drift detection, governance, and alerting, demonstrating real MLOps practices rather than offline modeling.
|
| 15 |
|
| 16 |
py -3.9 -m venv .venv
|
| 17 |
|
app/api/routes.py
CHANGED
|
@@ -9,15 +9,17 @@ from app.monitoring.drift import run_drift_check
|
|
| 9 |
from app.monitoring.governance import run_governance_checks
|
| 10 |
|
| 11 |
import pandas as pd
|
| 12 |
-
import numpy as np
|
|
|
|
| 13 |
|
| 14 |
templates = Jinja2Templates(directory="app/templates")
|
| 15 |
-
|
| 16 |
router = APIRouter()
|
| 17 |
predictor = Predictor()
|
| 18 |
|
|
|
|
|
|
|
|
|
|
| 19 |
|
| 20 |
-
# CSV upload & prediction
|
| 21 |
@router.post("/predict")
|
| 22 |
async def predict_file(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
|
| 23 |
df = pd.read_csv(file.file)
|
|
@@ -38,13 +40,11 @@ async def predict_file(background_tasks: BackgroundTasks, file: UploadFile = Fil
|
|
| 38 |
"risk_level": "High" if proba >= 0.75 else "Medium" if proba >= 0.5 else "Low"
|
| 39 |
})
|
| 40 |
|
| 41 |
-
# ---- Drift:
|
| 42 |
reference_df = pd.read_csv("models/v1/reference_data.csv")
|
| 43 |
-
|
| 44 |
-
# Correctly get numeric drift scores per column
|
| 45 |
_, drift_dict = run_drift_check(df[predictor.features], reference_df[predictor.features], "v1")
|
| 46 |
|
| 47 |
-
#
|
| 48 |
drift_for_chart = []
|
| 49 |
for col, score in drift_dict.items():
|
| 50 |
try:
|
|
@@ -55,19 +55,30 @@ async def predict_file(background_tasks: BackgroundTasks, file: UploadFile = Fil
|
|
| 55 |
score_value = 0.0
|
| 56 |
drift_for_chart.append({"column": col, "score": score_value})
|
| 57 |
|
| 58 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
background_tasks.add_task(run_drift_check, df[predictor.features], reference_df[predictor.features], "v1")
|
| 60 |
|
| 61 |
return JSONResponse({"n_rows": len(results), "results": results, "drift": drift_for_chart})
|
| 62 |
|
| 63 |
|
| 64 |
-
# Health
|
| 65 |
@router.get("/health")
|
| 66 |
def health():
|
| 67 |
return {"status": "ok"}
|
| 68 |
|
| 69 |
|
| 70 |
-
# Manual drift run
|
| 71 |
@router.get("/run-drift")
|
| 72 |
def run_drift():
|
| 73 |
current_df = load_production_data()
|
|
@@ -75,7 +86,6 @@ def run_drift():
|
|
| 75 |
return {"status": "drift_check_completed", "report_path": report_path}
|
| 76 |
|
| 77 |
|
| 78 |
-
# Monitoring pipeline
|
| 79 |
@router.get("/monitoring/run")
|
| 80 |
def monitoring_run(background_tasks: BackgroundTasks, model_version: str = "v1"):
|
| 81 |
current_data = pd.read_csv("data/processed/current_data.csv")
|
|
@@ -87,7 +97,6 @@ def monitoring_run(background_tasks: BackgroundTasks, model_version: str = "v1")
|
|
| 87 |
return {"status": "monitoring triggered", "model_version": model_version}
|
| 88 |
|
| 89 |
|
| 90 |
-
# Dashboard
|
| 91 |
@router.get("/dashboard")
|
| 92 |
def dashboard(request: Request):
|
| 93 |
return templates.TemplateResponse("dashboard.html", {"request": request})
|
|
|
|
| 9 |
from app.monitoring.governance import run_governance_checks
|
| 10 |
|
| 11 |
import pandas as pd
|
| 12 |
+
import numpy as np
|
| 13 |
+
import os
|
| 14 |
|
| 15 |
templates = Jinja2Templates(directory="app/templates")
|
|
|
|
| 16 |
router = APIRouter()
|
| 17 |
predictor = Predictor()
|
| 18 |
|
| 19 |
+
# Production log file
|
| 20 |
+
PROD_LOG = "data/production/predictions_log.csv"
|
| 21 |
+
|
| 22 |
|
|
|
|
| 23 |
@router.post("/predict")
|
| 24 |
async def predict_file(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
|
| 25 |
df = pd.read_csv(file.file)
|
|
|
|
| 40 |
"risk_level": "High" if proba >= 0.75 else "Medium" if proba >= 0.5 else "Low"
|
| 41 |
})
|
| 42 |
|
| 43 |
+
# ---- Drift: immediate for frontend ----
|
| 44 |
reference_df = pd.read_csv("models/v1/reference_data.csv")
|
|
|
|
|
|
|
| 45 |
_, drift_dict = run_drift_check(df[predictor.features], reference_df[predictor.features], "v1")
|
| 46 |
|
| 47 |
+
# Safe numeric drift values for chart
|
| 48 |
drift_for_chart = []
|
| 49 |
for col, score in drift_dict.items():
|
| 50 |
try:
|
|
|
|
| 55 |
score_value = 0.0
|
| 56 |
drift_for_chart.append({"column": col, "score": score_value})
|
| 57 |
|
| 58 |
+
# ---- Append predictions to production log ----
|
| 59 |
+
df_log = df.copy()
|
| 60 |
+
df_log["prediction"] = preds
|
| 61 |
+
df_log["probability"] = probas
|
| 62 |
+
df_log["model_version"] = predictor.model_version
|
| 63 |
+
df_log["timestamp"] = pd.Timestamp.utcnow()
|
| 64 |
+
|
| 65 |
+
os.makedirs(os.path.dirname(PROD_LOG), exist_ok=True)
|
| 66 |
+
if not os.path.exists(PROD_LOG):
|
| 67 |
+
df_log.to_csv(PROD_LOG, index=False)
|
| 68 |
+
else:
|
| 69 |
+
df_log.to_csv(PROD_LOG, mode="a", header=False, index=False)
|
| 70 |
+
|
| 71 |
+
# ---- Background full drift check ----
|
| 72 |
background_tasks.add_task(run_drift_check, df[predictor.features], reference_df[predictor.features], "v1")
|
| 73 |
|
| 74 |
return JSONResponse({"n_rows": len(results), "results": results, "drift": drift_for_chart})
|
| 75 |
|
| 76 |
|
|
|
|
| 77 |
@router.get("/health")
def health():
    """Liveness probe: reports service availability with a static payload."""
    return dict(status="ok")
|
| 80 |
|
| 81 |
|
|
|
|
| 82 |
@router.get("/run-drift")
|
| 83 |
def run_drift():
|
| 84 |
current_df = load_production_data()
|
|
|
|
| 86 |
return {"status": "drift_check_completed", "report_path": report_path}
|
| 87 |
|
| 88 |
|
|
|
|
| 89 |
@router.get("/monitoring/run")
|
| 90 |
def monitoring_run(background_tasks: BackgroundTasks, model_version: str = "v1"):
|
| 91 |
current_data = pd.read_csv("data/processed/current_data.csv")
|
|
|
|
| 97 |
return {"status": "monitoring triggered", "model_version": model_version}
|
| 98 |
|
| 99 |
|
|
|
|
| 100 |
@router.get("/dashboard")
def dashboard(request: Request):
    """Serve the monitoring dashboard page rendered from its Jinja2 template."""
    context = {"request": request}
    return templates.TemplateResponse("dashboard.html", context)
|
app/inference/predictor.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
# model.predict wrapper
|
| 2 |
import json
|
| 3 |
import joblib
|
|
@@ -11,6 +13,8 @@ class Predictor:
|
|
| 11 |
with open(FEATURES_PATH, "r") as f:
|
| 12 |
self.features = json.load(f)
|
| 13 |
|
|
|
|
|
|
|
| 14 |
def predict(self, df):
|
| 15 |
X = df[self.features]
|
| 16 |
probas = self.model.predict_proba(X)[:, 1]
|
|
|
|
| 1 |
+
# app/inference/predictor.py
|
| 2 |
+
|
| 3 |
# model.predict wrapper
|
| 4 |
import json
|
| 5 |
import joblib
|
|
|
|
| 13 |
with open(FEATURES_PATH, "r") as f:
|
| 14 |
self.features = json.load(f)
|
| 15 |
|
| 16 |
+
self.model_version = "v1"
|
| 17 |
+
|
| 18 |
def predict(self, df):
|
| 19 |
X = df[self.features]
|
| 20 |
probas = self.model.predict_proba(X)[:, 1]
|
data/production/predictions_log.csv
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
50000.0,22,0,0,45458.0,46450.0,2051.0,2200.0,1,0,0.20854088888292008,v1,2026-01-14 13:41:57.526400+00:00
|
reports/evidently/drift_report.html
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
scripts/simulate_inference.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# scripts/simulate_inference.py
"""Simulate production inference traffic.

Samples a small random batch of rows from the processed dataset and POSTs
it as a CSV upload to the running FastAPI ``/predict`` endpoint, then
prints the HTTP status and response body.
"""
import random

import pandas as pd
import requests

# Endpoint of the locally running FastAPI app and the source dataset.
PREDICT_URL = "http://localhost:8000/predict"
DATA_PATH = "data/processed/current_data.csv"

df = pd.read_csv(DATA_PATH)

# Sample 1-5 rows randomly to mimic a small real-world inference request.
sample = df.sample(random.randint(1, 5))
csv_bytes = sample.to_csv(index=False).encode("utf-8")

# POST to FastAPI predict endpoint. A timeout is essential here: without
# one, requests waits forever and the script hangs if the server is down.
response = requests.post(
    PREDICT_URL,
    files={"file": ("sample.csv", csv_bytes, "text/csv")},
    timeout=30,
)

print("Status:", response.status_code)
try:
    print("Response:", response.json())
except ValueError:
    # requests raises requests.exceptions.JSONDecodeError (a ValueError
    # subclass) when the body is not valid JSON — e.g. an HTML error page.
    print("Server returned non-JSON response:")
    print(response.text)
|
| 25 |
+
|