Commit
Β·
ce96dae
1
Parent(s):
b1725f1
working on the daemon
Browse files- README.md +61 -58
- app/api/background_drift.py +16 -11
- app/api/traffic_daemon.py +17 -18
- app/monitoring/governance.py +3 -0
- data/production/predictions_log.csv +0 -9
- reports/evidently/drift_report.html +0 -0
- reports/evidently/drift_report.json +9 -9
README.md
CHANGED
|
@@ -26,61 +26,64 @@ uvicorn app.main:app --reload
|
|
| 26 |
# Repo Structure
|
| 27 |
|
| 28 |
ml-inference-drift-service/
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
# Repo Structure
|
| 27 |
|
| 28 |
ml-inference-drift-service/
|
| 29 |
+
Dockerfile
|
| 30 |
+
LICENSE
|
| 31 |
+
README.md
|
| 32 |
+
requirements-dev.txt
|
| 33 |
+
requirements.txt
|
| 34 |
+
app/
|
| 35 |
+
main.py
|
| 36 |
+
api/
|
| 37 |
+
background_drift.py
|
| 38 |
+
dashboard_data.py
|
| 39 |
+
routes.py
|
| 40 |
+
schemas.py
|
| 41 |
+
traffic_daemon.py
|
| 42 |
+
core/
|
| 43 |
+
config.py
|
| 44 |
+
logging.py
|
| 45 |
+
templates.py
|
| 46 |
+
inference/
|
| 47 |
+
predictor.py
|
| 48 |
+
monitoring/
|
| 49 |
+
data_loader.py
|
| 50 |
+
drift.py
|
| 51 |
+
governance.py
|
| 52 |
+
static/
|
| 53 |
+
styles.css
|
| 54 |
+
templates/
|
| 55 |
+
dashboard.html
|
| 56 |
+
utils/
|
| 57 |
+
alerts.py
|
| 58 |
+
data/
|
| 59 |
+
processed/
|
| 60 |
+
credit_default_clean.csv
|
| 61 |
+
current_data.csv
|
| 62 |
+
production/
|
| 63 |
+
predictions_log.csv
|
| 64 |
+
raw/
|
| 65 |
+
credit_default.csv
|
| 66 |
+
database/
|
| 67 |
+
logs/
|
| 68 |
+
models/
|
| 69 |
+
v1/
|
| 70 |
+
features.json
|
| 71 |
+
reference_data.csv
|
| 72 |
+
v2/
|
| 73 |
+
reports/
|
| 74 |
+
evidently/
|
| 75 |
+
drift_report.html
|
| 76 |
+
drift_report.json
|
| 77 |
+
scripts/
|
| 78 |
+
prepare_data.py
|
| 79 |
+
simulate_inference.py
|
| 80 |
+
train.py
|
| 81 |
+
tests/
|
| 82 |
+
conftest.py
|
| 83 |
+
integration/
|
| 84 |
+
test_api.py
|
| 85 |
+
test_governance.json
|
| 86 |
+
test_governance.py
|
| 87 |
+
test_run_drift.py
|
| 88 |
+
unit/
|
| 89 |
+
test_schemas.py
|
app/api/background_drift.py
CHANGED
|
@@ -13,7 +13,7 @@ REFERENCE_PATH = "models/v1/reference_data.csv"
|
|
| 13 |
PROD_LOG_PATH = "data/production/predictions_log.csv"
|
| 14 |
DASHBOARD_JSON = "reports/evidently/drift_report.json"
|
| 15 |
|
| 16 |
-
MAX_ROWS = 5000
|
| 17 |
os.makedirs(os.path.dirname(DASHBOARD_JSON), exist_ok=True)
|
| 18 |
|
| 19 |
async def drift_loop(interval_seconds: int = 10):
|
|
@@ -25,15 +25,12 @@ async def drift_loop(interval_seconds: int = 10):
|
|
| 25 |
|
| 26 |
prod_df = pd.read_csv(PROD_LOG_PATH)
|
| 27 |
|
| 28 |
-
# Retention window
|
| 29 |
if len(prod_df) > MAX_ROWS:
|
| 30 |
prod_df = prod_df.tail(MAX_ROWS)
|
| 31 |
prod_df.to_csv(PROD_LOG_PATH, index=False)
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
if missing_features:
|
| 36 |
-
print(f"Skipping drift check, missing features: {missing_features}")
|
| 37 |
await asyncio.sleep(interval_seconds)
|
| 38 |
continue
|
| 39 |
|
|
@@ -44,26 +41,34 @@ async def drift_loop(interval_seconds: int = 10):
|
|
| 44 |
|
| 45 |
reference_df = pd.read_csv(REFERENCE_PATH)
|
| 46 |
|
| 47 |
-
# ---- Run drift on features only ----
|
| 48 |
_, drift_dict = run_drift_check(
|
| 49 |
prod_df[predictor.features],
|
| 50 |
reference_df[predictor.features],
|
| 51 |
model_version="v1"
|
| 52 |
)
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
dashboard_payload = {
|
| 55 |
"n_rows": len(prod_df),
|
| 56 |
-
"results":
|
| 57 |
"drift": [
|
| 58 |
{"column": col, "score": float(score)}
|
| 59 |
for col, score in drift_dict.items()
|
| 60 |
],
|
| 61 |
}
|
| 62 |
|
| 63 |
-
|
| 64 |
-
with open(
|
| 65 |
json.dump(dashboard_payload, f, indent=2)
|
| 66 |
-
os.replace(
|
| 67 |
|
| 68 |
except Exception as e:
|
| 69 |
print("Drift loop error:", e)
|
|
|
|
| 13 |
PROD_LOG_PATH = "data/production/predictions_log.csv"
|
| 14 |
DASHBOARD_JSON = "reports/evidently/drift_report.json"
|
| 15 |
|
| 16 |
+
MAX_ROWS = 5000
|
| 17 |
os.makedirs(os.path.dirname(DASHBOARD_JSON), exist_ok=True)
|
| 18 |
|
| 19 |
async def drift_loop(interval_seconds: int = 10):
|
|
|
|
| 25 |
|
| 26 |
prod_df = pd.read_csv(PROD_LOG_PATH)
|
| 27 |
|
|
|
|
| 28 |
if len(prod_df) > MAX_ROWS:
|
| 29 |
prod_df = prod_df.tail(MAX_ROWS)
|
| 30 |
prod_df.to_csv(PROD_LOG_PATH, index=False)
|
| 31 |
|
| 32 |
+
missing = set(predictor.features) - set(prod_df.columns)
|
| 33 |
+
if missing:
|
|
|
|
|
|
|
| 34 |
await asyncio.sleep(interval_seconds)
|
| 35 |
continue
|
| 36 |
|
|
|
|
| 41 |
|
| 42 |
reference_df = pd.read_csv(REFERENCE_PATH)
|
| 43 |
|
|
|
|
| 44 |
_, drift_dict = run_drift_check(
|
| 45 |
prod_df[predictor.features],
|
| 46 |
reference_df[predictor.features],
|
| 47 |
model_version="v1"
|
| 48 |
)
|
| 49 |
|
| 50 |
+
# ---- RECENT PREDICTIONS FIX ----
|
| 51 |
+
recent_results = []
|
| 52 |
+
if "prediction" in prod_df.columns:
|
| 53 |
+
recent_results = (
|
| 54 |
+
prod_df[["prediction"]]
|
| 55 |
+
.tail(10)
|
| 56 |
+
.to_dict(orient="records")
|
| 57 |
+
)
|
| 58 |
+
|
| 59 |
dashboard_payload = {
|
| 60 |
"n_rows": len(prod_df),
|
| 61 |
+
"results": recent_results,
|
| 62 |
"drift": [
|
| 63 |
{"column": col, "score": float(score)}
|
| 64 |
for col, score in drift_dict.items()
|
| 65 |
],
|
| 66 |
}
|
| 67 |
|
| 68 |
+
tmp = DASHBOARD_JSON + ".tmp"
|
| 69 |
+
with open(tmp, "w") as f:
|
| 70 |
json.dump(dashboard_payload, f, indent=2)
|
| 71 |
+
os.replace(tmp, DASHBOARD_JSON)
|
| 72 |
|
| 73 |
except Exception as e:
|
| 74 |
print("Drift loop error:", e)
|
app/api/traffic_daemon.py
CHANGED
|
@@ -2,8 +2,8 @@
|
|
| 2 |
import asyncio
|
| 3 |
import pandas as pd
|
| 4 |
import random
|
| 5 |
-
import requests
|
| 6 |
import os
|
|
|
|
| 7 |
|
| 8 |
API_URL = "http://localhost:8000/predict"
|
| 9 |
SOURCE_DATA = "data/processed/current_data.csv"
|
|
@@ -12,7 +12,7 @@ MIN_SLEEP = 2
|
|
| 12 |
MAX_SLEEP = 8
|
| 13 |
MIN_BATCH = 1
|
| 14 |
MAX_BATCH = 5
|
| 15 |
-
STARTUP_DELAY =
|
| 16 |
|
| 17 |
async def traffic_loop():
|
| 18 |
await asyncio.sleep(STARTUP_DELAY)
|
|
@@ -24,23 +24,22 @@ async def traffic_loop():
|
|
| 24 |
df = pd.read_csv(SOURCE_DATA)
|
| 25 |
print("Traffic daemon started.")
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
| 32 |
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
timeout=60,
|
| 38 |
-
)
|
| 39 |
|
| 40 |
-
|
| 41 |
-
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
| 45 |
|
| 46 |
-
|
|
|
|
| 2 |
import asyncio
|
| 3 |
import pandas as pd
|
| 4 |
import random
|
|
|
|
| 5 |
import os
|
| 6 |
+
import httpx
|
| 7 |
|
| 8 |
API_URL = "http://localhost:8000/predict"
|
| 9 |
SOURCE_DATA = "data/processed/current_data.csv"
|
|
|
|
| 12 |
MAX_SLEEP = 8
|
| 13 |
MIN_BATCH = 1
|
| 14 |
MAX_BATCH = 5
|
| 15 |
+
STARTUP_DELAY = 7
|
| 16 |
|
| 17 |
async def traffic_loop():
|
| 18 |
await asyncio.sleep(STARTUP_DELAY)
|
|
|
|
| 24 |
df = pd.read_csv(SOURCE_DATA)
|
| 25 |
print("Traffic daemon started.")
|
| 26 |
|
| 27 |
+
async with httpx.AsyncClient(timeout=60.0) as client:
|
| 28 |
+
while True:
|
| 29 |
+
try:
|
| 30 |
+
batch_size = random.randint(MIN_BATCH, MAX_BATCH)
|
| 31 |
+
sample = df.sample(batch_size)
|
| 32 |
+
csv_bytes = sample.to_csv(index=False).encode("utf-8")
|
| 33 |
|
| 34 |
+
resp = await client.post(
|
| 35 |
+
API_URL,
|
| 36 |
+
files={"file": ("sample.csv", csv_bytes, "text/csv")}
|
| 37 |
+
)
|
|
|
|
|
|
|
| 38 |
|
| 39 |
+
if resp.status_code != 200:
|
| 40 |
+
print("Traffic daemon warning:", resp.status_code)
|
| 41 |
|
| 42 |
+
except Exception as e:
|
| 43 |
+
print("Traffic daemon error:", e)
|
| 44 |
|
| 45 |
+
await asyncio.sleep(random.uniform(MIN_SLEEP, MAX_SLEEP))
|
app/monitoring/governance.py
CHANGED
|
@@ -84,8 +84,11 @@ class Governance:
|
|
| 84 |
# Log and send alerts
|
| 85 |
for alert in alerts:
|
| 86 |
self.log_alert(alert, model_version)
|
|
|
|
| 87 |
send_email_alert(alert)
|
| 88 |
send_slack_alert(alert)
|
|
|
|
|
|
|
| 89 |
|
| 90 |
return alerts
|
| 91 |
|
|
|
|
| 84 |
# Log and send alerts
|
| 85 |
for alert in alerts:
|
| 86 |
self.log_alert(alert, model_version)
|
| 87 |
+
try:
|
| 88 |
send_email_alert(alert)
|
| 89 |
send_slack_alert(alert)
|
| 90 |
+
except Exception:
|
| 91 |
+
pass
|
| 92 |
|
| 93 |
return alerts
|
| 94 |
|
data/production/predictions_log.csv
DELETED
|
@@ -1,9 +0,0 @@
|
|
| 1 |
-
credit_limit,age,pay_delay_sep,pay_delay_aug,bill_amt_sep,bill_amt_aug,pay_amt_sep,pay_amt_aug,target,model_prediction,model_probability,model_risk_level,model_version,timestamp
|
| 2 |
-
70000.0,48,0,0,20744.0,22093.0,2000.0,2000.0,0,0,0.2563046708563498,Low,v1,2026-01-14 19:33:08.201187+00:00
|
| 3 |
-
390000.0,42,0,0,310075.0,184647.0,10021.0,4000.0,0,0,0.07238720996682343,Low,v1,2026-01-14 19:33:08.201187+00:00
|
| 4 |
-
230000.0,50,-2,-2,2789.0,2942.0,2942.0,2520.0,0,0,0.06061841289885345,Low,v1,2026-01-14 19:33:08.201187+00:00
|
| 5 |
-
40000.0,47,2,2,52358.0,54892.0,4000.0,0.0,1,1,0.608097803732095,Medium,v1,2026-01-14 19:35:59.632246+00:00
|
| 6 |
-
140000.0,41,0,0,130138.0,132726.0,4756.0,4912.0,1,0,0.19300589506044843,Low,v1,2026-01-14 19:35:59.632246+00:00
|
| 7 |
-
50000.0,26,-1,-1,5052.0,0.0,0.0,0.0,0,0,0.12125611010883464,Low,v1,2026-01-14 19:37:40.778834+00:00
|
| 8 |
-
400000.0,42,-1,-1,44198.0,10132.0,10132.0,16932.0,0,0,0.06632404342712281,Low,v1,2026-01-14 19:39:17.748870+00:00
|
| 9 |
-
280000.0,45,-2,-2,26573.0,17597.0,18388.0,22302.0,0,0,0.036231471756518835,Low,v1,2026-01-14 19:39:17.748870+00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
reports/evidently/drift_report.html
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
reports/evidently/drift_report.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
{
|
| 2 |
-
"n_rows":
|
| 3 |
"results": [],
|
| 4 |
"drift": [
|
| 5 |
{
|
|
@@ -8,35 +8,35 @@
|
|
| 8 |
},
|
| 9 |
{
|
| 10 |
"column": "age",
|
| 11 |
-
"score": 0.
|
| 12 |
},
|
| 13 |
{
|
| 14 |
"column": "bill_amt_aug",
|
| 15 |
-
"score": 0.
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"column": "bill_amt_sep",
|
| 19 |
-
"score": 0.
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"column": "credit_limit",
|
| 23 |
-
"score": 0.
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"column": "pay_amt_aug",
|
| 27 |
-
"score": 0.
|
| 28 |
},
|
| 29 |
{
|
| 30 |
"column": "pay_amt_sep",
|
| 31 |
-
"score": 0.
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"column": "pay_delay_aug",
|
| 35 |
-
"score": 0.
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"column": "pay_delay_sep",
|
| 39 |
-
"score": 0.
|
| 40 |
}
|
| 41 |
]
|
| 42 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"n_rows": 20,
|
| 3 |
"results": [],
|
| 4 |
"drift": [
|
| 5 |
{
|
|
|
|
| 8 |
},
|
| 9 |
{
|
| 10 |
"column": "age",
|
| 11 |
+
"score": 0.47511682619708956
|
| 12 |
},
|
| 13 |
{
|
| 14 |
"column": "bill_amt_aug",
|
| 15 |
+
"score": 0.18806444456731744
|
| 16 |
},
|
| 17 |
{
|
| 18 |
"column": "bill_amt_sep",
|
| 19 |
+
"score": 0.24552730613716447
|
| 20 |
},
|
| 21 |
{
|
| 22 |
"column": "credit_limit",
|
| 23 |
+
"score": 0.36053044124390277
|
| 24 |
},
|
| 25 |
{
|
| 26 |
"column": "pay_amt_aug",
|
| 27 |
+
"score": 0.17193500059085817
|
| 28 |
},
|
| 29 |
{
|
| 30 |
"column": "pay_amt_sep",
|
| 31 |
+
"score": 0.6302425864834968
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"column": "pay_delay_aug",
|
| 35 |
+
"score": 0.2880354597906329
|
| 36 |
},
|
| 37 |
{
|
| 38 |
"column": "pay_delay_sep",
|
| 39 |
+
"score": 0.37250685167671616
|
| 40 |
}
|
| 41 |
]
|
| 42 |
}
|