Commit
·
eafdbbf
1
Parent(s):
700e2b6
Tests update
Browse files
- README.md +2 -0
- reports/evidently/drift_report.html +0 -0
- requirements-dev.txt +2 -0
- tests/conftest.py +9 -0
- tests/integration/test_api.py +74 -0
- tests/{test_governance.json → integration/test_governance.json} +0 -0
- tests/integration/test_governance.py +14 -0
- tests/integration/test_run_drift.py +25 -0
- tests/test_api.py +0 -0
- tests/test_governance.py +0 -15
- tests/test_run_drift.py +0 -35
- tests/test_schemas.py +0 -0
- tests/unit/test_schemas.py +59 -0
README.md
CHANGED
|
@@ -11,6 +11,8 @@ license: mit
|
|
| 11 |
|
| 12 |
# Under Construction
|
| 13 |
|
|
|
|
|
|
|
| 14 |
py -3.9 -m venv .venv
|
| 15 |
|
| 16 |
.venv\Scripts\activate
|
|
|
|
| 11 |
|
| 12 |
# Under Construction
|
| 13 |
|
| 14 |
+
Build a production-ready ML inference service with post-deployment drift detection, governance, and alerting, demonstrating real MLOps practices rather than offline modeling.
|
| 15 |
+
|
| 16 |
py -3.9 -m venv .venv
|
| 17 |
|
| 18 |
.venv\Scripts\activate
|
reports/evidently/drift_report.html
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
requirements-dev.txt
CHANGED
|
@@ -11,3 +11,5 @@ scipy>=1.10.0,<2.0.0
|
|
| 11 |
python-multipart>=0.0.6
|
| 12 |
typing-extensions>=4.0.0
|
| 13 |
jinja2
|
|
|
|
|
|
|
|
|
| 11 |
python-multipart>=0.0.6
|
| 12 |
typing-extensions>=4.0.0
|
| 13 |
jinja2
|
| 14 |
+
pytest
|
| 15 |
+
httpx
|
tests/conftest.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# tests/conftest.py
"""Pytest bootstrap: put the repository root on sys.path so `import app` works."""

import sys
from pathlib import Path

# conftest.py lives in tests/, so the repo root is one level up.
PROJECT_ROOT = Path(__file__).parent.parent.resolve()

_root = str(PROJECT_ROOT)
if _root not in sys.path:
    sys.path.insert(0, _root)
|
tests/integration/test_api.py
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# tests/test_api.py
#
# Integration tests for the FastAPI service: the app is exercised in-process
# through a module-level TestClient (no network, no running server needed).

import io
import pandas as pd
from fastapi.testclient import TestClient
from app.main import app

# Shared client used by every test in this module.
client = TestClient(app)
|
| 9 |
+
|
| 10 |
+
|
def test_health_endpoint():
    """GET /health answers 200 with the expected status body."""
    resp = client.get("/health")
    assert resp.status_code == 200
    assert resp.json() == {"status": "ok"}
|
| 15 |
+
|
| 16 |
+
|
def test_predict_endpoint_valid_csv():
    """POST /predict with a well-formed CSV returns results, drift info and row count."""
    rows = [
        {
            "credit_limit": 50000.0,
            "age": 35,
            "pay_delay_sep": 0,
            "pay_delay_aug": -1,
            "bill_amt_sep": 12000.0,
            "bill_amt_aug": 11000.0,
            "pay_amt_sep": 3000.0,
            "pay_amt_aug": 2500.0,
        },
        {
            "credit_limit": 200000.0,
            "age": 42,
            "pay_delay_sep": 2,
            "pay_delay_aug": 0,
            "bill_amt_sep": 60000.0,
            "bill_amt_aug": 58000.0,
            "pay_amt_sep": 10000.0,
            "pay_amt_aug": 9000.0,
        },
    ]

    # Serialize to CSV bytes exactly as a client upload would look.
    payload = io.BytesIO(pd.DataFrame(rows).to_csv(index=False).encode("utf-8"))

    response = client.post(
        "/predict",
        files={"file": ("test.csv", payload, "text/csv")},
    )

    assert response.status_code == 200

    body = response.json()
    assert "results" in body
    assert "drift" in body
    assert body["n_rows"] == 2
|
| 55 |
+
|
| 56 |
+
|
def test_predict_endpoint_missing_columns():
    """POST /predict with a CSV lacking required columns is rejected with 400."""
    partial = pd.DataFrame([{"credit_limit": 50000.0, "age": 35}])
    payload = io.BytesIO(partial.to_csv(index=False).encode("utf-8"))

    response = client.post(
        "/predict",
        files={"file": ("bad.csv", payload, "text/csv")},
    )

    assert response.status_code == 400
    assert "Invalid schema" in response.json()["detail"]
|
tests/{test_governance.json → integration/test_governance.json}
RENAMED
|
File without changes
|
tests/integration/test_governance.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# tests/test_governance.py
#
# Integration test for the governance checks: feed a stored Evidently-style
# report fixture through run_governance_checks and verify the result shape.

import json
from pathlib import Path

from app.monitoring.governance import run_governance_checks

# Resolve the fixture relative to this file so the test passes regardless of
# the working directory pytest is launched from (the old hard-coded
# "tests/integration/..." path only worked from the repo root).
FIXTURE = Path(__file__).parent / "test_governance.json"


def test_governance_detects_alerts():
    """run_governance_checks on the stored report fixture returns a list."""
    report = json.loads(FIXTURE.read_text())

    alerts = run_governance_checks(report, model_version="v1")

    # NOTE(review): the previous `assert len(alerts) >= 0` was vacuously true
    # for any list and could never fail. Assert only what is guaranteed here;
    # pin an exact alert count for this fixture once its contents are fixed.
    assert isinstance(alerts, list)
|
tests/integration/test_run_drift.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# tests/integration/test_run_drift.py
#
# Integration test for the drift-check pipeline: loads the committed current
# and reference datasets and runs them through run_drift_check.
# Fix: dropped the unused `import os` (all path work uses pathlib).

from pathlib import Path

import pandas as pd

from app.monitoring.drift import run_drift_check


def test_run_drift_check_outputs_metrics():
    """run_drift_check produces a non-None report for the stored datasets."""
    # tests/integration/ -> tests/ -> repository root.
    repo_root = Path(__file__).resolve().parents[2]

    current_path = repo_root / "data" / "processed" / "current_data.csv"
    reference_path = repo_root / "models" / "v1" / "reference_data.csv"

    # Fail fast with a useful message if the data artifacts are missing.
    assert current_path.exists(), f"missing dataset: {current_path}"
    assert reference_path.exists(), f"missing dataset: {reference_path}"

    current_df = pd.read_csv(current_path)
    reference_df = pd.read_csv(reference_path)

    report = run_drift_check(
        current_df,
        reference_df,
        model_version="v1"
    )

    assert report is not None
|
tests/test_api.py
DELETED
|
File without changes
|
tests/test_governance.py
DELETED
|
@@ -1,15 +0,0 @@
|
|
| 1 |
-
import sys
|
| 2 |
-
from pathlib import Path
|
| 3 |
-
import json
|
| 4 |
-
|
| 5 |
-
repo_root = Path(__file__).parent.parent.resolve()
|
| 6 |
-
sys.path.insert(0, str(repo_root))
|
| 7 |
-
|
| 8 |
-
from app.monitoring.governance import run_governance_checks
|
| 9 |
-
|
| 10 |
-
# Load a sample report JSON (create this for testing)
|
| 11 |
-
with open('tests/test_governance.json', 'r') as f:
|
| 12 |
-
report = json.load(f)
|
| 13 |
-
|
| 14 |
-
alerts = run_governance_checks(report, model_version="v1")
|
| 15 |
-
print("Governance alerts:", alerts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_run_drift.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
| 1 |
-
import sys
|
| 2 |
-
import os
|
| 3 |
-
import pandas as pd
|
| 4 |
-
|
| 5 |
-
# Ensure project root is in sys.path
|
| 6 |
-
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 7 |
-
|
| 8 |
-
from app.monitoring.drift import run_drift_check
|
| 9 |
-
from app.monitoring.governance import run_governance_checks
|
| 10 |
-
|
| 11 |
-
def main():
|
| 12 |
-
root_dir = os.path.dirname(os.path.dirname(__file__))
|
| 13 |
-
|
| 14 |
-
# Load current and reference data
|
| 15 |
-
current_path = os.path.join(root_dir, "data", "processed", "current_data.csv")
|
| 16 |
-
reference_path = os.path.join(root_dir, "models", "v1", "reference_data.csv")
|
| 17 |
-
|
| 18 |
-
if not os.path.exists(current_path):
|
| 19 |
-
raise FileNotFoundError(f"{current_path} does not exist.")
|
| 20 |
-
if not os.path.exists(reference_path):
|
| 21 |
-
raise FileNotFoundError(f"{reference_path} does not exist.")
|
| 22 |
-
|
| 23 |
-
current_df = pd.read_csv(current_path)
|
| 24 |
-
reference_df = pd.read_csv(reference_path)
|
| 25 |
-
|
| 26 |
-
# Run drift check
|
| 27 |
-
report = run_drift_check(current_df, reference_df, model_version="v1")
|
| 28 |
-
|
| 29 |
-
# Run drift check
|
| 30 |
-
print("Metrics from Evidently report:", report)
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
if __name__ == "__main__":
|
| 35 |
-
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_schemas.py
DELETED
|
File without changes
|
tests/unit/test_schemas.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# tests/test_schemas.py
#
# Unit tests for the Pydantic request/response schemas used by the API.

import pytest
from pydantic import ValidationError
from app.api.schemas import PredictionRequest, PredictionResponse
|
| 6 |
+
|
| 7 |
+
|
def test_prediction_request_valid():
    """A fully-populated payload builds a PredictionRequest with fields intact."""
    request = PredictionRequest(
        credit_limit=50000.0,
        age=35,
        pay_delay_sep=0,
        pay_delay_aug=-1,
        bill_amt_sep=12000.0,
        bill_amt_aug=11000.0,
        pay_amt_sep=3000.0,
        pay_amt_aug=2500.0,
    )

    assert request.credit_limit == 50000.0
    assert request.age == 35
|
| 23 |
+
|
| 24 |
+
|
def test_prediction_request_missing_field():
    """Omitting required fields must raise a ValidationError."""
    incomplete = {"credit_limit": 50000.0, "age": 35}

    with pytest.raises(ValidationError):
        PredictionRequest(**incomplete)
|
| 33 |
+
|
| 34 |
+
|
def test_prediction_request_invalid_type():
    """Non-numeric values in numeric fields must raise a ValidationError."""
    bad_payload = dict(
        credit_limit="not-a-number",
        age="thirty",
        pay_delay_sep=0,
        pay_delay_aug=0,
        bill_amt_sep=1000.0,
        bill_amt_aug=1000.0,
        pay_amt_sep=100.0,
        pay_amt_aug=100.0,
    )

    with pytest.raises(ValidationError):
        PredictionRequest(**bad_payload)
|
| 49 |
+
|
| 50 |
+
|
def test_prediction_response_valid():
    """A valid payload round-trips into a PredictionResponse with exact values."""
    payload = {
        "prediction": 1,
        "probability": 0.82
    }

    resp = PredictionResponse(**payload)

    # Pin exact values: the previous `resp.prediction in (0, 1)` and
    # `0.0 <= resp.probability <= 1.0` checks were satisfied by the input by
    # construction and could never catch a field being dropped or coerced.
    assert resp.prediction == 1
    assert resp.probability == 0.82
|