# fraud-detection-mlops-api / tests/test_service.py
# Snapshot synced from GitHub by github-actions[bot]
# deploy: sync snapshot from github (commit 4937cba)
from __future__ import annotations
import json
from pathlib import Path
import joblib
import numpy as np
import pandas as pd
from api.service import InferenceService, load_inference_service, resolve_threshold
class DummyPreprocessor:
    """Identity stand-in for the fitted preprocessing pipeline."""

    # Mimics sklearn's fitted attribute listing the expected input columns:
    # Time, V1..V28, Amount (30 features total).
    feature_names_in_ = np.array(["Time"] + [f"V{i}" for i in range(1, 29)] + ["Amount"])

    def transform(self, frame: pd.DataFrame) -> pd.DataFrame:
        """Return the input frame unchanged (no-op transform)."""
        return frame
class DummyModel:
    """Deterministic stub classifier whose output depends only on Amount."""

    def predict_proba(self, frame: pd.DataFrame) -> np.ndarray:
        """Return one [P(legit), P(fraud)] row per record, by Amount band."""

        def band(amount: float) -> list[float]:
            # Thresholds chosen so 300+ is high risk, 100-299 borderline,
            # anything below clearly legitimate.
            if amount >= 300:
                return [0.1, 0.9]
            if amount >= 100:
                return [0.55, 0.45]
            return [0.95, 0.05]

        return np.array([band(amount) for amount in frame["Amount"].tolist()])
def _record(amount: float) -> dict[str, float]:
payload = {"Time": 0.0, "Amount": amount}
for i in range(1, 29):
payload[f"V{i}"] = 0.0
return payload
def test_inference_service_predict_records_risk_levels() -> None:
    """Amounts spanning the dummy model's bands map to low/medium/high."""
    columns = ["Time", *[f"V{i}" for i in range(1, 29)], "Amount"]
    service = InferenceService(
        model=DummyModel(),
        preprocessor=DummyPreprocessor(),
        threshold=0.5,
        model_path=Path("models/model.pkl"),
        preprocessor_path=Path("models/preprocessor.pkl"),
        feature_columns=columns,
    )

    results = service.predict_records([_record(20), _record(120), _record(320)])

    for result, expected_level in zip(results, ("low", "medium", "high")):
        assert result["risk_level"] == expected_level
    # Only the high-risk record crosses the 0.5 decision threshold.
    assert results[2]["is_fraud"] is True
def test_resolve_threshold_precedence(tmp_path) -> None:
    """The training report's threshold outranks the model report and config."""
    training_report = tmp_path / "model_training_report.json"
    model_report = tmp_path / "model_report.json"
    config_path = tmp_path / "train.yaml"

    # Populate all three candidate sources with distinct values so the
    # winner unambiguously identifies the precedence order.
    training_report.write_text(
        json.dumps({"best_model": {"selected_threshold": 0.74}}), encoding="utf-8"
    )
    model_report.write_text(
        json.dumps({"threshold_selection": {"selected_threshold": 0.63}}), encoding="utf-8"
    )
    config_path.write_text("threshold:\n decision_threshold: 0.51\n", encoding="utf-8")

    resolved = resolve_threshold(
        training_report_path=training_report,
        model_report_path=model_report,
        config_path=config_path,
    )

    assert resolved == 0.74
def test_load_inference_service_reads_artifacts_and_threshold(tmp_path) -> None:
    """Artifacts round-trip via joblib and the report threshold is applied."""
    # load_inference_service is memoized; clear it so this test's paths win.
    load_inference_service.cache_clear()

    model_path = tmp_path / "model.pkl"
    preprocessor_path = tmp_path / "preprocessor.pkl"
    joblib.dump(DummyModel(), model_path)
    joblib.dump(DummyPreprocessor(), preprocessor_path)

    training_report = tmp_path / "model_training_report.json"
    training_report.write_text(
        json.dumps({"best_model": {"selected_threshold": 0.66}}), encoding="utf-8"
    )

    # Model report and config intentionally absent: the loader must fall
    # back to the training report alone.
    service = load_inference_service(
        model_path=str(model_path),
        preprocessor_path=str(preprocessor_path),
        training_report_path=str(training_report),
        model_report_path=str(tmp_path / "missing_model_report.json"),
        config_path=str(tmp_path / "missing_config.yaml"),
    )

    assert service.threshold == 0.66
    predictions = service.predict_records([_record(300.0)])
    assert predictions[0]["is_fraud"] is True