| from __future__ import annotations |
|
|
| import json |
| from pathlib import Path |
|
|
| import joblib |
| import numpy as np |
| import pandas as pd |
|
|
| from api.service import InferenceService, load_inference_service, resolve_threshold |
|
|
|
|
class DummyPreprocessor:
    """Identity preprocessor stub exposing the 30 expected feature names."""

    # Column order mirrors the credit-card dataset: Time, V1..V28, Amount.
    feature_names_in_ = np.array(["Time"] + [f"V{i}" for i in range(1, 29)] + ["Amount"])

    def transform(self, frame: pd.DataFrame) -> pd.DataFrame:
        """Return the input frame untouched (pass-through)."""
        return frame
|
|
|
|
class DummyModel:
    """Deterministic classifier stub: fraud probability keyed on Amount.

    Buckets: amount >= 300 -> high fraud prob (0.9), 100 <= amount < 300 ->
    borderline (0.45), otherwise low (0.05).
    """

    def predict_proba(self, frame: pd.DataFrame) -> np.ndarray:
        """Return an (n, 2) array of [non-fraud, fraud] probability pairs."""

        def _pair(amount: float) -> list[float]:
            # Thresholds checked from highest to lowest so each row lands
            # in exactly one bucket.
            if amount >= 300:
                return [0.1, 0.9]
            if amount >= 100:
                return [0.55, 0.45]
            return [0.95, 0.05]

        return np.array([_pair(amount) for amount in frame["Amount"].tolist()])
|
|
|
|
| def _record(amount: float) -> dict[str, float]: |
| payload = {"Time": 0.0, "Amount": amount} |
| for i in range(1, 29): |
| payload[f"V{i}"] = 0.0 |
| return payload |
|
|
|
|
def test_inference_service_predict_records_risk_levels() -> None:
    """Predictions should be bucketed into low/medium/high risk levels."""
    columns = ["Time", *[f"V{i}" for i in range(1, 29)], "Amount"]
    svc = InferenceService(
        model=DummyModel(),
        preprocessor=DummyPreprocessor(),
        threshold=0.5,
        model_path=Path("models/model.pkl"),
        preprocessor_path=Path("models/preprocessor.pkl"),
        feature_columns=columns,
    )

    # Amounts chosen to land in each of DummyModel's probability buckets.
    results = svc.predict_records([_record(20), _record(120), _record(320)])

    for result, expected in zip(results, ("low", "medium", "high")):
        assert result["risk_level"] == expected
    assert results[2]["is_fraud"] is True
|
|
|
|
def test_resolve_threshold_precedence(tmp_path) -> None:
    """The training report's threshold must win over model report and config."""
    training_report = tmp_path / "model_training_report.json"
    model_report = tmp_path / "model_report.json"
    config_path = tmp_path / "train.yaml"

    # All three sources exist with distinct values so precedence is observable.
    training_report.write_text(
        json.dumps({"best_model": {"selected_threshold": 0.74}}), encoding="utf-8"
    )
    model_report.write_text(
        json.dumps({"threshold_selection": {"selected_threshold": 0.63}}), encoding="utf-8"
    )
    config_path.write_text("threshold:\n decision_threshold: 0.51\n", encoding="utf-8")

    resolved = resolve_threshold(
        training_report_path=training_report,
        model_report_path=model_report,
        config_path=config_path,
    )

    # Training report (0.74) takes precedence over 0.63 and 0.51.
    assert resolved == 0.74
|
|
|
|
def test_load_inference_service_reads_artifacts_and_threshold(tmp_path) -> None:
    """Artifacts load from disk and the threshold comes from the training report."""
    # load_inference_service is cached; reset so this test sees fresh paths.
    load_inference_service.cache_clear()

    model_path = tmp_path / "model.pkl"
    preprocessor_path = tmp_path / "preprocessor.pkl"
    report_path = tmp_path / "model_training_report.json"

    report_path.write_text(
        json.dumps({"best_model": {"selected_threshold": 0.66}}), encoding="utf-8"
    )
    joblib.dump(DummyModel(), model_path)
    joblib.dump(DummyPreprocessor(), preprocessor_path)

    # Model report and config paths deliberately point at missing files so the
    # threshold can only come from the training report.
    svc = load_inference_service(
        model_path=str(model_path),
        preprocessor_path=str(preprocessor_path),
        training_report_path=str(report_path),
        model_report_path=str(tmp_path / "missing_model_report.json"),
        config_path=str(tmp_path / "missing_config.yaml"),
    )

    assert svc.threshold == 0.66
    predictions = svc.predict_records([_record(300.0)])
    assert predictions[0]["is_fraud"] is True
|
|