import numpy as np
import pandas as pd
import pytest
import app as app_module
# Support both the current public API (predict_score / model) and the
# legacy private one (_predict / MODEL) so the suite runs against either
# version of app.py.
try:
    from app import predict_score, model
except ImportError:
    from app import _predict as predict_score

    model = app_module.MODEL
class DummyModel:
    """Deterministic stand-in exposing the sklearn-style prediction API."""

    def __init__(self, proba: float = 0.2) -> None:
        # Fixed positive-class probability returned for every input.
        self.proba = proba

    def predict_proba(self, df: pd.DataFrame) -> np.ndarray:
        """Return one row of [P(class 0), P(class 1)], ignoring *df*."""
        negative = 1.0 - self.proba
        return np.array([[negative, self.proba]])

    def predict(self, df: pd.DataFrame) -> np.ndarray:
        """Return the stored probability as a one-element array."""
        return np.array([self.proba])
def _series_json(payload: dict) -> str:
# Convert a single-record payload using Series.to_json(orient="records").
# Pandas returns a one-item list; trim brackets to get the JSON object.
return pd.Series([payload]).to_json(orient="records")[1:-1]
def _extract_proba(response: str) -> float:
for line in response.splitlines():
if line.startswith("Probabilit"):
return float(line.split(":", 1)[1].strip())
raise AssertionError("Probability line not found in response")
@pytest.fixture()
def dummy_model(monkeypatch: pytest.MonkeyPatch) -> DummyModel:
    """Swap the app's global model for a deterministic stub.

    Both the upper- and lower-case attribute names are patched so the
    fixture works whichever spelling app.py uses; ``raising=False``
    tolerates the one that is absent. Keeps tests fast and independent of
    on-disk model artifacts.
    """
    stub = DummyModel(proba=0.23)
    for attr in ("MODEL", "model"):
        monkeypatch.setattr(app_module, attr, stub, raising=False)
    return stub
def test_predict_valid_minimal_json(dummy_model: DummyModel) -> None:
    """A well-formed minimal payload yields a probability in [0, 1]."""
    json_line = _series_json(
        {"EXT_SOURCE_1": 0.5, "AMT_INCOME_TOTAL": 50000.0}
    )
    response = predict_score(json_line)
    assert "Erreur" not in response
    assert 0.0 <= _extract_proba(response) <= 1.0
def test_predict_partial_json_missing_columns(dummy_model: DummyModel) -> None:
    """Payloads missing model columns are reindexed (NaN-filled) and scored."""
    response = predict_score(_series_json({"EXT_SOURCE_2": 0.1}))
    assert "Erreur" not in response
    proba = _extract_proba(response)
    assert 0.0 <= proba <= 1.0
def test_predict_invalid_json_returns_error() -> None:
    """Malformed JSON must produce an explicit error message, not a crash."""
    bad_input = "{this is not valid json"
    assert "Erreur" in predict_score(bad_input)
def test_predict_out_of_range_value(dummy_model: DummyModel) -> None:
    """Aberrant values (e.g. negative income) are currently still scored."""
    payload = {"AMT_INCOME_TOTAL": -1000.0, "EXT_SOURCE_3": 0.2}
    response = predict_score(_series_json(payload))
    assert "Erreur" not in response
    proba = _extract_proba(response)
    assert 0.0 <= proba <= 1.0
def test_predict_accepts_raw_categorical(dummy_model: DummyModel) -> None:
    """Raw categorical fields must be accepted and mapped to the model's
    one-hot columns (e.g. NAME_CONTRACT_TYPE -> NAME_CONTRACT_TYPE_Cash loans)."""
    payload = {
        "NAME_CONTRACT_TYPE": "Cash loans",
        "AMT_INCOME_TOTAL": 75000.0,
        "EXT_SOURCE_1": 0.3,
    }
    response = predict_score(_series_json(payload))
    assert "Erreur" not in response
    proba = _extract_proba(response)
    assert 0.0 <= proba <= 1.0