File size: 1,772 Bytes
6c8bfc8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import pandas as pd

from src.preprocessing import RawToModelTransformer


def test_transform_computes_derived_features():
    """Check that ``transform`` emits each derived ratio column with the expected value.

    A single raw record is passed through ``RawToModelTransformer.transform``;
    every derived column listed below must be present in the output and match
    the ratio computed from the raw inputs to within ``1e-8``.
    """
    raw_record = {
        "AMT_ANNUITY": 1000.0,
        "AMT_CREDIT": 20000.0,
        "AMT_INCOME_TOTAL": 60000.0,
        "CNT_FAM_MEMBERS": 3,
        "DAYS_EMPLOYED": -1000,
        "DAYS_BIRTH": -10000,
        "NAME_CONTRACT_TYPE": "Cash loans",
    }
    transformed = RawToModelTransformer().transform(pd.DataFrame([raw_record]))

    # Derived numeric: output column -> value expected from the raw inputs above.
    expected_ratios = {
        "PAYMENT_RATE": 1000.0 / 20000.0,
        "INCOME_CREDIT_PERC": 60000.0 / 20000.0,
        "INCOME_PER_PERSON": 60000.0 / 3.0,
        "ANNUITY_INCOME_PERC": 1000.0 / 60000.0,
        "DAYS_EMPLOYED_PERC": -1000.0 / -10000.0,
    }
    for column, target in expected_ratios.items():
        # Presence is checked first so a missing column fails on its own assert.
        assert column in transformed.columns
        assert abs(transformed[column].iloc[0] - target) < 1e-8


def test_transform_maps_categorical_to_one_hot():
    """Check that a ``"Cash loans"`` contract type yields a one-hot column set to 1.

    The exact (sanitized) column name is not pinned: any output column that
    starts with ``NAME_CONTRACT_TYPE_`` and contains ``Cash`` is accepted.
    """

    def is_cash_indicator(column):
        # Prefix test runs first; the substring test only runs on prefix matches.
        return column.startswith("NAME_CONTRACT_TYPE_") and "Cash" in column

    raw_record = {"NAME_CONTRACT_TYPE": "Cash loans", "AMT_INCOME_TOTAL": 1000.0}
    encoded = RawToModelTransformer().transform(pd.DataFrame([raw_record]))

    cash_columns = [column for column in encoded.columns if is_cash_indicator(column)]
    assert cash_columns, "No one-hot column found for NAME_CONTRACT_TYPE"

    # Only the first matching column is inspected; it must be 1 for this input.
    first_match = cash_columns[0]
    assert encoded[first_match].iloc[0] == 1