File size: 1,364 Bytes
55cdb7e
 
 
 
a32e584
 
 
 
 
55cdb7e
 
 
 
 
a32e584
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55cdb7e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
"""Test that the encoding fix works."""

# Force reload of modules
import sys

# Evict any already-imported copies so the import statements that follow
# execute the module code again instead of reusing the cached objects.
for _cached_name in ("src.preprocessing", "src.infer"):
    sys.modules.pop(_cached_name, None)

from src.preprocessing import prepare_features
import pandas as pd

# Build two single-row frames that are identical except for "Country", so any
# difference between the prepared feature sets is attributable to the country
# value alone. (Per the original note, the values come from valid_categories.)
_common_columns = {
    "YearsCode": [5.0],
    "EdLevel": ["Bachelor's degree (B.A., B.S., B.Eng., etc.)"],
    "DevType": ["Developer, full-stack"],
}

# "Country" is listed first so the column order stays
# Country, YearsCode, EdLevel, DevType in both frames.
input1 = pd.DataFrame({"Country": ["United States of America"], **_common_columns})
input2 = pd.DataFrame({"Country": ["Germany"], **_common_columns})

# Run both single-row inputs through the preprocessing step under test.
# The results are used as DataFrames below (.shape, .columns, .equals) --
# presumably that is what prepare_features returns; confirm against src.preprocessing.
print("Testing prepare_features with different countries...")
features1 = prepare_features(input1)
features2 = prepare_features(input2)

# Show the shape and the first ten column names of each result for inspection.
print(f"\nUSA features: {features1.shape}")
print(f"Columns: {list(features1.columns)[:10]}")

print(f"\nGermany features: {features2.shape}")
print(f"Columns: {list(features2.columns)[:10]}")

# Informational only: this comparison is printed but does not affect the verdict.
# NOTE(review): consider making it part of the pass/fail condition if inputs
# that differ only by country are expected to produce different features.
print(f"\nAre they different? {not features1.equals(features2)}")

# Verdict: the check passes when the prepared frame has more than one column.
if features1.shape[1] > 1:
    print("\n✅ SUCCESS: Categorical features are preserved!")
else:
    print("\n❌ FAIL: Still only has numeric features")
    # Exit with a non-zero status so a shell or CI job can detect the failure;
    # previously the script reported FAIL but still exited with status 0.
    sys.exit(1)