Spaces:
Sleeping
Sleeping
File size: 1,364 Bytes
55cdb7e a32e584 55cdb7e a32e584 55cdb7e | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 | """Test that the encoding fix works."""
# Evict any cached copies of the project modules so that the import below
# executes src.preprocessing afresh instead of reusing an already-loaded one.
import sys

for _stale in ("src.preprocessing", "src.infer"):
    sys.modules.pop(_stale, None)

from src.preprocessing import prepare_features
import pandas as pd

# Both probe rows share every column except "Country", so any difference in
# the prepared features can only come from the country value.
# NOTE(review): the category strings are presumably taken from the project's
# valid_categories list — confirm against that list.
_shared_columns = {
    "YearsCode": [5.0],
    "EdLevel": ["Bachelor's degree (B.A., B.S., B.Eng., etc.)"],
    "DevType": ["Developer, full-stack"],
}
input1 = pd.DataFrame({"Country": ["United States of America"], **_shared_columns})
input2 = pd.DataFrame({"Country": ["Germany"], **_shared_columns})

print("Testing prepare_features with different countries...")
features1 = prepare_features(input1)
features2 = prepare_features(input2)

# Report the shape and the first ten column names of each result.
print(f"\nUSA features: {features1.shape}")
print(f"Columns: {list(features1.columns)[:10]}")
print(f"\nGermany features: {features2.shape}")
print(f"Columns: {list(features2.columns)[:10]}")

# Informational only: the verdict below does not depend on this comparison.
print(f"\nAre they different? {not features1.equals(features2)}")

# The verdict is based solely on the USA result having more than one column.
print(
    "\n✅ SUCCESS: Categorical features are preserved!"
    if features1.shape[1] > 1
    else "\n❌ FAIL: Still only has numeric features"
)
|