# dima806's picture
# Upload 32 files
# a32e584 verified
"""Test that the encoding fix works."""
# Force reload of modules
import sys
if "src.preprocessing" in sys.modules:
del sys.modules["src.preprocessing"]
if "src.infer" in sys.modules:
del sys.modules["src.infer"]
from src.preprocessing import prepare_features
import pandas as pd
# Create test inputs with different countries (values from valid_categories)
input1 = pd.DataFrame(
{
"Country": ["United States of America"],
"YearsCode": [5.0],
"EdLevel": ["Bachelor's degree (B.A., B.S., B.Eng., etc.)"],
"DevType": ["Developer, full-stack"],
}
)
input2 = pd.DataFrame(
{
"Country": ["Germany"],
"YearsCode": [5.0],
"EdLevel": ["Bachelor's degree (B.A., B.S., B.Eng., etc.)"],
"DevType": ["Developer, full-stack"],
}
)
print("Testing prepare_features with different countries...")
features1 = prepare_features(input1)
features2 = prepare_features(input2)
print(f"\nUSA features: {features1.shape}")
print(f"Columns: {list(features1.columns)[:10]}")
print(f"\nGermany features: {features2.shape}")
print(f"Columns: {list(features2.columns)[:10]}")
print(f"\nAre they different? {not features1.equals(features2)}")
if features1.shape[1] > 1:
print("\n✅ SUCCESS: Categorical features are preserved!")
else:
print("\n❌ FAIL: Still only has numeric features")