Spaces:
Sleeping
Sleeping
"""Test that the encoding fix works."""

import sys

# Evict any cached copies of the project modules so the import below
# re-executes them instead of reusing an already-loaded version.
for _stale in ("src.preprocessing", "src.infer"):
    sys.modules.pop(_stale, None)

from src.preprocessing import prepare_features
import pandas as pd

# Columns shared by both probes; only "Country" varies between them.
_shared_columns = {
    "YearsCode": [5.0],
    "EdLevel": ["Bachelor's degree (B.A., B.S., B.Eng., etc.)"],
    "DevType": ["Developer, full-stack"],
}

# Single-row inputs for two different countries (values from valid_categories).
# "Country" is listed first so the column order stays
# Country, YearsCode, EdLevel, DevType.
input1 = pd.DataFrame({"Country": ["United States of America"], **_shared_columns})
input2 = pd.DataFrame({"Country": ["Germany"], **_shared_columns})

print("Testing prepare_features with different countries...")
features1 = prepare_features(input1)
features2 = prepare_features(input2)

# Report the shape and the first ten column names of each result.
for _label, _features in (("USA", features1), ("Germany", features2)):
    print(f"\n{_label} features: {_features.shape}")
    print(f"Columns: {list(_features.columns)[:10]}")

frames_differ = not features1.equals(features2)
print(f"\nAre they different? {frames_differ}")

# The verdict depends only on the column count of the first result:
# more than one column is reported as success.
verdict = (
    "\n✅ SUCCESS: Categorical features are preserved!"
    if features1.shape[1] > 1
    else "\n❌ FAIL: Still only has numeric features"
)
print(verdict)