Swiper-match / debug_features.py
Ubuntu
added proper models
fafa5f2
#!/usr/bin/env python3
import sys
import os
sys.path.append('.')
import pandas as pd
# Import the CarDealerMatcher from app.py
from app import CarDealerMatcher
def _vehicle_frame(make, model, year=2020, odometer=50000):
    """Build a one-row DataFrame describing a single vehicle for prediction."""
    return pd.DataFrame([{
        'make': make,
        'model': model,
        'year': year,
        'odometer': odometer,
    }])


def debug_feature_types():
    """Print diagnostics about how the loaded model handles its input features.

    Creates a ``CarDealerMatcher`` pointed at ``./simple_autogluon_models``
    and, when its model reports as loaded:

    1. prints the predictor's ``feature_metadata``;
    2. runs ``predict_proba`` on two one-row frames that differ in make and
       in whether ``model`` is a word ('camry') or a digit string ('5'), and
       reports whether the two probability tables are identical;
    3. runs ``predict_proba`` on a row with placeholder make/model values
       (presumably unseen by the model -- verify against the training data).

    Everything is written to stdout and nothing is returned. Errors raised
    by the predictor are caught and printed, so this diagnostic never aborts.
    """
    print("=== Debugging Feature Types ===")

    # Initialize simple matcher
    simple_matcher = CarDealerMatcher(model_path="./simple_autogluon_models")

    if not simple_matcher.model_loaded:
        # Say so explicitly instead of ending without any explanation.
        print("Model not loaded from ./simple_autogluon_models; nothing to debug.")
        return

    print("\n🔍 Model Feature Information:")

    # Check feature types
    try:
        feature_metadata = simple_matcher.predictor.feature_metadata
        print(f"Feature metadata: {feature_metadata}")
    except Exception as e:
        print(f"Could not get feature metadata: {e}")

    # Create test data to see how features are processed
    print("\n🧪 Testing Feature Processing:")
    test_data_text = _vehicle_frame('toyota', 'camry')  # Text model
    test_data_numeric = _vehicle_frame('bmw', '5')  # Numeric model

    print(f"Text model data types: {test_data_text.dtypes}")
    print(f"Numeric model data types: {test_data_numeric.dtypes}")

    # Test preprocessing
    try:
        print("\n🔄 Testing Model Predictions:")

        # Text model prediction
        text_result = simple_matcher.predictor.predict_proba(test_data_text)
        print(f"Text model prediction shape: {text_result.shape}")
        print(f"Text model top prediction: {text_result.max(axis=1).iloc[0]:.4f}")

        # Numeric model prediction
        numeric_result = simple_matcher.predictor.predict_proba(test_data_numeric)
        print(f"Numeric model prediction shape: {numeric_result.shape}")
        print(f"Numeric model top prediction: {numeric_result.max(axis=1).iloc[0]:.4f}")

        # Identical probability tables for different inputs would suggest
        # the distinguishing features are being ignored by the model.
        if text_result.equals(numeric_result):
            print("❌ PROBLEM: Predictions are identical!")
        else:
            print("✅ Predictions are different (as expected)")
    except Exception as e:
        print(f"Error during prediction testing: {e}")

    # Test what happens with unknown categories
    print("\n🚫 Testing Unknown Categories:")
    unknown_data = _vehicle_frame('unknown_make', 'unknown_model')

    try:
        unknown_result = simple_matcher.predictor.predict_proba(unknown_data)
        print(f"Unknown category top prediction: {unknown_result.max(axis=1).iloc[0]:.4f}")
    except Exception as e:
        print(f"Error with unknown categories: {e}")
# Script entry point: run the diagnostics only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    debug_feature_types()