Spaces:

mzx
/

Swiper-match

Sleeping

File size: 3,089 Bytes

fafa5f2

#!/usr/bin/env python3

import sys
import os
sys.path.append('.')
import pandas as pd

# Import the CarDealerMatcher from app.py
from app import CarDealerMatcher

def _single_row_frame(make, model, year=2020, odometer=50000):
    """Build a one-row DataFrame with the make/model/year/odometer columns."""
    return pd.DataFrame([{
        'make': make,
        'model': model,
        'year': year,
        'odometer': odometer,
    }])


def _top_probability(proba):
    """Return the largest value in the first row of a ``predict_proba`` result."""
    return proba.max(axis=1).iloc[0]


def debug_feature_types():
    """Print diagnostics about how the loaded model treats its input features.

    The function builds a ``CarDealerMatcher`` pointed at
    ``./simple_autogluon_models`` and, if the matcher reports that a model was
    loaded, prints:

    * the predictor's ``feature_metadata``;
    * the dtypes of two one-row inputs whose ``model`` value is a word
      (``'camry'``) and a digit string (``'5'``) respectively;
    * the shape and top probability of ``predict_proba`` for each input, and
      whether the two probability tables are identical;
    * the top probability for an input whose ``make``/``model`` values are
      placeholder "unknown" strings.

    Every predictor call is wrapped in a ``try`` so that one failing step is
    reported and the remaining diagnostics still run.

    Returns:
        None. All output goes to stdout.
    """
    print("=== Debugging Feature Types ===")

    model_path = "./simple_autogluon_models"
    simple_matcher = CarDealerMatcher(model_path=model_path)

    # Without a loaded model none of the checks below can run; say so
    # explicitly instead of exiting with nothing but the banner printed.
    if not simple_matcher.model_loaded:
        print(f"❌ Model could not be loaded from {model_path}; nothing to debug.")
        return

    print("\n🔍 Model Feature Information:")

    # Check feature types
    try:
        feature_metadata = simple_matcher.predictor.feature_metadata
        print(f"Feature metadata: {feature_metadata}")
    except Exception as e:
        print(f"Could not get feature metadata: {e}")

    # Create test data to see how features are processed
    print("\n🧪 Testing Feature Processing:")

    # Same year/odometer in both rows so that only make/model differ.
    test_data_text = _single_row_frame('toyota', 'camry')  # word-like model name
    test_data_numeric = _single_row_frame('bmw', '5')  # digit-only model name (still a str)

    print(f"Text model data types: {test_data_text.dtypes}")
    print(f"Numeric model data types: {test_data_numeric.dtypes}")

    # Test preprocessing
    try:
        print("\n🔄 Testing Model Predictions:")

        # Text model prediction
        text_result = simple_matcher.predictor.predict_proba(test_data_text)
        print(f"Text model prediction shape: {text_result.shape}")
        print(f"Text model top prediction: {_top_probability(text_result):.4f}")

        # Numeric model prediction
        numeric_result = simple_matcher.predictor.predict_proba(test_data_numeric)
        print(f"Numeric model prediction shape: {numeric_result.shape}")
        print(f"Numeric model top prediction: {_top_probability(numeric_result):.4f}")

        # Two different cars yielding exactly the same probability table
        # suggests the differing features are not influencing the output.
        if text_result.equals(numeric_result):
            print("❌ PROBLEM: Predictions are identical!")
        else:
            print("✅ Predictions are different (as expected)")

    except Exception as e:
        print(f"Error during prediction testing: {e}")

    # Test what happens with unknown categories
    print("\n🚫 Testing Unknown Categories:")

    unknown_data = _single_row_frame('unknown_make', 'unknown_model')

    try:
        unknown_result = simple_matcher.predictor.predict_proba(unknown_data)
        print(f"Unknown category top prediction: {_top_probability(unknown_result):.4f}")
    except Exception as e:
        print(f"Error with unknown categories: {e}")

# Run the diagnostics only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    debug_feature_types()