File size: 3,089 Bytes
fafa5f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python3

import sys
import os
sys.path.append('.')
import pandas as pd

# Import the CarDealerMatcher from app.py
from app import CarDealerMatcher

def _single_listing_frame(make, model, year=2020, odometer=50000):
    """Build a one-row DataFrame describing a single car listing.

    Args:
        make: Value for the ``make`` column.
        model: Value for the ``model`` column.
        year: Value for the ``year`` column.
        odometer: Value for the ``odometer`` column.

    Returns:
        A ``pandas.DataFrame`` with exactly one row and the columns
        ``make``, ``model``, ``year``, ``odometer`` (in that order).
    """
    return pd.DataFrame([{
        'make': make,
        'model': model,
        'year': year,
        'odometer': odometer,
    }])


def _top_probability(proba):
    """Return the largest value in the first row of *proba*.

    NOTE(review): assumes ``predict_proba`` returns a pandas DataFrame with
    one column per class -- confirm against the predictor in use.
    """
    return proba.max(axis=1).iloc[0]


def debug_feature_types():
    """Print diagnostics about how the loaded model treats its input features.

    Loads a ``CarDealerMatcher`` from ``./simple_autogluon_models`` and, if the
    model loaded, prints:

    1. the predictor's feature metadata,
    2. the pandas dtypes of two hand-built sample rows (one whose ``model``
       is a word, one whose ``model`` is the digit string ``'5'``),
    3. the shape and top probability of ``predict_proba`` for both rows, and
       whether the two results are exactly equal,
    4. the top probability for a row with made-up ``make``/``model`` values.

    Each probing step is best-effort: an exception in one step is printed and
    the remaining steps still run. If the model did not load, a message is
    printed and the function returns early instead of silently doing nothing.

    Returns:
        None. All results are written to stdout.
    """
    print("=== Debugging Feature Types ===")

    model_path = "./simple_autogluon_models"
    simple_matcher = CarDealerMatcher(model_path=model_path)

    # Guard clause: without a model there is nothing to inspect. Say so
    # explicitly so an empty run is not mistaken for a successful one.
    if not simple_matcher.model_loaded:
        print(f"❌ Model could not be loaded from {model_path}; nothing to debug.")
        return

    predictor = simple_matcher.predictor

    print("\n🔍 Model Feature Information:")
    try:
        feature_metadata = predictor.feature_metadata
        print(f"Feature metadata: {feature_metadata}")
    except Exception as e:  # best-effort diagnostic; keep going
        print(f"Could not get feature metadata: {e}")

    print("\n🧪 Testing Feature Processing:")

    # Row whose model name is an ordinary word.
    test_data_text = _single_listing_frame('toyota', 'camry')
    # Row whose model name looks numeric. The value is still the string '5',
    # so this script itself never passes a numeric dtype for `model`.
    test_data_numeric = _single_listing_frame('bmw', '5')

    print(f"Text model data types: {test_data_text.dtypes}")
    print(f"Numeric model data types: {test_data_numeric.dtypes}")

    try:
        print("\n🔄 Testing Model Predictions:")

        text_result = predictor.predict_proba(test_data_text)
        print(f"Text model prediction shape: {text_result.shape}")
        print(f"Text model top prediction: {_top_probability(text_result):.4f}")

        numeric_result = predictor.predict_proba(test_data_numeric)
        print(f"Numeric model prediction shape: {numeric_result.shape}")
        print(f"Numeric model top prediction: {_top_probability(numeric_result):.4f}")

        # Two different cars yielding exactly equal probabilities would mean
        # the inputs are not influencing the output.
        if text_result.equals(numeric_result):
            print("❌ PROBLEM: Predictions are identical!")
        else:
            print("✅ Predictions are different (as expected)")

    except Exception as e:  # best-effort diagnostic; keep going
        print(f"Error during prediction testing: {e}")

    print("\n🚫 Testing Unknown Categories:")

    # Values chosen to be absent from any realistic training data.
    unknown_data = _single_listing_frame('unknown_make', 'unknown_model')

    try:
        unknown_result = predictor.predict_proba(unknown_data)
        print(f"Unknown category top prediction: {_top_probability(unknown_result):.4f}")
    except Exception as e:  # best-effort diagnostic
        print(f"Error with unknown categories: {e}")

# Run the diagnostics only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    debug_feature_types()