File size: 6,666 Bytes
673d9a1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
# hybrid_model_debugger.py
import pickle
import numpy as np
import sys
import traceback

def _find_classifier(model_data):
    """Return the classifier held by *model_data*, or None if none is found.

    A dict is searched for a 'model' key, then a 'classifier' key, then the
    first value exposing a ``predict`` attribute. Any non-dict object is
    treated as the classifier itself.
    """
    if not isinstance(model_data, dict):
        print("Model is a direct classifier object")
        return model_data

    print("\nModel is a dictionary with keys:")
    for key, component in model_data.items():
        print(f"  - {key} ({type(component)})")

    # Well-known keys take priority over duck-typed discovery.
    for key in ('model', 'classifier'):
        if key in model_data:
            print(f"Using '{key}' key as classifier")
            return model_data[key]

    for key, component in model_data.items():
        if hasattr(component, 'predict'):
            print(f"Using '{key}' as classifier (has predict method)")
            return component
    return None


def _report_classes(mlb):
    """Print the classes known to *mlb*, or a warning when it has none."""
    if hasattr(mlb, 'classes_'):
        print(f"Available classes: {mlb.classes_}")
    else:
        print("WARNING: MultiLabelBinarizer has no classes_ attribute")


def _find_label_binarizer(classifier, model_data):
    """Return the ``mlb`` object from the classifier or the model dict, else None."""
    if hasattr(classifier, 'mlb'):
        mlb = classifier.mlb
        print("\nFound MultiLabelBinarizer on classifier")
        _report_classes(mlb)
        return mlb

    print("\nNo MultiLabelBinarizer found on classifier")
    if isinstance(model_data, dict) and 'mlb' in model_data:
        mlb = model_data['mlb']
        print("Found MultiLabelBinarizer in model dictionary")
        _report_classes(mlb)
        return mlb
    return None


def _describe_result(result, mlb):
    """Print the type, value and a structural summary of a prediction result."""
    print(f"Result type: {type(result)}")
    print(f"Result value: {result}")

    if isinstance(result, np.ndarray):
        print(f"Array shape: {result.shape}")
        print(f"Array contents: {result}")

        # Only a 2-D array is decoded: its first row is used as a boolean
        # mask over the binarizer's classes.
        if mlb is not None and hasattr(mlb, 'classes_') and result.ndim == 2:
            try:
                # np.asarray keeps the mask indexing valid even when
                # classes_ is a plain list rather than an ndarray.
                classes = np.asarray(mlb.classes_)
                labels = classes[result[0].astype(bool)].tolist()
                print(f"Converted to labels: {labels}")
            except Exception as e:
                print(f"Error converting to labels: {e}")

    elif isinstance(result, (list, tuple)) and len(result) > 0:
        print(f"First item type: {type(result[0])}")
        print(f"First item value: {result[0]}")

    elif isinstance(result, dict):
        print("Dictionary keys:")
        for key, value in result.items():
            print(f"  - {key} ({type(value)})")

            # Show only a small sample of container values.
            if isinstance(value, (list, tuple)) and len(value) > 0:
                print(f"    Sample: {value[:3]}...")
            elif isinstance(value, dict) and len(value) > 0:
                sample_keys = list(value.keys())[:3]
                print(f"    Sample keys: {sample_keys}...")
            else:
                print(f"    Value: {value}")


def debug_model(model_path: str, test_text: str) -> None:
    """Load a pickled model and print diagnostics for several predict() calls.

    The model is unpickled, its classifier and optional ``mlb`` component are
    located, the ``alpha`` and ``threshold`` attributes are reported, and
    ``predict`` is invoked with four different keyword combinations. Every
    outcome, including any exception, is printed rather than raised.

    Args:
        model_path: Path to the pickle file containing the model.
        test_text: Text passed to ``predict`` as a single-element list.

    Returns:
        None. All information is written to standard output.
    """
    print(f"Loading model from {model_path}...")

    try:
        # SECURITY: pickle.load can execute arbitrary code while unpickling;
        # only point this tool at model files from a trusted source.
        with open(model_path, "rb") as f:
            model_data = pickle.load(f)

        print(f"Model loaded successfully. Type: {type(model_data)}")

        classifier = _find_classifier(model_data)

        # Compare against None explicitly: a classifier that defines
        # __len__/__bool__ may be falsy and must not be mistaken for missing.
        if classifier is None:
            print("ERROR: Could not identify a classifier component in the model")
            return

        mlb = _find_label_binarizer(classifier, model_data)

        alpha = getattr(classifier, 'alpha', None)
        print(f"\nAlpha parameter: {alpha}")

        threshold = getattr(classifier, 'threshold', None)
        print(f"Threshold parameter: {threshold}")

        print(f"\nTesting prediction with text: '{test_text}'")

        # Each approach is a description plus the extra keyword arguments
        # passed to predict(); unsupported keywords surface as printed errors.
        approaches = [
            ("Standard prediction with text as list", {}),
            ("With specific alpha and threshold", {"alpha": 0.6, "threshold": 0.4}),
            ("With return_scores=True", {"return_scores": True}),
            ("All parameters", {"alpha": 0.6, "threshold": 0.4, "return_scores": True}),
        ]

        for description, predict_kwargs in approaches:
            print(f"\n--- {description} ---")
            try:
                result = classifier.predict([test_text], **predict_kwargs)
                _describe_result(result, mlb)
            except Exception as e:
                # One failing call signature must not stop the others.
                print(f"Error during prediction: {e}")
                print(traceback.format_exc())

        print("\nDebugging complete")

    except Exception as e:
        # Top-level boundary of a diagnostic tool: report and keep going.
        print(f"Error loading or processing model: {e}")
        print(traceback.format_exc())

if __name__ == "__main__":
    # Positional CLI arguments, in order: [model_path] [test_text].
    # Each one falls back to the built-in default when omitted.
    cli_args = sys.argv[1:]

    model_path = (
        cli_args[0]
        if len(cli_args) >= 1
        else r"C:\Users\tueyc\CMKL Year 1\nomad_sync_app\backend\hybrid_interest_classifier.pkl"
    )
    test_text = (
        cli_args[1]
        if len(cli_args) >= 2
        else "I hike mountains and explore cultures while traveling. I also love cooking new recipes."
    )

    debug_model(model_path, test_text)