# hybrid_model_debugger.py import pickle import numpy as np import sys import traceback def debug_model(model_path, test_text): """ Debugs the hybrid model by running a detailed test prediction and inspecting the outputs at each stage of the process """ print(f"Loading model from {model_path}...") try: # Load model with open(model_path, "rb") as f: model_data = pickle.load(f) print(f"Model loaded successfully. Type: {type(model_data)}") # Determine the type of model if isinstance(model_data, dict): print("\nModel is a dictionary with keys:") for key in model_data: print(f" - {key} ({type(model_data[key])})") # Look for classifier in the dictionary classifier = None if 'model' in model_data: classifier = model_data['model'] print("Using 'model' key as classifier") elif 'classifier' in model_data: classifier = model_data['classifier'] print("Using 'classifier' key as classifier") else: # Try to find a component with predict method for key, component in model_data.items(): if hasattr(component, 'predict'): classifier = component print(f"Using '{key}' as classifier (has predict method)") break else: # Direct classifier classifier = model_data print("Model is a direct classifier object") if not classifier: print("ERROR: Could not identify a classifier component in the model") return # Check for mlb mlb = None if hasattr(classifier, 'mlb'): mlb = classifier.mlb print("\nFound MultiLabelBinarizer on classifier") if hasattr(mlb, 'classes_'): print(f"Available classes: {mlb.classes_}") else: print("WARNING: MultiLabelBinarizer has no classes_ attribute") else: print("\nNo MultiLabelBinarizer found on classifier") # Check if mlb is in the dictionary if isinstance(model_data, dict) and 'mlb' in model_data: mlb = model_data['mlb'] print("Found MultiLabelBinarizer in model dictionary") if hasattr(mlb, 'classes_'): print(f"Available classes: {mlb.classes_}") else: print("WARNING: MultiLabelBinarizer has no classes_ attribute") # Check for alpha parameter alpha = getattr(classifier, 'alpha', None) print(f"\nAlpha parameter: {alpha}") # Check for threshold parameter threshold = getattr(classifier, 'threshold', None) print(f"Threshold parameter: {threshold}") # Try making a prediction print(f"\nTesting prediction with text: '{test_text}'") # Try different prediction approaches approaches = [ ("Standard prediction with text as list", lambda: classifier.predict([test_text])), ("With specific alpha and threshold", lambda: classifier.predict([test_text], alpha=0.6, threshold=0.4)), ("With return_scores=True", lambda: classifier.predict([test_text], return_scores=True)), ("All parameters", lambda: classifier.predict([test_text], alpha=0.6, threshold=0.4, return_scores=True)) ] for description, predict_func in approaches: print(f"\n--- {description} ---") try: result = predict_func() print(f"Result type: {type(result)}") print(f"Result value: {result}") # If it's a numpy array, try to interpret it if isinstance(result, np.ndarray): print(f"Array shape: {result.shape}") print(f"Array contents: {result}") if mlb and hasattr(mlb, 'classes_'): try: # Check if it's a binary array if len(result.shape) == 2: # First dim is samples, second is classes labels = mlb.classes_[result[0].astype(bool)].tolist() print(f"Converted to labels: {labels}") except Exception as e: print(f"Error converting to labels: {e}") # If it's a list, check the first item elif isinstance(result, list) and len(result) > 0: print(f"First item type: {type(result[0])}") print(f"First item value: {result[0]}") # If it's a dictionary, check its structure elif isinstance(result, dict): print("Dictionary keys:") for key in result: value = result[key] print(f" - {key} ({type(value)})") # Show a sample of the value if isinstance(value, (list, tuple)) and len(value) > 0: print(f" Sample: {value[:3]}...") elif isinstance(value, dict) and len(value) > 0: sample_keys = list(value.keys())[:3] print(f" Sample keys: {sample_keys}...") else: print(f" Value: {value}") except Exception as e: print(f"Error during prediction: {e}") print(traceback.format_exc()) print("\nDebugging complete") except Exception as e: print(f"Error loading or processing model: {e}") print(traceback.format_exc()) if __name__ == "__main__": model_path = r"C:\Users\tueyc\CMKL Year 1\nomad_sync_app\backend\hybrid_interest_classifier.pkl" test_text = "I hike mountains and explore cultures while traveling. I also love cooking new recipes." if len(sys.argv) > 1: model_path = sys.argv[1] if len(sys.argv) > 2: test_text = sys.argv[2] debug_model(model_path, test_text)