# New_Hybrid_Text_Classifier_Model / hybrid_model_debugger.py
# Uploaded by WinterJet2021 ("Upload 13 files", commit 673d9a1, verified)
# hybrid_model_debugger.py
import pickle
import numpy as np
import sys
import traceback
def _find_classifier(model_data):
    """Return the classifier held by *model_data*, or None if none is found."""
    if not isinstance(model_data, dict):
        # Anything that is not a dictionary is treated as the classifier itself.
        print("Model is a direct classifier object")
        return model_data

    print("\nModel is a dictionary with keys:")
    for key, component in model_data.items():
        print(f" - {key} ({type(component)})")

    # Well-known keys take priority, in this order.
    for key in ("model", "classifier"):
        if key in model_data:
            print(f"Using '{key}' key as classifier")
            return model_data[key]

    # Otherwise fall back to the first component exposing a predict method.
    for key, component in model_data.items():
        if hasattr(component, "predict"):
            print(f"Using '{key}' as classifier (has predict method)")
            return component
    return None


def _find_mlb(classifier, model_data):
    """Return the label binarizer from the classifier or the model dictionary."""
    # An ``mlb`` attribute that is None is treated as missing so that the
    # dictionary fallback below still gets a chance.
    mlb = getattr(classifier, "mlb", None)
    if mlb is not None:
        print("\nFound MultiLabelBinarizer on classifier")
    else:
        print("\nNo MultiLabelBinarizer found on classifier")
        if not (isinstance(model_data, dict) and "mlb" in model_data):
            return None
        mlb = model_data["mlb"]
        print("Found MultiLabelBinarizer in model dictionary")

    if hasattr(mlb, "classes_"):
        print(f"Available classes: {mlb.classes_}")
    else:
        print("WARNING: MultiLabelBinarizer has no classes_ attribute")
    return mlb


def _describe_result(result, mlb):
    """Print the type, value and structure of one prediction result."""
    print(f"Result type: {type(result)}")
    print(f"Result value: {result}")

    if isinstance(result, np.ndarray):
        print(f"Array shape: {result.shape}")
        print(f"Array contents: {result}")
        # Only a 2-D array (first dim samples, second dim classes) is decoded.
        if mlb is not None and hasattr(mlb, "classes_") and result.ndim == 2:
            try:
                # np.asarray lets a plain list of class names be mask-indexed.
                classes = np.asarray(mlb.classes_)
                # NOTE: every non-zero entry counts as "selected", so a matrix
                # of scores lists each class whose score is not exactly zero.
                labels = classes[result[0].astype(bool)].tolist()
                print(f"Converted to labels: {labels}")
            except Exception as e:
                print(f"Error converting to labels: {e}")
    elif isinstance(result, (list, tuple)) and len(result) > 0:
        print(f"First item type: {type(result[0])}")
        print(f"First item value: {result[0]}")
    elif isinstance(result, dict):
        print("Dictionary keys:")
        for key, value in result.items():
            print(f" - {key} ({type(value)})")
            # Show only a small sample of large containers.
            if isinstance(value, (list, tuple)) and len(value) > 0:
                print(f" Sample: {value[:3]}...")
            elif isinstance(value, dict) and len(value) > 0:
                sample_keys = list(value)[:3]
                print(f" Sample keys: {sample_keys}...")
            else:
                print(f" Value: {value}")


def debug_model(model_path, test_text):
    """Load a pickled model and print a step-by-step trace of a test prediction.

    The function locates the classifier and its label binarizer, reports the
    classifier's ``alpha`` and ``threshold`` attributes, and then calls
    ``classifier.predict`` with several argument combinations, describing each
    result. All problems are reported on stdout; nothing is raised.

    Args:
        model_path: Path of the pickle file to load.
        test_text: Text passed (as a one-element list) to ``predict``.

    Returns:
        None.
    """
    print(f"Loading model from {model_path}...")
    try:
        # SECURITY: unpickling can execute arbitrary code. Only point this
        # tool at model files that come from a trusted source.
        with open(model_path, "rb") as f:
            model_data = pickle.load(f)
        print(f"Model loaded successfully. Type: {type(model_data)}")

        classifier = _find_classifier(model_data)
        # Compare against None explicitly: a valid classifier may be falsy
        # (for example when it defines __len__ or __bool__).
        if classifier is None:
            print("ERROR: Could not identify a classifier component in the model")
            return

        mlb = _find_mlb(classifier, model_data)

        alpha = getattr(classifier, "alpha", None)
        print(f"\nAlpha parameter: {alpha}")
        threshold = getattr(classifier, "threshold", None)
        print(f"Threshold parameter: {threshold}")

        print(f"\nTesting prediction with text: '{test_text}'")
        # Each approach is a description plus the keyword arguments for predict.
        approaches = [
            ("Standard prediction with text as list", {}),
            ("With specific alpha and threshold", {"alpha": 0.6, "threshold": 0.4}),
            ("With return_scores=True", {"return_scores": True}),
            (
                "All parameters",
                {"alpha": 0.6, "threshold": 0.4, "return_scores": True},
            ),
        ]
        for description, kwargs in approaches:
            print(f"\n--- {description} ---")
            try:
                _describe_result(classifier.predict([test_text], **kwargs), mlb)
            except Exception as e:
                # One unsupported signature must not stop the other approaches.
                print(f"Error during prediction: {e}")
                print(traceback.format_exc())
        print("\nDebugging complete")
    except Exception as e:
        print(f"Error loading or processing model: {e}")
        print(traceback.format_exc())
if __name__ == "__main__":
    # Command line: [model_path] [test_text]; each falls back to a default.
    default_model_path = r"C:\Users\tueyc\CMKL Year 1\nomad_sync_app\backend\hybrid_interest_classifier.pkl"
    default_test_text = "I hike mountains and explore cultures while traveling. I also love cooking new recipes."

    model_path = sys.argv[1] if len(sys.argv) > 1 else default_model_path
    test_text = sys.argv[2] if len(sys.argv) > 2 else default_test_text

    debug_model(model_path, test_text)