Spaces:

WinterJet2021
/

New_Hybrid_Text_Classifier_Model

Build error

App Files Files Community

New_Hybrid_Text_Classifier_Model / hybrid_model_debugger.py

WinterJet2021

Upload 13 files

673d9a1 verified about 1 year ago

raw

history blame contribute delete

6.67 kB

	# hybrid_model_debugger.py
	import pickle
	import numpy as np
	import sys
	import traceback

	def _select_classifier(model_data):
	    """Return the object inside *model_data* to use for predictions, or None.

	    A non-dict payload is treated as the classifier itself. For a dict, the
	    keys 'model' and 'classifier' are tried in that order; failing that, the
	    first value exposing a ``predict`` attribute is used. Each decision is
	    printed so the caller can see which component was picked.
	    """
	    if not isinstance(model_data, dict):
	        print("Model is a direct classifier object")
	        return model_data

	    print("\nModel is a dictionary with keys:")
	    for key, value in model_data.items():
	        print(f" - {key} ({type(value)})")

	    # Conventional key names win over duck typing.
	    for key in ('model', 'classifier'):
	        if key in model_data:
	            print(f"Using '{key}' key as classifier")
	            return model_data[key]

	    # Fall back to the first component that looks like an estimator.
	    for key, component in model_data.items():
	        if hasattr(component, 'predict'):
	            print(f"Using '{key}' as classifier (has predict method)")
	            return component

	    return None


	def _report_classes(mlb):
	    """Print the ``classes_`` of *mlb*, or a warning when it has none."""
	    if hasattr(mlb, 'classes_'):
	        print(f"Available classes: {mlb.classes_}")
	    else:
	        print("WARNING: MultiLabelBinarizer has no classes_ attribute")


	def _find_label_binarizer(classifier, model_data):
	    """Locate the label binarizer: on the classifier first, then in the dict.

	    Returns the ``mlb`` attribute of *classifier* when present; otherwise the
	    'mlb' entry of *model_data* when that is a dict containing one; otherwise
	    None.
	    """
	    if hasattr(classifier, 'mlb'):
	        mlb = classifier.mlb
	        print("\nFound MultiLabelBinarizer on classifier")
	        _report_classes(mlb)
	        return mlb

	    print("\nNo MultiLabelBinarizer found on classifier")

	    if isinstance(model_data, dict) and 'mlb' in model_data:
	        mlb = model_data['mlb']
	        print("Found MultiLabelBinarizer in model dictionary")
	        _report_classes(mlb)
	        return mlb

	    return None


	def _describe_result(result, mlb):
	    """Print the type, value and structure of one prediction *result*.

	    NumPy arrays get their shape printed and, when *mlb* exposes ``classes_``
	    and the array is 2-D, the first row is used as a boolean mask to list the
	    matching class labels. Non-empty lists show their first item; dicts show
	    each key with a short sample of its value.
	    """
	    print(f"Result type: {type(result)}")
	    print(f"Result value: {result}")

	    if isinstance(result, np.ndarray):
	        print(f"Array shape: {result.shape}")
	        print(f"Array contents: {result}")

	        # Compare against None: truthiness of an arbitrary object is not a
	        # reliable "is it present" test.
	        if mlb is not None and hasattr(mlb, 'classes_'):
	            try:
	                # First dim is samples, second is classes
	                if result.ndim == 2:
	                    # asarray lets the boolean mask work even when classes_
	                    # was stored as a plain list or tuple.
	                    classes = np.asarray(mlb.classes_)
	                    labels = classes[result[0].astype(bool)].tolist()
	                    print(f"Converted to labels: {labels}")
	            except Exception as e:
	                # Diagnostic tool: report the failure and keep going.
	                print(f"Error converting to labels: {e}")

	    elif isinstance(result, list) and len(result) > 0:
	        print(f"First item type: {type(result[0])}")
	        print(f"First item value: {result[0]}")

	    elif isinstance(result, dict):
	        print("Dictionary keys:")
	        for key, value in result.items():
	            print(f" - {key} ({type(value)})")

	            # Show a sample of the value
	            if isinstance(value, (list, tuple)) and len(value) > 0:
	                print(f" Sample: {value[:3]}...")
	            elif isinstance(value, dict) and len(value) > 0:
	                sample_keys = list(value.keys())[:3]
	                print(f" Sample keys: {sample_keys}...")
	            else:
	                print(f" Value: {value}")


	def debug_model(model_path: str, test_text: str) -> None:
	    """Load a pickled model and print a step-by-step diagnostic of a prediction.

	    The function unpickles *model_path*, identifies the classifier component,
	    looks for a label binarizer, prints the classifier's ``alpha`` and
	    ``threshold`` attributes, and then calls ``predict`` on *test_text* with
	    four different argument combinations, describing each result.

	    Args:
	        model_path: Path to the pickle file to inspect.
	        test_text: Text passed (wrapped in a one-element list) to ``predict``.

	    Returns:
	        None. All findings are written to standard output. Errors while
	        loading or predicting are printed with a traceback and not raised.
	    """
	    print(f"Loading model from {model_path}...")

	    try:
	        # SECURITY: pickle.load can execute arbitrary code embedded in the
	        # file. Only point this tool at model files from a trusted source.
	        with open(model_path, "rb") as f:
	            model_data = pickle.load(f)

	        print(f"Model loaded successfully. Type: {type(model_data)}")

	        classifier = _select_classifier(model_data)

	        # Identity check, not truthiness: a real classifier object may be
	        # falsy (e.g. it defines __len__ and is empty) and must not be
	        # mistaken for "nothing found".
	        if classifier is None:
	            print("ERROR: Could not identify a classifier component in the model")
	            return

	        mlb = _find_label_binarizer(classifier, model_data)

	        alpha = getattr(classifier, 'alpha', None)
	        print(f"\nAlpha parameter: {alpha}")

	        threshold = getattr(classifier, 'threshold', None)
	        print(f"Threshold parameter: {threshold}")

	        print(f"\nTesting prediction with text: '{test_text}'")

	        # Each entry defers the call so that a failure (including a missing
	        # predict method or unsupported keyword) is caught per approach.
	        approaches = [
	            ("Standard prediction with text as list",
	             lambda: classifier.predict([test_text])),
	            ("With specific alpha and threshold",
	             lambda: classifier.predict([test_text], alpha=0.6, threshold=0.4)),
	            ("With return_scores=True",
	             lambda: classifier.predict([test_text], return_scores=True)),
	            ("All parameters",
	             lambda: classifier.predict([test_text], alpha=0.6, threshold=0.4, return_scores=True)),
	        ]

	        for description, predict_func in approaches:
	            print(f"\n--- {description} ---")
	            try:
	                _describe_result(predict_func(), mlb)
	            except Exception as e:
	                # One failing call signature must not stop the others.
	                print(f"Error during prediction: {e}")
	                print(traceback.format_exc())

	        print("\nDebugging complete")

	    except Exception as e:
	        # Top-level boundary of a debugging script: report and return.
	        print(f"Error loading or processing model: {e}")
	        print(traceback.format_exc())

	if __name__ == "__main__":
	    # Positional command-line arguments override the built-in defaults:
	    #   argv[1] -> path of the pickled model, argv[2] -> text to classify.
	    cli_args = sys.argv[1:]

	    model_path = (
	        cli_args[0]
	        if cli_args
	        else r"C:\Users\tueyc\CMKL Year 1\nomad_sync_app\backend\hybrid_interest_classifier.pkl"
	    )
	    test_text = (
	        cli_args[1]
	        if len(cli_args) > 1
	        else "I hike mountains and explore cultures while traveling. I also love cooking new recipes."
	    )

	    debug_model(model_path, test_text)