# five_digit_hand_sign / cnn_utils.py
# Author: JumaRubea
# Purpose: utilities for the five-digit hand sign classifier (commit b44d0c7, verified)
# importing libraries and packages
import cv2
import h5py
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
def load_signs_dataset():
    """
    Loads the hand signs dataset from HDF5 files.

    Reads 'datasets/train_signs.h5' and 'datasets/test_signs.h5' relative to
    the current working directory.

    Returns:
        train_set_x_orig (numpy.ndarray): Training set features (images).
        train_set_y_orig (numpy.ndarray): Training set labels, reshaped to (1, number_of_samples).
        test_set_x_orig (numpy.ndarray): Test set features (images).
        test_set_y_orig (numpy.ndarray): Test set labels, reshaped to (1, number_of_samples).
        classes (numpy.ndarray): Array containing the list of class labels.
    """
    # Use context managers so the HDF5 file handles are always closed, even
    # if reading a dataset raises (the original code leaked open handles).
    with h5py.File('datasets/train_signs.h5', "r") as train_dataset:
        # Copy features and labels into NumPy arrays while the file is open;
        # [:] materializes the full dataset so it outlives the file handle.
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])

    with h5py.File('datasets/test_signs.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])
        # Class label list (e.g., the digits 0-5) stored in the test file
        classes = np.array(test_dataset["list_classes"][:])

    # Reshape labels to (1, number_of_samples) for downstream consistency
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
def dataset_preprocessing(img_data, img_label, num_classes=6):
    """
    Preprocesses image data and labels for model training or testing.

    Args:
        img_data (numpy.ndarray): The input image data array (pixel values
            assumed to be in [0, 255]).
        img_label (numpy.ndarray): The corresponding integer labels for the
            image data; any shape, flattened internally.
        num_classes (int): Number of distinct classes for one-hot encoding.
            Defaults to 6 (hand signs 0 through 5).

    Returns:
        norm_data (numpy.ndarray): Normalized image data with pixel values
            scaled between 0 and 1.
        one_hot_label (numpy.ndarray): Labels converted to one-hot encoded
            vectors of shape (num_samples, num_classes).
    """
    # Normalize the image data by scaling pixel values to the range [0, 1]
    norm_data = img_data / 255.0
    # One-hot encode by indexing rows of an identity matrix with the labels;
    # reshape(-1) flattens labels (e.g. from (1, m)) for correct row indexing.
    # Equivalent to tensorflow.keras.utils.to_categorical(labels, num_classes).
    one_hot_label = np.eye(num_classes)[img_label.reshape(-1)]
    return norm_data, one_hot_label
def visualize_sign(data, label, max_indx):
    """
    Visualizes a random selection of hand sign images along with their predicted labels.

    Args:
        data (numpy.ndarray): The dataset containing image data.
            Shape is expected to be (num_samples, height, width, channels).
        label (numpy.ndarray): The predicted labels corresponding to the dataset.
            Shape should be (1, num_samples).
        max_indx (int): The number of images to display in the visualization.

    Returns:
        None: Displays the images and their labels using Matplotlib.
    """
    # squeeze=False guarantees a 2D axes array even when max_indx == 1;
    # without it plt.subplots(1, 1) returns a bare Axes and axs[i] raises.
    fig, axs = plt.subplots(1, max_indx, squeeze=False)
    # Randomly select `max_indx` indices from the dataset (with replacement)
    arr = list(np.random.randint(0, data.shape[0], size=max_indx))
    for i, value in enumerate(arr):
        # Display the image at the sampled index
        axs[0, i].imshow(data[value])
        # Title shows the label for this sample; label has shape (1, m)
        axs[0, i].set_title(f'It is {label[0, value]}')
        # Hide the axis ticks/frame for a cleaner image grid
        axs[0, i].axis('off')
    # Render the complete figure
    plt.show()
def visualize_metrics(history):
    """
    Visualizes the training metrics (accuracy and loss) over epochs.

    Args:
        history (tensorflow.keras.callbacks.History): The history object
            returned by the model's `fit` method. Contains training metrics.

    Returns:
        None: Displays the accuracy and loss plots.
    """
    # A DataFrame makes per-metric columns easy to plot and check for
    metrics_df = pd.DataFrame(history.history)

    # One row, two panels: accuracy on the left, loss on the right
    fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

    # (axis, train column, validation column, panel title) for each panel
    panels = [
        (acc_ax, 'accuracy', 'val_accuracy', 'Accuracy',
         'Training Accuracy', 'Validation Accuracy'),
        (loss_ax, 'loss', 'val_loss', 'Loss',
         'Training Loss', 'Validation Loss'),
    ]
    for ax, train_col, val_col, title, train_lbl, val_lbl in panels:
        ax.plot(metrics_df[train_col], label=train_lbl)
        # Validation curves only exist when fit() was given validation data
        if val_col in metrics_df.columns:
            ax.plot(metrics_df[val_col], label=val_lbl, linestyle='--')
        ax.set_title(title)
        ax.set_xlabel('Epochs')
        ax.set_ylabel(title)
        ax.legend()

    # Avoid overlapping labels between the two panels, then render
    plt.tight_layout()
    plt.show()
def evaluate_model(model, test_data, test_labels, class_names):
    """
    Evaluates a trained model on test data and visualizes metrics.

    Args:
        model: The trained model to evaluate.
        test_data (numpy.ndarray): Normalized test data used for predictions.
        test_labels (numpy.ndarray): One-hot encoded true labels of the test data.
        class_names: Array of class names corresponding to the classes
            (converted to strings internally).

    Returns:
        None: Displays a confusion matrix and classification report.
    """
    # Normalize class names to plain Python strings for report/plot labels
    class_names = list(class_names.astype(str))

    # Predicted probabilities -> hard class indices via argmax over classes
    probabilities = model.predict(test_data)
    predicted = np.argmax(probabilities, axis=1)
    # Recover integer ground-truth classes from the one-hot encoding
    actual = np.argmax(test_labels, axis=1)

    # Text summary: per-class precision/recall/F1
    print("\nClassification Report:")
    print(classification_report(actual, predicted, target_names=class_names))

    # Heatmap of the confusion matrix (rows = true, columns = predicted)
    plt.figure(figsize=(8, 6))
    sns.heatmap(
        confusion_matrix(actual, predicted),
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted Labels")
    plt.ylabel("True Labels")
    plt.show()
def predictor(model, image_array, input_size=(64, 64), class_names=None):
    """
    Makes a prediction on a single image using the given model and visualizes the result.

    Args:
        model: The trained model to use for prediction.
        image_array (numpy.ndarray): The input image array (e.g., loaded using OpenCV,
            so channels are assumed BGR).
        input_size (tuple): The expected input size of the model (default is (64, 64)).
        class_names (list of str, optional): List of class names corresponding to the
            model's output.

    Returns:
        None: Displays the input image with the predicted class as the title.
    """
    # Coerce provided class names to strings so they render cleanly in titles
    if class_names is not None:
        class_names = [str(cls) for cls in class_names]

    # Build the model input: resize, scale to [0, 1], add batch dimension
    model_input = np.expand_dims(cv2.resize(image_array, input_size) / 255.0, axis=0)

    # Predict and collapse the (1, num_classes) output to a 1D vector
    probabilities = np.squeeze(model.predict(model_input))
    print("Predicted Probabilities:", probabilities)

    # Highest-probability class wins
    best_idx = np.argmax(probabilities, axis=0)
    print("Predicted Class Index:", best_idx)

    # Map to a human-readable name when class names were supplied
    if class_names:
        title_label = class_names[best_idx]
    else:
        title_label = str(best_idx)

    # OpenCV images are BGR; Matplotlib expects RGB
    plt.imshow(cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB))
    plt.title(f'Predicted: {title_label}')
    plt.axis('off')
    plt.show()