# five_digit_hand_sign / cnn_utils.py
# Author: JumaRubea
# Purpose: utilities for the five-digit hand sign classifier (commit b44d0c7, verified)
# importing libraries and packages
import cv2
import h5py
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
def load_signs_dataset():
    """
    Loads the hand signs dataset from HDF5 files.

    Reads 'datasets/train_signs.h5' and 'datasets/test_signs.h5' relative to
    the current working directory.

    Returns:
        train_set_x_orig (numpy.ndarray): Training set features (images).
        train_set_y_orig (numpy.ndarray): Training set labels, reshaped to (1, number_of_samples).
        test_set_x_orig (numpy.ndarray): Test set features (images).
        test_set_y_orig (numpy.ndarray): Test set labels, reshaped to (1, number_of_samples).
        classes (numpy.ndarray): Array containing the list of class labels.
    """
    # Use context managers so the HDF5 file handles are always closed, even
    # if reading a dataset raises (the original code leaked open handles).
    with h5py.File('datasets/train_signs.h5', "r") as train_dataset:
        # Copy features and labels into NumPy arrays while the file is open;
        # [:] materializes the full dataset so it outlives the file handle.
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])

    with h5py.File('datasets/test_signs.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])
        # Class label list (e.g., the digits 0-5) stored in the test file
        classes = np.array(test_dataset["list_classes"][:])

    # Reshape labels to (1, number_of_samples) for downstream consistency
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes
def dataset_preprocessing(img_data, img_label, num_classes=6):
    """
    Preprocesses image data and labels for model training or testing.

    Args:
        img_data (numpy.ndarray): The input image data array (pixel values
            assumed to be in [0, 255]).
        img_label (numpy.ndarray): The corresponding integer labels for the
            image data; any shape, flattened internally.
        num_classes (int): Number of distinct classes for one-hot encoding.
            Defaults to 6 (hand signs 0 through 5).

    Returns:
        norm_data (numpy.ndarray): Normalized image data with pixel values
            scaled between 0 and 1.
        one_hot_label (numpy.ndarray): Labels converted to one-hot encoded
            vectors of shape (num_samples, num_classes).
    """
    # Normalize the image data by scaling pixel values to the range [0, 1]
    norm_data = img_data / 255.0
    # One-hot encode by indexing rows of an identity matrix with the labels;
    # reshape(-1) flattens labels (e.g. from (1, m)) for correct row indexing.
    # Equivalent to tensorflow.keras.utils.to_categorical(labels, num_classes).
    one_hot_label = np.eye(num_classes)[img_label.reshape(-1)]
    return norm_data, one_hot_label
def visualize_sign(data, label, max_indx):
    """
    Visualizes a random selection of hand sign images along with their predicted labels.

    Args:
        data (numpy.ndarray): The dataset containing image data.
            Shape is expected to be (num_samples, height, width, channels).
        label (numpy.ndarray): The predicted labels corresponding to the dataset.
            Shape should be (1, num_samples).
        max_indx (int): The number of images to display in the visualization.

    Returns:
        None: Displays the images and their labels using Matplotlib.
    """
    # squeeze=False guarantees a 2D axes array even when max_indx == 1;
    # without it plt.subplots(1, 1) returns a bare Axes and axs[i] raises.
    fig, axs = plt.subplots(1, max_indx, squeeze=False)
    # Randomly select `max_indx` indices from the dataset (with replacement)
    arr = list(np.random.randint(0, data.shape[0], size=max_indx))
    for i, value in enumerate(arr):
        # Display the image at the sampled index
        axs[0, i].imshow(data[value])
        # Title shows the label for this sample; label has shape (1, m)
        axs[0, i].set_title(f'It is {label[0, value]}')
        # Hide the axis ticks/frame for a cleaner image grid
        axs[0, i].axis('off')
    # Render the complete figure
    plt.show()
def visualize_metrics(history):
    """
    Visualizes the training metrics (accuracy and loss) over epochs.

    Args:
        history (tensorflow.keras.callbacks.History): The history object
            returned by the model's `fit` method. Contains training metrics.

    Returns:
        None: Displays the accuracy and loss plots.
    """
    # A DataFrame makes per-metric columns easy to plot and check for
    metrics_df = pd.DataFrame(history.history)

    # One row, two panels: accuracy on the left, loss on the right
    fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 4))

    # (axis, train column, validation column, panel title) for each panel
    panels = [
        (acc_ax, 'accuracy', 'val_accuracy', 'Accuracy',
         'Training Accuracy', 'Validation Accuracy'),
        (loss_ax, 'loss', 'val_loss', 'Loss',
         'Training Loss', 'Validation Loss'),
    ]
    for ax, train_col, val_col, title, train_lbl, val_lbl in panels:
        ax.plot(metrics_df[train_col], label=train_lbl)
        # Validation curves only exist when fit() was given validation data
        if val_col in metrics_df.columns:
            ax.plot(metrics_df[val_col], label=val_lbl, linestyle='--')
        ax.set_title(title)
        ax.set_xlabel('Epochs')
        ax.set_ylabel(title)
        ax.legend()

    # Avoid overlapping labels between the two panels, then render
    plt.tight_layout()
    plt.show()
def evaluate_model(model, test_data, test_labels, class_names):
    """
    Evaluates a trained model on test data and visualizes metrics.

    Args:
        model: The trained model to evaluate.
        test_data (numpy.ndarray): Normalized test data used for predictions.
        test_labels (numpy.ndarray): One-hot encoded true labels of the test data.
        class_names: Array of class names corresponding to the classes
            (converted to strings internally).

    Returns:
        None: Displays a confusion matrix and classification report.
    """
    # Normalize class names to plain Python strings for report/plot labels
    class_names = list(class_names.astype(str))

    # Predicted probabilities -> hard class indices via argmax over classes
    probabilities = model.predict(test_data)
    predicted = np.argmax(probabilities, axis=1)
    # Recover integer ground-truth classes from the one-hot encoding
    actual = np.argmax(test_labels, axis=1)

    # Text summary: per-class precision/recall/F1
    print("\nClassification Report:")
    print(classification_report(actual, predicted, target_names=class_names))

    # Heatmap of the confusion matrix (rows = true, columns = predicted)
    plt.figure(figsize=(8, 6))
    sns.heatmap(
        confusion_matrix(actual, predicted),
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=class_names,
        yticklabels=class_names,
    )
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted Labels")
    plt.ylabel("True Labels")
    plt.show()
def predictor(model, image_array, input_size=(64, 64), class_names=None):
    """
    Makes a prediction on a single image using the given model and visualizes the result.

    Args:
        model: The trained model to use for prediction.
        image_array (numpy.ndarray): The input image array (e.g., loaded using OpenCV,
            so channels are assumed BGR).
        input_size (tuple): The expected input size of the model (default is (64, 64)).
        class_names (list of str, optional): List of class names corresponding to the
            model's output.

    Returns:
        None: Displays the input image with the predicted class as the title.
    """
    # Coerce provided class names to strings so they render cleanly in titles
    if class_names is not None:
        class_names = [str(cls) for cls in class_names]

    # Build the model input: resize, scale to [0, 1], add batch dimension
    model_input = np.expand_dims(cv2.resize(image_array, input_size) / 255.0, axis=0)

    # Predict and collapse the (1, num_classes) output to a 1D vector
    probabilities = np.squeeze(model.predict(model_input))
    print("Predicted Probabilities:", probabilities)

    # Highest-probability class wins
    best_idx = np.argmax(probabilities, axis=0)
    print("Predicted Class Index:", best_idx)

    # Map to a human-readable name when class names were supplied
    if class_names:
        title_label = class_names[best_idx]
    else:
        title_label = str(best_idx)

    # OpenCV images are BGR; Matplotlib expects RGB
    plt.imshow(cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB))
    plt.title(f'Predicted: {title_label}')
    plt.axis('off')
    plt.show()