# Assignment 2: Image Classification using Deep Learning (Computer Vision)
# This program classifies images into different categories using a
# Convolutional Neural Network (CNN) trained on CIFAR-10.

# Step 1: Import all necessary libraries
import warnings

import numpy as np                  # For numerical calculations
import matplotlib.pyplot as plt     # For showing images and graphs
import seaborn as sns               # For plotting the confusion matrix
from sklearn.metrics import confusion_matrix, classification_report

import tensorflow as tf             # Main deep learning library
from tensorflow import keras        # High-level API for building models
from tensorflow.keras import layers                 # For creating neural network layers
from tensorflow.keras.datasets import cifar10       # Built-in image dataset
from tensorflow.keras.utils import to_categorical   # For preparing labels

warnings.filterwarnings('ignore')  # Hide unnecessary warnings

# Step 2: Load the CIFAR-10 Dataset
# CIFAR-10 is a famous dataset with 60,000 small color images in 10 classes.
print("=== Loading CIFAR-10 Dataset ===")
print("This dataset contains 60,000 32x32 color images in 10 categories")
print("Downloading dataset (this may take a minute on first run)...\n")

# Load the data - it's automatically split into training and testing sets.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Define the 10 classes in CIFAR-10 (index i is the name for label i).
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']

# Step 3: Explore the dataset
print("=== Dataset Information ===")
print(f"Training images: {X_train.shape[0]}")
print(f"Testing images: {X_test.shape[0]}")
print(f"Image shape: {X_train.shape[1:]} (32x32 pixels, 3 color channels for RGB)")
print(f"Number of classes: {len(class_names)}")
print(f"Classes: {', '.join(class_names)}\n")

# Step 4: Visualize sample images from the dataset
print("=== Visualizing Sample Images ===")
fig, axes = plt.subplots(3, 5, figsize=(12, 8))
fig.suptitle('Sample Images from CIFAR-10 Dataset', fontsize=16)

# Sample 15 DISTINCT training images (replace=False avoids duplicates,
# which np.random.randint in a loop could produce).
sample_indices = np.random.choice(len(X_train), size=15, replace=False)
for i, idx in enumerate(sample_indices):
    image = X_train[idx]
    label = class_names[y_train[idx][0]]  # y_train rows are 1-element arrays

    # Plot the image in a 3x5 grid.
    ax = axes[i // 5, i % 5]
    ax.imshow(image)
    ax.set_title(f'Class: {label}')
    ax.axis('off')

plt.tight_layout()
plt.savefig('sample_images.png')
plt.show()

# Step 5: Preprocess the data
print("\n=== Preprocessing Data ===")

# Normalize pixel values to be between 0 and 1 (instead of 0-255).
# This helps the neural network learn better.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
print("\u2713 Normalized pixel values to range [0, 1]")

# Convert labels to categorical (one-hot encoding).
# Example: label 3 becomes [0,0,0,1,0,0,0,0,0,0]
y_train_categorical = to_categorical(y_train, 10)
y_test_categorical = to_categorical(y_test, 10)
print("\u2713 Converted labels to categorical format")

# Step 6: Build the Convolutional Neural Network (CNN)
print("\n=== Building CNN Model ===")
print("Creating a Convolutional Neural Network for image classification...")

# Create the model: three convolutional blocks of increasing width
# (32 -> 64 -> 128 filters), then a dense classification head.
model = keras.Sequential([
    # First Convolutional Block
    # Conv2D layer: Detects features like edges and shapes
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)),
    layers.BatchNormalization(),   # Normalizes the outputs to improve training
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),   # Reduces image size while keeping important features
    layers.Dropout(0.25),          # Prevents overfitting

    # Second Convolutional Block
    # These layers detect more complex features
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Third Convolutional Block
    # These layers detect even more complex patterns
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Flatten and Dense Layers
    layers.Flatten(),                        # Convert 2D features to 1D for classification
    layers.Dense(128, activation='relu'),    # Fully connected layer
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax'),  # Output layer with 10 classes
])

# Step 7: Compile the model
print("\nCompiling the model...")
model.compile(
    optimizer='adam',                 # Optimization algorithm
    loss='categorical_crossentropy',  # Loss function for multi-class classification
    metrics=['accuracy'],             # Track accuracy during training
)

# Display model architecture
print("\nModel Architecture:")
model.summary()
print(f"\nTotal parameters: {model.count_params():,}")

# Step 8: Set up data augmentation (optional but improves accuracy)
print("\n=== Setting up Data Augmentation ===")
print("Data augmentation creates variations of images to improve model generalization")

# Create data augmentation layers. They are applied to the TRAINING data
# only (see the tf.data pipeline below); validation/test images are left
# unmodified.
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),  # Randomly flip images horizontally
    layers.RandomRotation(0.1),       # Randomly rotate images
    layers.RandomZoom(0.1),           # Randomly zoom images
])

# Build a training pipeline that actually applies the augmentation.
# (Previously data_augmentation was defined but never used, so the model
# trained on the raw images only.)
BATCH_SIZE = 64  # Number of images to process at once
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train_categorical))
    .shuffle(1024)
    .batch(BATCH_SIZE)
    .map(lambda x, y: (data_augmentation(x, training=True), y),
         num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

# Step 9: Train the model
print("\n=== Training the Model ===")
print("This will take 3-5 minutes depending on your computer...")
print("The model will learn to recognize patterns in the images\n")

# Use callbacks for better training:
# - EarlyStopping halts training when validation loss stops improving and
#   restores the best weights seen.
# - ReduceLROnPlateau halves the learning rate when validation loss plateaus.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=0.00001,
)

# Train the model on the augmented pipeline.
# NOTE(review): using the test set as validation data leaks test information
# into early stopping/LR scheduling; kept for assignment simplicity, but a
# held-out split of the training data would be methodologically cleaner.
history = model.fit(
    train_dataset,
    epochs=30,  # Number of times to go through the entire dataset
    validation_data=(X_test, y_test_categorical),
    callbacks=[early_stopping, reduce_lr],  # Training helpers
    verbose=1,  # Show progress bar
)
print("\n\u2713 Training completed!")

# Step 10: Visualize training history
print("\n=== Visualizing Training History ===")
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Plot accuracy
axes[0].plot(history.history['accuracy'], label='Training Accuracy')
axes[0].plot(history.history['val_accuracy'], label='Validation Accuracy')
axes[0].set_title('Model Accuracy')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Accuracy')
axes[0].legend()
axes[0].grid(True)

# Plot loss
axes[1].plot(history.history['loss'], label='Training Loss')
axes[1].plot(history.history['val_loss'], label='Validation Loss')
axes[1].set_title('Model Loss')
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Loss')
axes[1].legend()
axes[1].grid(True)

plt.tight_layout()
plt.savefig('training_history_cv.png')
plt.show()

# Step 11: Evaluate the model
print("\n=== Model Evaluation ===")
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical, verbose=0)
print(f"Test Accuracy: {test_accuracy*100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")

# Step 12: Make predictions and show results
print("\n=== Making Predictions on Test Images ===")

# Get predictions (class-probability rows) for the first 20 test images.
predictions = model.predict(X_test[:20], verbose=0)

# Visualize predictions
fig, axes = plt.subplots(4, 5, figsize=(15, 12))
fig.suptitle('Model Predictions on Test Images', fontsize=16)
for i in range(20):
    # Get image, true label, and the model's top prediction + confidence.
    image = X_test[i]
    true_label = class_names[y_test[i][0]]
    predicted_label = class_names[np.argmax(predictions[i])]
    confidence = np.max(predictions[i]) * 100

    # Plot image
    ax = axes[i // 5, i % 5]
    ax.imshow(image)

    # Color code: green for correct, red for incorrect.
    color = 'green' if true_label == predicted_label else 'red'
    ax.set_title(f'True: {true_label}\nPred: {predicted_label}\nConf: {confidence:.1f}%',
                 color=color, fontsize=10)
    ax.axis('off')

plt.tight_layout()
plt.savefig('predictions_cv.png')
plt.show()

# Step 13: Create confusion matrix
print("\n=== Creating Confusion Matrix ===")

# Get predictions for the entire test set and reduce to class indices.
y_pred = model.predict(X_test, verbose=0)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = y_test.reshape(-1)

# Create confusion matrix (rows = true class, columns = predicted class).
cm = confusion_matrix(y_true_classes, y_pred_classes)

# Plot confusion matrix
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.savefig('confusion_matrix.png')
plt.show()

# Step 14: Print classification report (precision/recall/F1 per class)
print("\n=== Classification Report ===")
print(classification_report(y_true_classes, y_pred_classes, target_names=class_names))

# Step 15: Save the model
print("\n=== Saving the Model ===")
model.save('image_classifier_model.h5')
print("Model saved as 'image_classifier_model.h5'")

# Step 16: Test with a single image
print("\n=== Testing with a Single Image ===")

# Pick a random test image.
test_idx = np.random.randint(0, len(X_test))
test_image = X_test[test_idx]
test_label = class_names[y_test[test_idx][0]]

# Make prediction - the model expects a batch, so add a leading axis.
single_prediction = model.predict(test_image.reshape(1, 32, 32, 3), verbose=0)
predicted_class = class_names[np.argmax(single_prediction)]
confidence = np.max(single_prediction) * 100

# Display the image and prediction.
plt.figure(figsize=(6, 6))
plt.imshow(test_image)
plt.title(f'Actual: {test_label}\nPredicted: {predicted_class}\nConfidence: {confidence:.2f}%')
plt.axis('off')
plt.savefig('single_prediction.png')
plt.show()

print(f"Actual class: {test_label}")
print(f"Predicted class: {predicted_class}")
print(f"Confidence: {confidence:.2f}%")

print("\n\u2705 Assignment 2 Complete! Your computer vision model is ready!")
print("Files created: image_classifier_model.h5, sample_images.png, training_history_cv.png,")
print("               predictions_cv.png, confusion_matrix.png, single_prediction.png")
print("\nYour model can now classify images into 10 different categories!")