# Source: bootcamp / image_classification.py
# Uploaded by mFaizann137 ("Upload 4 files", commit d931164, verified)
# Assignment 2: Image Classification using Deep Learning (Computer Vision)
# This program classifies images into different categories using a Convolutional Neural Network (CNN)
# Step 1: Import all necessary libraries
import numpy as np # For numerical calculations
import matplotlib.pyplot as plt # For showing images and graphs
import tensorflow as tf # Main deep learning library
from tensorflow import keras # High-level API for building models
from tensorflow.keras import layers # For creating neural network layers
from tensorflow.keras.datasets import cifar10 # Built-in image dataset
from tensorflow.keras.utils import to_categorical # For preparing labels
import warnings
warnings.filterwarnings('ignore') # Hide unnecessary warnings
# Step 2: Load the CIFAR-10 Dataset
# CIFAR-10 is a standard benchmark: 60,000 tiny colour images in 10 classes.
print("=== Loading CIFAR-10 Dataset ===")
print("This dataset contains 60,000 32x32 color images in 10 categories")
print("Downloading dataset (this may take a minute on first run)...\n")

# Keras ships the dataset pre-split into training and testing partitions.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Human-readable names for the 10 CIFAR-10 label indices (0-9, in order).
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Step 3: Explore the dataset
print("=== Dataset Information ===")
print(f"Training images: {X_train.shape[0]}")
print(f"Testing images: {X_test.shape[0]}")
print(f"Image shape: {X_train.shape[1:]} (32x32 pixels, 3 color channels for RGB)")
print(f"Number of classes: {len(class_names)}")
print(f"Classes: {', '.join(class_names)}\n")
# Step 4: Visualize sample images from the dataset
print("=== Visualizing Sample Images ===")
fig, axes = plt.subplots(3, 5, figsize=(12, 8))
fig.suptitle('Sample Images from CIFAR-10 Dataset', fontsize=16)

# Fill the 3x5 grid with randomly chosen training images, one per axis,
# titled with the image's class name.
for ax in axes.flat:
    idx = np.random.randint(0, len(X_train))
    ax.imshow(X_train[idx])
    ax.set_title(f'Class: {class_names[y_train[idx][0]]}')
    ax.axis('off')

plt.tight_layout()
plt.savefig('sample_images.png')
plt.show()
# Step 5: Preprocess the data
print("\n=== Preprocessing Data ===")

# Rescale pixel intensities from the raw 0-255 range into [0, 1];
# networks train more reliably on small, uniformly-scaled inputs.
X_train, X_test = (split.astype('float32') / 255.0 for split in (X_train, X_test))
print("✓ Normalized pixel values to range [0, 1]")

# One-hot encode the integer labels (e.g. 3 -> [0,0,0,1,0,0,0,0,0,0])
# so they line up with the 10-way softmax output of the model.
y_train_categorical = to_categorical(y_train, 10)
y_test_categorical = to_categorical(y_test, 10)
print("✓ Converted labels to categorical format")
# Step 6: Build the Convolutional Neural Network (CNN)
print("\n=== Building CNN Model ===")
print("Creating a Convolutional Neural Network for image classification...")

# Assemble the network layer by layer. Three convolutional blocks of
# increasing width (32 -> 64 -> 128 filters) extract progressively more
# abstract features; a dense head maps them onto the 10 class scores.
model = keras.Sequential()

# Block 1: low-level features such as edges and colour blobs.
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.BatchNormalization())   # stabilises activations during training
model.add(layers.Conv2D(32, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))   # halve spatial resolution, keep salient features
model.add(layers.Dropout(0.25))          # regularisation against overfitting

# Block 2: mid-level features (textures, simple shapes).
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25))

# Block 3: higher-level patterns.
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25))

# Classification head.
model.add(layers.Flatten())                        # 2D feature maps -> 1D vector
model.add(layers.Dense(128, activation='relu'))    # fully connected layer
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(10, activation='softmax'))  # per-class probabilities
# Step 7: Compile the model
print("\nCompiling the model...")

# Adam optimiser with categorical cross-entropy: the standard pairing for
# multi-class classification with one-hot labels. Accuracy is tracked
# alongside the loss during training.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Display model architecture
print("\nModel Architecture:")
model.summary()
print(f"\nTotal parameters: {model.count_params():,}")
# Step 8: Set up data augmentation (optional but improves accuracy)
print("\n=== Setting up Data Augmentation ===")
print("Data augmentation creates variations of images to improve model generalization")
# A small Sequential pipeline of random, label-preserving image transforms.
# NOTE(review): Keras preprocessing layers like these are typically only
# stochastic when invoked with training=True — confirm against the Keras docs
# for the installed version.
data_augmentation = keras.Sequential([
layers.RandomFlip("horizontal"), # Randomly mirror images left-to-right
layers.RandomRotation(0.1), # Randomly rotate images (factor 0.1)
layers.RandomZoom(0.1), # Randomly zoom images (factor 0.1)
])
# Step 9: Train the model
print("\n=== Training the Model ===")
print("This will take 3-5 minutes depending on your computer...")
print("The model will learn to recognize patterns in the images\n")

# Use callbacks for better training:
# EarlyStopping halts training when validation loss stops improving and
# restores the best weights seen so far.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)
# ReduceLROnPlateau halves the learning rate whenever validation loss
# plateaus, down to a floor of 1e-5.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=0.00001
)

# BUG FIX: the data_augmentation pipeline built in Step 8 was defined but
# never used, so the model actually trained on raw images only. Route the
# training batches through it here with a tf.data pipeline so every epoch
# sees freshly augmented variants. (batch_size moves into .batch(); Keras
# ignores the batch_size argument when fitting on a dataset.)
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train_categorical))
    .shuffle(len(X_train))  # reshuffle each epoch
    .batch(64)              # number of images per gradient step
    .map(lambda images, labels: (data_augmentation(images, training=True), labels),
         num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

# Train the model
history = model.fit(
    train_dataset,
    epochs=30,  # maximum passes over the data (EarlyStopping may end sooner)
    validation_data=(X_test, y_test_categorical),  # held-out data for validation
    callbacks=[early_stopping, reduce_lr],  # training helpers
    verbose=1  # show a progress bar per epoch
)
print("\n✓ Training completed!")
# Step 10: Visualize training history
print("\n=== Visualizing Training History ===")
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: accuracy curves; right panel: loss curves. Each panel plots
# the training metric against its validation counterpart per epoch.
panels = [
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy',
     'Training Accuracy', 'Validation Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss',
     'Training Loss', 'Validation Loss'),
]
for ax, (train_key, val_key, title, ylabel, train_lbl, val_lbl) in zip(axes, panels):
    ax.plot(history.history[train_key], label=train_lbl)
    ax.plot(history.history[val_key], label=val_lbl)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.savefig('training_history_cv.png')
plt.show()
# Step 11: Evaluate the model
print("\n=== Model Evaluation ===")
# Score the trained network on the held-out test set (verbose=0 keeps it quiet).
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical, verbose=0)
print(f"Test Accuracy: {test_accuracy*100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")

# Step 12: Make predictions and show results
print("\n=== Making Predictions on Test Images ===")
# Class-probability vectors for the first 20 test images.
predictions = model.predict(X_test[:20], verbose=0)

# Show each image with its true label, predicted label and confidence.
# Titles are green when the prediction is correct and red otherwise.
fig, axes = plt.subplots(4, 5, figsize=(15, 12))
fig.suptitle('Model Predictions on Test Images', fontsize=16)
for i, ax in enumerate(axes.flat):
    truth = class_names[y_test[i][0]]
    guess = class_names[np.argmax(predictions[i])]
    confidence = np.max(predictions[i]) * 100
    ax.imshow(X_test[i])
    ax.set_title(f'True: {truth}\nPred: {guess}\nConf: {confidence:.1f}%',
                 color='green' if truth == guess else 'red', fontsize=10)
    ax.axis('off')

plt.tight_layout()
plt.savefig('predictions_cv.png')
plt.show()
# Step 13: Create confusion matrix
print("\n=== Creating Confusion Matrix ===")
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Collapse the predicted probability vectors and the column-shaped true
# labels into flat arrays of class indices for scikit-learn.
y_pred = model.predict(X_test, verbose=0)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = y_test.reshape(-1)

# Render the 10x10 confusion matrix as a heat-map with integer counts.
cm = confusion_matrix(y_true_classes, y_pred_classes)
plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.savefig('confusion_matrix.png')
plt.show()

# Step 14: Print classification report
print("\n=== Classification Report ===")
print(classification_report(y_true_classes, y_pred_classes, target_names=class_names))
# Step 15: Save the model
print("\n=== Saving the Model ===")
# Persist the trained network (architecture + weights) to disk.
model.save('image_classifier_model.h5')
print("Model saved as 'image_classifier_model.h5'")

# Step 16: Test with a single image
print("\n=== Testing with a Single Image ===")
# Pick one random test image and classify it on its own.
test_idx = np.random.randint(0, len(X_test))
test_image = X_test[test_idx]
test_label = class_names[y_test[test_idx][0]]

# The model expects a batch dimension, hence the reshape to (1, 32, 32, 3).
single_prediction = model.predict(test_image.reshape(1, 32, 32, 3), verbose=0)
predicted_class = class_names[np.argmax(single_prediction)]
confidence = np.max(single_prediction) * 100

# Display the image together with the actual and predicted classes.
plt.figure(figsize=(6, 6))
plt.imshow(test_image)
plt.title(f'Actual: {test_label}\nPredicted: {predicted_class}\nConfidence: {confidence:.2f}%')
plt.axis('off')
plt.savefig('single_prediction.png')
plt.show()

print(f"Actual class: {test_label}")
print(f"Predicted class: {predicted_class}")
print(f"Confidence: {confidence:.2f}%")

print("\n✅ Assignment 2 Complete! Your computer vision model is ready!")
print("Files created: image_classifier_model.h5, sample_images.png, training_history_cv.png,")
print(" predictions_cv.png, confusion_matrix.png, single_prediction.png")
print("\nYour model can now classify images into 10 different categories!")