# Spaces:

# mFaizann137
# /

# bootcamp

# No application file

# File size: 10,530 Bytes

# d931164

# Assignment 2: Image Classification using Deep Learning (Computer Vision)
# This program classifies images into different categories using a Convolutional Neural Network (CNN)

# Step 1: Import all necessary libraries
import numpy as np  # For numerical calculations
import matplotlib.pyplot as plt  # For showing images and graphs
import tensorflow as tf  # Main deep learning library
from tensorflow import keras  # High-level API for building models
from tensorflow.keras import layers  # For creating neural network layers
from tensorflow.keras.datasets import cifar10  # Built-in image dataset
from tensorflow.keras.utils import to_categorical  # For preparing labels
import warnings
warnings.filterwarnings('ignore')  # Hide unnecessary warnings

# Step 2: Load the CIFAR-10 Dataset
# CIFAR-10 is bundled with Keras and is fetched the first time it is requested.
print(
    "=== Loading CIFAR-10 Dataset ===",
    "This dataset contains 60,000 32x32 color images in 10 categories",
    "Downloading dataset (this may take a minute on first run)...\n",
    sep="\n",
)

# load_data() hands back two (images, labels) pairs: the training split
# followed by the testing split.
train_split, test_split = cifar10.load_data()
X_train, y_train = train_split
X_test, y_test = test_split

# Human-readable names for the 10 CIFAR-10 classes
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Step 3: Explore the dataset
image_shape = X_train.shape[1:]  # everything after the leading sample axis
print("=== Dataset Information ===")
print(f"Training images: {len(X_train)}")
print(f"Testing images: {len(X_test)}")
print(f"Image shape: {image_shape} (32x32 pixels, 3 color channels for RGB)")
print(f"Number of classes: {len(class_names)}")
print(f"Classes: {', '.join(class_names)}\n")

# Step 4: Visualize sample images from the dataset
print("=== Visualizing Sample Images ===")
sample_fig, sample_axes = plt.subplots(3, 5, figsize=(12, 8))
sample_fig.suptitle('Sample Images from CIFAR-10 Dataset', fontsize=16)

# Walk the 3x5 grid row by row, filling each cell with a randomly chosen
# training image titled with its class name.
for cell in sample_axes.flat:
    pick = np.random.randint(0, len(X_train))
    cell.imshow(X_train[pick])
    cell.set_title(f'Class: {class_names[y_train[pick][0]]}')
    cell.axis('off')

plt.tight_layout()
plt.savefig('sample_images.png')
plt.show()

# Step 5: Preprocess the data
print("\n=== Preprocessing Data ===")

# Rescale pixel intensities from the 0-255 range down to 0-1 so the network
# trains on small, uniformly scaled inputs.
PIXEL_MAX = 255.0
X_train, X_test = (
    split.astype('float32') / PIXEL_MAX for split in (X_train, X_test)
)
print("✓ Normalized pixel values to range [0, 1]")

# One-hot encode the integer labels,
# e.g. label 3 -> [0,0,0,1,0,0,0,0,0,0]
NUM_CLASSES = 10
y_train_categorical = to_categorical(y_train, NUM_CLASSES)
y_test_categorical = to_categorical(y_test, NUM_CLASSES)
print("✓ Converted labels to categorical format")

# Step 6: Build the Convolutional Neural Network (CNN)
print("\n=== Building CNN Model ===")
print("Creating a Convolutional Neural Network for image classification...")

# Create the model.
# The input shape is declared with an explicit keras.Input as the first entry
# instead of passing `input_shape=` to the first Conv2D: Keras 3 flags the
# latter as deprecated usage for Sequential models, while keras.Input works
# on both older and newer Keras versions.
model = keras.Sequential([
    keras.Input(shape=(32, 32, 3)),  # 32x32 RGB images

    # First Convolutional Block
    # Conv2D layer: Detects features like edges and shapes
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.BatchNormalization(),  # Normalizes the outputs to improve training
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),  # Reduces image size while keeping important features
    layers.Dropout(0.25),  # Prevents overfitting

    # Second Convolutional Block
    # These layers detect more complex features
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Third Convolutional Block
    # These layers detect even more complex patterns
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),
    layers.Dropout(0.25),

    # Flatten and Dense Layers
    layers.Flatten(),  # Convert 2D features to 1D for classification
    layers.Dense(128, activation='relu'),  # Fully connected layer
    layers.BatchNormalization(),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax'),  # Output layer with 10 classes
])

# Step 7: Compile the model
print("\nCompiling the model...")
model.compile(
    optimizer='adam',  # Optimization algorithm
    # categorical_crossentropy expects one-hot encoded labels, which is the
    # format produced by to_categorical in the preprocessing step.
    loss='categorical_crossentropy',
    metrics=['accuracy'],  # Track accuracy during training
)

# Display model architecture (per-layer output shapes and parameter counts)
print("\nModel Architecture:")
model.summary()
print(f"\nTotal parameters: {model.count_params():,}")

# Step 8: Set up data augmentation (optional but improves accuracy)
print("\n=== Setting up Data Augmentation ===")
print("Data augmentation creates variations of images to improve model generalization")

# Create data augmentation layer
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),  # Randomly flip images horizontally
    layers.RandomRotation(0.1),  # Randomly rotate images
    layers.RandomZoom(0.1),  # Randomly zoom images
])


def _augment_batch(images, labels):
    """Apply every augmentation layer to a batch of images; labels pass through unchanged."""
    # The layers are invoked one at a time with training=True so the random
    # transforms are active while mapping over the dataset.
    for augmentation_layer in data_augmentation.layers:
        images = augmentation_layer(images, training=True)
    return images, labels


# Step 9: Train the model
print("\n=== Training the Model ===")
print("This will take 3-5 minutes depending on your computer...")
print("The model will learn to recognize patterns in the images\n")

# Hold out the tail of the training set for validation (the same "last
# samples" convention Keras' validation_split uses). Early stopping,
# best-weight restoration and LR scheduling are all driven by val_loss, so
# validating on the test set would leak it into model selection and make the
# later test-set evaluation optimistic.
VALIDATION_FRACTION = 0.1
BATCH_SIZE = 64  # Number of images to process at once
split_at = len(X_train) - int(len(X_train) * VALIDATION_FRACTION)
X_fit, y_fit = X_train[:split_at], y_train_categorical[:split_at]
X_val, y_val = X_train[split_at:], y_train_categorical[split_at:]

# Feed the training images through the augmentation layers. Only the
# training stream is augmented; validation images are used as-is.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_fit, y_fit))
    .shuffle(10_000)  # bounded buffer keeps memory use modest; reshuffles every epoch
    .batch(BATCH_SIZE)
    .map(_augment_batch, num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

# Use callbacks for better training
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,  # roll back to the epoch with the lowest val_loss
)

reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,  # halve the learning rate when val_loss stalls
    patience=5,
    min_lr=0.00001,
)

# Train the model (the batch size is set by the dataset pipeline above)
history = model.fit(
    train_dataset,
    epochs=30,  # Number of times to go through the entire dataset
    validation_data=(X_val, y_val),  # Held-out slice of the training data
    callbacks=[early_stopping, reduce_lr],  # Training helpers
    verbose=1,  # Show progress bar
)

print("\n✓ Training completed!")

# Step 10: Visualize training history
print("\n=== Visualizing Training History ===")

history_fig, history_axes = plt.subplots(1, 2, figsize=(12, 4))

# One panel per metric: (train key, validation key, train legend,
# validation legend, panel title, y-axis label).
panel_specs = (
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
     'Model Accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
     'Model Loss', 'Loss'),
)

curves = history.history  # per-epoch values recorded by model.fit
for panel, spec in zip(history_axes, panel_specs):
    train_key, val_key, train_legend, val_legend, title, y_label = spec
    panel.plot(curves[train_key], label=train_legend)
    panel.plot(curves[val_key], label=val_legend)
    panel.set_title(title)
    panel.set_xlabel('Epoch')
    panel.set_ylabel(y_label)
    panel.legend()
    panel.grid(True)

plt.tight_layout()
plt.savefig('training_history_cv.png')
plt.show()

# Step 11: Evaluate the model
print("\n=== Model Evaluation ===")
# evaluate() yields the loss followed by the compiled metric (accuracy).
evaluation = model.evaluate(X_test, y_test_categorical, verbose=0)
test_loss, test_accuracy = evaluation
accuracy_percent = test_accuracy * 100
print(f"Test Accuracy: {accuracy_percent:.2f}%")
print(f"Test Loss: {test_loss:.4f}")

# Step 12: Make predictions and show results
print("\n=== Making Predictions on Test Images ===")

# Class probabilities for the first 20 test images
predictions = model.predict(X_test[:20], verbose=0)

# Lay the 20 images out on a 4x5 grid, one prediction per cell
grid_fig, grid_axes = plt.subplots(4, 5, figsize=(15, 12))
grid_fig.suptitle('Model Predictions on Test Images', fontsize=16)

for i, (cell, probabilities) in enumerate(zip(grid_axes.flat, predictions)):
    # Ground truth versus the most probable class
    true_label = class_names[y_test[i][0]]
    best_class = np.argmax(probabilities)
    predicted_label = class_names[best_class]
    confidence = probabilities[best_class] * 100

    cell.imshow(X_test[i])

    # Title color: green when the prediction is right, red when it is wrong
    if true_label == predicted_label:
        color = 'green'
    else:
        color = 'red'
    cell.set_title(
        f'True: {true_label}\nPred: {predicted_label}\nConf: {confidence:.1f}%',
        color=color,
        fontsize=10,
    )
    cell.axis('off')

plt.tight_layout()
plt.savefig('predictions_cv.png')
plt.show()

# Step 13: Create confusion matrix
print("\n=== Creating Confusion Matrix ===")
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Predict the whole test set, then reduce each probability row to a class index
y_pred = model.predict(X_test, verbose=0)
y_pred_classes = y_pred.argmax(axis=1)
# Flatten the label array to one dimension so it lines up with the predictions
y_true_classes = y_test.ravel()

# Confusion matrix of true classes against predicted classes
cm = confusion_matrix(y_true_classes, y_pred_classes)

# Draw the matrix as an annotated heatmap with class names on both axes
heatmap_options = {
    'annot': True,
    'fmt': 'd',
    'cmap': 'Blues',
    'xticklabels': class_names,
    'yticklabels': class_names,
}
plt.figure(figsize=(10, 8))
sns.heatmap(cm, **heatmap_options)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.savefig('confusion_matrix.png')
plt.show()

# Step 14: Print classification report
print("\n=== Classification Report ===")
report = classification_report(
    y_true_classes,
    y_pred_classes,
    target_names=class_names,
)
print(report)

# Step 15: Save the model
print("\n=== Saving the Model ===")
# Keep the file name in one place so the saved path and the message agree.
model_path = 'image_classifier_model.h5'
model.save(model_path)
print(f"Model saved as '{model_path}'")

# Step 16: Test with a single image
print("\n=== Testing with a Single Image ===")

# Draw one test image at random, together with its true class name
test_idx = np.random.randint(0, len(X_test))
test_image = X_test[test_idx]
test_label = class_names[y_test[test_idx][0]]

# predict() works on batches, so add a leading batch axis of size 1
single_batch = np.expand_dims(test_image, axis=0)
single_prediction = model.predict(single_batch, verbose=0)
predicted_class = class_names[single_prediction.argmax()]
confidence = single_prediction.max() * 100

# Show the image with the actual class, predicted class and confidence
plt.figure(figsize=(6, 6))
plt.imshow(test_image)
plt.title(f'Actual: {test_label}\nPredicted: {predicted_class}\nConfidence: {confidence:.2f}%')
plt.axis('off')
plt.savefig('single_prediction.png')
plt.show()

print(
    f"Actual class: {test_label}",
    f"Predicted class: {predicted_class}",
    f"Confidence: {confidence:.2f}%",
    sep="\n",
)

# Closing summary: completion banner plus the list of artifacts written above.
print(
    "\n✅ Assignment 2 Complete! Your computer vision model is ready!",
    "Files created: image_classifier_model.h5, sample_images.png, training_history_cv.png,",
    "              predictions_cv.png, confusion_matrix.png, single_prediction.png",
    "\nYour model can now classify images into 10 different categories!",
    sep="\n",
)