# Assignment 2: Image Classification using Deep Learning (Computer Vision)
# This program classifies images into different categories using a Convolutional Neural Network (CNN)
# Step 1: Import all necessary libraries
import numpy as np # numerical arrays and random index sampling
import matplotlib.pyplot as plt # image grids and training-curve plots
import tensorflow as tf # deep learning backend
from tensorflow import keras # high-level API for building/training models
from tensorflow.keras import layers # neural network layer classes
from tensorflow.keras.datasets import cifar10 # built-in CIFAR-10 image dataset (auto-downloads)
from tensorflow.keras.utils import to_categorical # integer labels -> one-hot vectors
import warnings
warnings.filterwarnings('ignore') # NOTE: silences ALL Python warnings, including deprecation notices
# Step 2: Load the CIFAR-10 dataset.
# CIFAR-10: 60,000 32x32 RGB images spread evenly over 10 classes.
print("=== Loading CIFAR-10 Dataset ===")
print("This dataset contains 60,000 32x32 color images in 10 categories")
print("Downloading dataset (this may take a minute on first run)...\n")

# Human-readable names, indexed by the integer labels in y_train/y_test.
class_names = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck',
]

# Keras returns the canonical split: 50,000 training / 10,000 test images.
(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Step 3: Report the basic shape of the data.
print("=== Dataset Information ===")
print(f"Training images: {len(X_train)}")
print(f"Testing images: {len(X_test)}")
print(f"Image shape: {X_train.shape[1:]} (32x32 pixels, 3 color channels for RGB)")
print(f"Number of classes: {len(class_names)}")
print(f"Classes: {', '.join(class_names)}\n")
# Step 4: Visualize a random selection of training images.
# FIX: sample indices WITHOUT replacement so the 3x5 grid never shows the
# same image twice (the original drew 15 independent randint values, which
# could repeat).
print("=== Visualizing Sample Images ===")
fig, axes = plt.subplots(3, 5, figsize=(12, 8))
fig.suptitle('Sample Images from CIFAR-10 Dataset', fontsize=16)

sample_indices = np.random.choice(len(X_train), size=15, replace=False)
for i, idx in enumerate(sample_indices):
    image = X_train[idx]
    # y_train rows are 1-element arrays, hence the [0] to get the int label.
    label = class_names[y_train[idx][0]]
    ax = axes[i // 5, i % 5]
    ax.imshow(image)
    ax.set_title(f'Class: {label}')
    ax.axis('off')

plt.tight_layout()
plt.savefig('sample_images.png')
plt.show()
# Step 5: Preprocess the data.
print("\n=== Preprocessing Data ===")

# Scale raw uint8 pixels (0-255) down to float32 values in [0, 1];
# small, uniformly scaled inputs make gradient descent converge better.
X_train, X_test = (split.astype('float32') / 255.0 for split in (X_train, X_test))
print("✓ Normalized pixel values to range [0, 1]")

# One-hot encode the integer labels (e.g. 3 -> [0,0,0,1,0,0,0,0,0,0])
# to match the 10-way softmax output and categorical_crossentropy loss.
y_train_categorical, y_test_categorical = (to_categorical(lbls, 10) for lbls in (y_train, y_test))
print("✓ Converted labels to categorical format")
# Step 6: Build the Convolutional Neural Network (CNN).
print("\n=== Building CNN Model ===")
print("Creating a Convolutional Neural Network for image classification...")

# Same architecture as before, assembled incrementally with model.add():
# three convolutional stages of increasing width (32 -> 64 -> 128 filters),
# each ending in pooling + dropout, followed by a dense classifier head.
model = keras.Sequential()

# Stage 1: low-level features (edges, colors, simple textures).
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.BatchNormalization())  # normalizes activations to stabilize training
model.add(layers.Conv2D(32, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))  # halve spatial size, keep strongest responses
model.add(layers.Dropout(0.25))         # regularization against overfitting

# Stage 2: mid-level features built from stage-1 outputs.
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25))

# Stage 3: higher-level patterns.
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25))

# Classifier head: flatten the feature maps and map to 10 class probabilities.
model.add(layers.Flatten())
model.add(layers.Dense(128, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(10, activation='softmax'))
# Step 7: Compile the model — fixes the optimizer, loss, and metrics.
print("\nCompiling the model...")
compile_settings = {
    'optimizer': 'adam',                 # adaptive learning-rate optimizer, good default
    'loss': 'categorical_crossentropy',  # matches one-hot labels + softmax output
    'metrics': ['accuracy'],             # human-readable metric tracked each epoch
}
model.compile(**compile_settings)

# Print a per-layer summary of the architecture.
print("\nModel Architecture:")
model.summary()
print(f"\nTotal parameters: {model.count_params():,}")
# Step 8: Set up data augmentation
print("\n=== Setting up Data Augmentation ===")
print("Data augmentation creates variations of images to improve model generalization")

# Random, label-preserving transforms applied on the fly to training batches.
data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),  # mirror left/right (valid for all 10 classes)
    layers.RandomRotation(0.1),       # rotate up to +/-10% of a full turn
    layers.RandomZoom(0.1),           # zoom in/out by up to 10%
])

# Step 9: Train the model
print("\n=== Training the Model ===")
print("This will take 3-5 minutes depending on your computer...")
print("The model will learn to recognize patterns in the images\n")

# Stop once validation loss has not improved for 10 epochs, restoring the
# best weights seen so far.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)
# Halve the learning rate whenever validation loss stalls for 5 epochs.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=0.00001
)

# BUG FIX: the augmentation pipeline above was defined but never applied —
# model.fit trained on the raw arrays. Route training batches through it with
# tf.data so every epoch sees freshly augmented images; validation data is
# deliberately left un-augmented.
train_ds = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train_categorical))
    .shuffle(len(X_train))
    .batch(64)  # images processed per gradient step (same as before)
    .map(lambda x, y: (data_augmentation(x, training=True), y),
         num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

history = model.fit(
    train_ds,
    epochs=30,  # full passes over the data (early stopping may end sooner)
    validation_data=(X_test, y_test_categorical),
    callbacks=[early_stopping, reduce_lr],
    verbose=1  # show progress bar
)
print("\n✓ Training completed!")
# Step 10: Plot accuracy and loss curves, training vs. validation.
print("\n=== Visualizing Training History ===")
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# Both panels share the same layout; only the metric plotted differs.
for ax, metric in zip(axes, ('accuracy', 'loss')):
    pretty = metric.capitalize()  # 'Accuracy' / 'Loss'
    ax.plot(history.history[metric], label=f'Training {pretty}')
    ax.plot(history.history[f'val_{metric}'], label=f'Validation {pretty}')
    ax.set_title(f'Model {pretty}')
    ax.set_xlabel('Epoch')
    ax.set_ylabel(pretty)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.savefig('training_history_cv.png')
plt.show()
# Step 11: Evaluate on the held-out test set (verbose=0 hides the progress bar).
print("\n=== Model Evaluation ===")
eval_results = model.evaluate(X_test, y_test_categorical, verbose=0)
test_loss, test_accuracy = eval_results
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")
# Step 12: Show the model's predictions on the first 20 test images.
print("\n=== Making Predictions on Test Images ===")
# Softmax probability vectors for the first 20 test images.
predictions = model.predict(X_test[:20], verbose=0)

fig, axes = plt.subplots(4, 5, figsize=(15, 12))
fig.suptitle('Model Predictions on Test Images', fontsize=16)

# axes.flat walks the 4x5 grid row-major, matching test image order.
for i, ax in enumerate(axes.flat):
    probs = predictions[i]
    true_label = class_names[y_test[i][0]]
    predicted_label = class_names[np.argmax(probs)]
    confidence = probs.max() * 100
    ax.imshow(X_test[i])
    # Green title = correct prediction, red = wrong.
    color = 'green' if true_label == predicted_label else 'red'
    ax.set_title(f'True: {true_label}\nPred: {predicted_label}\nConf: {confidence:.1f}%',
                 color=color, fontsize=10)
    ax.axis('off')

plt.tight_layout()
plt.savefig('predictions_cv.png')
plt.show()
# Step 13: Confusion matrix over the full test set.
print("\n=== Creating Confusion Matrix ===")
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Collapse softmax outputs and the (N, 1) label array to flat class indices.
y_pred = model.predict(X_test, verbose=0)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = y_test.reshape(-1)

conf_mat = confusion_matrix(y_true_classes, y_pred_classes)

# Heatmap: rows = true class, columns = predicted class.
plt.figure(figsize=(10, 8))
sns.heatmap(conf_mat, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.savefig('confusion_matrix.png')
plt.show()

# Step 14: Per-class precision / recall / F1.
print("\n=== Classification Report ===")
print(classification_report(y_true_classes, y_pred_classes, target_names=class_names))
# Step 15: Persist the trained model (legacy HDF5 format, as per assignment).
print("\n=== Saving the Model ===")
MODEL_FILE = 'image_classifier_model.h5'
model.save(MODEL_FILE)
print(f"Model saved as '{MODEL_FILE}'")
# Step 16: Sanity-check the model on one randomly chosen test image.
print("\n=== Testing with a Single Image ===")
test_idx = np.random.randint(0, len(X_test))
test_image = X_test[test_idx]
test_label = class_names[y_test[test_idx][0]]

# The model expects a batch dimension, so feed a (1, 32, 32, 3) array.
single_prediction = model.predict(test_image[np.newaxis, ...], verbose=0)
predicted_class = class_names[np.argmax(single_prediction)]
confidence = single_prediction.max() * 100

# Show the image alongside the actual vs. predicted class.
plt.figure(figsize=(6, 6))
plt.imshow(test_image)
plt.title(f'Actual: {test_label}\nPredicted: {predicted_class}\nConfidence: {confidence:.2f}%')
plt.axis('off')
plt.savefig('single_prediction.png')
plt.show()

print(f"Actual class: {test_label}")
print(f"Predicted class: {predicted_class}")
print(f"Confidence: {confidence:.2f}%")
# Final summary of everything the script produced.
closing_messages = (
    "\n✅ Assignment 2 Complete! Your computer vision model is ready!",
    "Files created: image_classifier_model.h5, sample_images.png, training_history_cv.png,",
    " predictions_cv.png, confusion_matrix.png, single_prediction.png",
    "\nYour model can now classify images into 10 different categories!",
)
for message in closing_messages:
    print(message)