Spaces:
No application file
No application file
| # Assignment 2: Image Classification using Deep Learning (Computer Vision) | |
| # This program classifies images into different categories using a Convolutional Neural Network (CNN) | |
| # Step 1: Import all necessary libraries | |
| import numpy as np # For numerical calculations | |
| import matplotlib.pyplot as plt # For showing images and graphs | |
| import tensorflow as tf # Main deep learning library | |
| from tensorflow import keras # High-level API for building models | |
| from tensorflow.keras import layers # For creating neural network layers | |
| from tensorflow.keras.datasets import cifar10 # Built-in image dataset | |
| from tensorflow.keras.utils import to_categorical # For preparing labels | |
| import warnings | |
| warnings.filterwarnings('ignore') # Hide unnecessary warnings | |
# ---------------------------------------------------------------------------
# Step 2: Load the CIFAR-10 dataset.
# Keras downloads it on first use and returns it pre-split into training
# and testing subsets of (image, integer-label) arrays.
# ---------------------------------------------------------------------------
print("=== Loading CIFAR-10 Dataset ===")
print("This dataset contains 60,000 32x32 color images in 10 categories")
print("Downloading dataset (this may take a minute on first run)...\n")

(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Human-readable names for the 10 integer labels (list index == label id).
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
# Step 3: Report basic dataset statistics so the run is self-describing.
print("=== Dataset Information ===")
n_train, n_test = X_train.shape[0], X_test.shape[0]
print(f"Training images: {n_train}")
print(f"Testing images: {n_test}")
print(f"Image shape: {X_train.shape[1:]} (32x32 pixels, 3 color channels for RGB)")
print(f"Number of classes: {len(class_names)}")
print(f"Classes: {', '.join(class_names)}\n")
# Step 4: Show a 3x5 grid of randomly chosen training images with their
# class labels, and save the figure to disk.
print("=== Visualizing Sample Images ===")
fig, axes = plt.subplots(3, 5, figsize=(12, 8))
fig.suptitle('Sample Images from CIFAR-10 Dataset', fontsize=16)

# axes.flat walks the grid in row-major order, one axis per sample.
for ax in axes.flat:
    idx = np.random.randint(0, len(X_train))
    ax.imshow(X_train[idx])
    # y_train rows are single-element arrays, hence the [0] indexing.
    ax.set_title(f'Class: {class_names[y_train[idx][0]]}')
    ax.axis('off')

plt.tight_layout()
plt.savefig('sample_images.png')
plt.show()
# Step 5: Prepare the data for training.
print("\n=== Preprocessing Data ===")

# Scale raw uint8 pixels (0-255) down to [0, 1] floats; small input
# magnitudes make gradient-based training far more stable.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
print("✓ Normalized pixel values to range [0, 1]")

# One-hot encode the integer labels (e.g. 3 -> [0,0,0,1,0,0,0,0,0,0])
# to match the 10-way softmax output and categorical_crossentropy loss.
y_train_categorical = to_categorical(y_train, 10)
y_test_categorical = to_categorical(y_test, 10)
print("✓ Converted labels to categorical format")
# Step 6: Build the CNN — three convolutional blocks of increasing depth
# (32 -> 64 -> 128 filters) followed by a dense classification head.
print("\n=== Building CNN Model ===")
print("Creating a Convolutional Neural Network for image classification...")

model = keras.Sequential()

# --- Block 1: two 3x3 convolutions over the raw 32x32x3 image ---
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.BatchNormalization())   # stabilizes activations during training
model.add(layers.Conv2D(32, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))   # halve the spatial resolution
model.add(layers.Dropout(0.25))          # regularize against overfitting

# --- Block 2: deeper filters for more complex features ---
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25))

# --- Block 3: single high-capacity convolution ---
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25))

# --- Classification head ---
model.add(layers.Flatten())                          # 2D feature maps -> 1D vector
model.add(layers.Dense(128, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(10, activation='softmax'))    # one probability per class
# Step 7: Compile — pick the optimizer, loss, and tracked metrics.
print("\nCompiling the model...")
model.compile(
    optimizer='adam',                  # adaptive learning-rate gradient descent
    loss='categorical_crossentropy',   # matches the one-hot labels above
    metrics=['accuracy'],              # report accuracy each epoch
)

# Print the layer-by-layer architecture and the parameter count.
print("\nModel Architecture:")
model.summary()
print(f"\nTotal parameters: {model.count_params():,}")
# Step 8: Set up data augmentation.
# Random, label-preserving transforms expand the effective training set
# and improve generalization.
print("\n=== Setting up Data Augmentation ===")
print("Data augmentation creates variations of images to improve model generalization")

data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),  # mirror images left<->right
    layers.RandomRotation(0.1),       # rotate by up to ±10% of a full turn
    layers.RandomZoom(0.1),           # zoom in/out by up to 10%
])

# Step 9: Train the model.
print("\n=== Training the Model ===")
print("This will take 3-5 minutes depending on your computer...")
print("The model will learn to recognize patterns in the images\n")

# Stop once validation loss plateaus, and roll back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
# Halve the learning rate whenever validation loss stalls for 5 epochs.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=0.00001,
)

# BUG FIX: data_augmentation was previously defined but never applied —
# model.fit trained on the raw arrays, making the augmentation dead code.
# Build a tf.data pipeline that shuffles, batches, and applies the random
# transforms to every batch (training=True keeps them active each epoch).
train_ds = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train_categorical))
    .shuffle(len(X_train))
    .batch(64)  # same batch size as before
    .map(lambda x, y: (data_augmentation(x, training=True), y),
         num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

history = model.fit(
    train_ds,
    epochs=30,                                        # full passes over the data
    validation_data=(X_test, y_test_categorical),     # monitored by the callbacks
    callbacks=[early_stopping, reduce_lr],
    verbose=1,                                        # show a progress bar
)
print("\n✓ Training completed!")
# Step 10: Plot accuracy and loss curves for the training run, side by side.
print("\n=== Visualizing Training History ===")
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (history key, validation key, title, y-label, train legend, val legend)
panels = [
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy',
     'Training Accuracy', 'Validation Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss',
     'Training Loss', 'Validation Loss'),
]
for ax, (train_key, val_key, title, ylabel, train_lbl, val_lbl) in zip(axes, panels):
    ax.plot(history.history[train_key], label=train_lbl)
    ax.plot(history.history[val_key], label=val_lbl)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.savefig('training_history_cv.png')
plt.show()
# Step 11: Measure final loss and accuracy on the held-out test set.
print("\n=== Model Evaluation ===")
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical, verbose=0)
print(f"Test Accuracy: {test_accuracy*100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")
# Step 12: Predict the first 20 test images and visualize the outcomes.
print("\n=== Making Predictions on Test Images ===")
predictions = model.predict(X_test[:20], verbose=0)

fig, axes = plt.subplots(4, 5, figsize=(15, 12))
fig.suptitle('Model Predictions on Test Images', fontsize=16)

for i, ax in enumerate(axes.flat):
    true_label = class_names[y_test[i][0]]
    predicted_label = class_names[np.argmax(predictions[i])]
    confidence = np.max(predictions[i]) * 100

    ax.imshow(X_test[i])
    # Green title = correct prediction, red = mistake.
    color = 'green' if true_label == predicted_label else 'red'
    ax.set_title(f'True: {true_label}\nPred: {predicted_label}\nConf: {confidence:.1f}%',
                 color=color, fontsize=10)
    ax.axis('off')

plt.tight_layout()
plt.savefig('predictions_cv.png')
plt.show()
# Step 13: Build and plot a confusion matrix over the full test set.
print("\n=== Creating Confusion Matrix ===")
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Collapse the softmax probabilities to class ids for every test image.
probabilities = model.predict(X_test, verbose=0)
y_pred_classes = np.argmax(probabilities, axis=1)
y_true_classes = y_test.reshape(-1)   # flatten (N, 1) labels to (N,)

cm = confusion_matrix(y_true_classes, y_pred_classes)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.savefig('confusion_matrix.png')
plt.show()

# Step 14: Per-class precision / recall / F1 summary.
print("\n=== Classification Report ===")
print(classification_report(y_true_classes, y_pred_classes, target_names=class_names))
# Step 15: Persist the trained model to disk for later reuse.
# NOTE(review): '.h5' is the legacy HDF5 format; the filename is kept as-is
# because the closing summary below lists it.
print("\n=== Saving the Model ===")
model_path = 'image_classifier_model.h5'
model.save(model_path)
print(f"Model saved as '{model_path}'")
# Step 16: Sanity-check the model on one randomly chosen test image.
print("\n=== Testing with a Single Image ===")
test_idx = np.random.randint(0, len(X_test))
test_image = X_test[test_idx]
test_label = class_names[y_test[test_idx][0]]

# The model expects a batch axis, so reshape (32, 32, 3) -> (1, 32, 32, 3).
single_prediction = model.predict(test_image.reshape(1, 32, 32, 3), verbose=0)
predicted_class = class_names[np.argmax(single_prediction)]
confidence = np.max(single_prediction) * 100

# Show the image with actual vs. predicted class and the model's confidence.
plt.figure(figsize=(6, 6))
plt.imshow(test_image)
plt.title(f'Actual: {test_label}\nPredicted: {predicted_class}\nConfidence: {confidence:.2f}%')
plt.axis('off')
plt.savefig('single_prediction.png')
plt.show()

print(f"Actual class: {test_label}")
print(f"Predicted class: {predicted_class}")
print(f"Confidence: {confidence:.2f}%")

# Closing summary of everything the script produced.
print("\n✅ Assignment 2 Complete! Your computer vision model is ready!")
print("Files created: image_classifier_model.h5, sample_images.png, training_history_cv.png,")
print("               predictions_cv.png, confusion_matrix.png, single_prediction.png")
print("\nYour model can now classify images into 10 different categories!")