Spaces:
No application file
No application file
| # Assignment 2: Image Classification using Deep Learning (Computer Vision) | |
| # This program classifies images into different categories using a Convolutional Neural Network (CNN) | |
| # Step 1: Import all necessary libraries | |
| import numpy as np # For numerical calculations | |
| import matplotlib.pyplot as plt # For showing images and graphs | |
| import tensorflow as tf # Main deep learning library | |
| from tensorflow import keras # High-level API for building models | |
| from tensorflow.keras import layers # For creating neural network layers | |
| from tensorflow.keras.datasets import cifar10 # Built-in image dataset | |
| from tensorflow.keras.utils import to_categorical # For preparing labels | |
| import warnings | |
| warnings.filterwarnings('ignore') # Hide unnecessary warnings | |
# ---------------------------------------------------------------------------
# Step 2: Load the CIFAR-10 dataset.
# Keras downloads it on first use and returns it pre-split into training
# and testing subsets of (image, integer-label) arrays.
# ---------------------------------------------------------------------------
print("=== Loading CIFAR-10 Dataset ===")
print("This dataset contains 60,000 32x32 color images in 10 categories")
print("Downloading dataset (this may take a minute on first run)...\n")

(X_train, y_train), (X_test, y_test) = cifar10.load_data()

# Human-readable names for the 10 integer labels (list index == label id).
class_names = ['airplane', 'automobile', 'bird', 'cat', 'deer',
               'dog', 'frog', 'horse', 'ship', 'truck']
# Step 3: Report basic dataset statistics so the run is self-describing.
print("=== Dataset Information ===")
n_train, n_test = X_train.shape[0], X_test.shape[0]
print(f"Training images: {n_train}")
print(f"Testing images: {n_test}")
print(f"Image shape: {X_train.shape[1:]} (32x32 pixels, 3 color channels for RGB)")
print(f"Number of classes: {len(class_names)}")
print(f"Classes: {', '.join(class_names)}\n")
# Step 4: Show a 3x5 grid of randomly chosen training images with their
# class labels, and save the figure to disk.
print("=== Visualizing Sample Images ===")
fig, axes = plt.subplots(3, 5, figsize=(12, 8))
fig.suptitle('Sample Images from CIFAR-10 Dataset', fontsize=16)

# axes.flat walks the grid in row-major order, one axis per sample.
for ax in axes.flat:
    idx = np.random.randint(0, len(X_train))
    ax.imshow(X_train[idx])
    # y_train rows are single-element arrays, hence the [0] indexing.
    ax.set_title(f'Class: {class_names[y_train[idx][0]]}')
    ax.axis('off')

plt.tight_layout()
plt.savefig('sample_images.png')
plt.show()
# Step 5: Prepare the data for training.
print("\n=== Preprocessing Data ===")

# Scale raw uint8 pixels (0-255) down to [0, 1] floats; small input
# magnitudes make gradient-based training far more stable.
X_train = X_train.astype('float32') / 255.0
X_test = X_test.astype('float32') / 255.0
print("✓ Normalized pixel values to range [0, 1]")

# One-hot encode the integer labels (e.g. 3 -> [0,0,0,1,0,0,0,0,0,0])
# to match the 10-way softmax output and categorical_crossentropy loss.
y_train_categorical = to_categorical(y_train, 10)
y_test_categorical = to_categorical(y_test, 10)
print("✓ Converted labels to categorical format")
# Step 6: Build the CNN — three convolutional blocks of increasing depth
# (32 -> 64 -> 128 filters) followed by a dense classification head.
print("\n=== Building CNN Model ===")
print("Creating a Convolutional Neural Network for image classification...")

model = keras.Sequential()

# --- Block 1: two 3x3 convolutions over the raw 32x32x3 image ---
model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(32, 32, 3)))
model.add(layers.BatchNormalization())   # stabilizes activations during training
model.add(layers.Conv2D(32, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))   # halve the spatial resolution
model.add(layers.Dropout(0.25))          # regularize against overfitting

# --- Block 2: deeper filters for more complex features ---
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Conv2D(64, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25))

# --- Block 3: single high-capacity convolution ---
model.add(layers.Conv2D(128, (3, 3), activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Dropout(0.25))

# --- Classification head ---
model.add(layers.Flatten())                          # 2D feature maps -> 1D vector
model.add(layers.Dense(128, activation='relu'))
model.add(layers.BatchNormalization())
model.add(layers.Dropout(0.5))
model.add(layers.Dense(10, activation='softmax'))    # one probability per class
# Step 7: Compile — pick the optimizer, loss, and tracked metrics.
print("\nCompiling the model...")
model.compile(
    optimizer='adam',                  # adaptive learning-rate gradient descent
    loss='categorical_crossentropy',   # matches the one-hot labels above
    metrics=['accuracy'],              # report accuracy each epoch
)

# Print the layer-by-layer architecture and the parameter count.
print("\nModel Architecture:")
model.summary()
print(f"\nTotal parameters: {model.count_params():,}")
# Step 8: Set up data augmentation.
# Random, label-preserving transforms expand the effective training set
# and improve generalization.
print("\n=== Setting up Data Augmentation ===")
print("Data augmentation creates variations of images to improve model generalization")

data_augmentation = keras.Sequential([
    layers.RandomFlip("horizontal"),  # mirror images left<->right
    layers.RandomRotation(0.1),       # rotate by up to ±10% of a full turn
    layers.RandomZoom(0.1),           # zoom in/out by up to 10%
])

# Step 9: Train the model.
print("\n=== Training the Model ===")
print("This will take 3-5 minutes depending on your computer...")
print("The model will learn to recognize patterns in the images\n")

# Stop once validation loss plateaus, and roll back to the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
# Halve the learning rate whenever validation loss stalls for 5 epochs.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=0.00001,
)

# BUG FIX: data_augmentation was previously defined but never applied —
# model.fit trained on the raw arrays, making the augmentation dead code.
# Build a tf.data pipeline that shuffles, batches, and applies the random
# transforms to every batch (training=True keeps them active each epoch).
train_ds = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train_categorical))
    .shuffle(len(X_train))
    .batch(64)  # same batch size as before
    .map(lambda x, y: (data_augmentation(x, training=True), y),
         num_parallel_calls=tf.data.AUTOTUNE)
    .prefetch(tf.data.AUTOTUNE)
)

history = model.fit(
    train_ds,
    epochs=30,                                        # full passes over the data
    validation_data=(X_test, y_test_categorical),     # monitored by the callbacks
    callbacks=[early_stopping, reduce_lr],
    verbose=1,                                        # show a progress bar
)
print("\n✓ Training completed!")
# Step 10: Plot accuracy and loss curves for the training run, side by side.
print("\n=== Visualizing Training History ===")
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# (history key, validation key, title, y-label, train legend, val legend)
panels = [
    ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy',
     'Training Accuracy', 'Validation Accuracy'),
    ('loss', 'val_loss', 'Model Loss', 'Loss',
     'Training Loss', 'Validation Loss'),
]
for ax, (train_key, val_key, title, ylabel, train_lbl, val_lbl) in zip(axes, panels):
    ax.plot(history.history[train_key], label=train_lbl)
    ax.plot(history.history[val_key], label=val_lbl)
    ax.set_title(title)
    ax.set_xlabel('Epoch')
    ax.set_ylabel(ylabel)
    ax.legend()
    ax.grid(True)

plt.tight_layout()
plt.savefig('training_history_cv.png')
plt.show()
# Step 11: Measure final loss and accuracy on the held-out test set.
print("\n=== Model Evaluation ===")
test_loss, test_accuracy = model.evaluate(X_test, y_test_categorical, verbose=0)
print(f"Test Accuracy: {test_accuracy*100:.2f}%")
print(f"Test Loss: {test_loss:.4f}")
# Step 12: Predict the first 20 test images and visualize the outcomes.
print("\n=== Making Predictions on Test Images ===")
predictions = model.predict(X_test[:20], verbose=0)

fig, axes = plt.subplots(4, 5, figsize=(15, 12))
fig.suptitle('Model Predictions on Test Images', fontsize=16)

for i, ax in enumerate(axes.flat):
    true_label = class_names[y_test[i][0]]
    predicted_label = class_names[np.argmax(predictions[i])]
    confidence = np.max(predictions[i]) * 100

    ax.imshow(X_test[i])
    # Green title = correct prediction, red = mistake.
    color = 'green' if true_label == predicted_label else 'red'
    ax.set_title(f'True: {true_label}\nPred: {predicted_label}\nConf: {confidence:.1f}%',
                 color=color, fontsize=10)
    ax.axis('off')

plt.tight_layout()
plt.savefig('predictions_cv.png')
plt.show()
# Step 13: Build and plot a confusion matrix over the full test set.
print("\n=== Creating Confusion Matrix ===")
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns

# Collapse the softmax probabilities to class ids for every test image.
probabilities = model.predict(X_test, verbose=0)
y_pred_classes = np.argmax(probabilities, axis=1)
y_true_classes = y_test.reshape(-1)   # flatten (N, 1) labels to (N,)

cm = confusion_matrix(y_true_classes, y_pred_classes)

plt.figure(figsize=(10, 8))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_names, yticklabels=class_names)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.savefig('confusion_matrix.png')
plt.show()

# Step 14: Per-class precision / recall / F1 summary.
print("\n=== Classification Report ===")
print(classification_report(y_true_classes, y_pred_classes, target_names=class_names))
# Step 15: Persist the trained model to disk for later reuse.
# NOTE(review): '.h5' is the legacy HDF5 format; the filename is kept as-is
# because the closing summary below lists it.
print("\n=== Saving the Model ===")
model_path = 'image_classifier_model.h5'
model.save(model_path)
print(f"Model saved as '{model_path}'")
# Step 16: Sanity-check the model on one randomly chosen test image.
print("\n=== Testing with a Single Image ===")
test_idx = np.random.randint(0, len(X_test))
test_image = X_test[test_idx]
test_label = class_names[y_test[test_idx][0]]

# The model expects a batch axis, so reshape (32, 32, 3) -> (1, 32, 32, 3).
single_prediction = model.predict(test_image.reshape(1, 32, 32, 3), verbose=0)
predicted_class = class_names[np.argmax(single_prediction)]
confidence = np.max(single_prediction) * 100

# Show the image with actual vs. predicted class and the model's confidence.
plt.figure(figsize=(6, 6))
plt.imshow(test_image)
plt.title(f'Actual: {test_label}\nPredicted: {predicted_class}\nConfidence: {confidence:.2f}%')
plt.axis('off')
plt.savefig('single_prediction.png')
plt.show()

print(f"Actual class: {test_label}")
print(f"Predicted class: {predicted_class}")
print(f"Confidence: {confidence:.2f}%")

# Closing summary of everything the script produced.
print("\n✅ Assignment 2 Complete! Your computer vision model is ready!")
print("Files created: image_classifier_model.h5, sample_images.png, training_history_cv.png,")
print("               predictions_cv.png, confusion_matrix.png, single_prediction.png")
print("\nYour model can now classify images into 10 different categories!")