Spaces:
Runtime error
Runtime error
| # -*- coding: utf-8 -*- | |
| """Yet another copy of Final CNN Pose Notebook.ipynb | |
| Automatically generated by Colaboratory. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1IdEBDyEyKQdRRT9R-GkfrJINmHdf3_pF | |
| """ | |
| # from google.colab import drive | |
| # drive.mount('/content/drive') | |
| # pip install gradio | |
| import gradio as gr | |
| import torch | |
| from torch.utils.data import DataLoader, Dataset, random_split | |
| from torchvision import transforms, utils | |
| import torch.nn as nn | |
| import torch.optim as optim | |
| import torch.nn.functional as F | |
| from PIL import Image | |
| import os | |
| import numpy as np | |
| import json | |
| import matplotlib.pyplot as plt | |
| from torch.utils.data.dataloader import default_collate | |
| # Define the dataset class | |
class HumanPoseDataset(Dataset):
    """Dataset pairing each image with the landmarks of its first annotation.

    ``annotations`` maps an image key to a list of annotation dicts. Only the
    first annotation of each image is used. From it the dataset reads
    ``'file'`` (joined onto ``img_dir``) and ``'landmarks'`` (a list of dicts
    with ``'x'``, ``'y'``, ``'z'`` and ``'visibility'`` entries).

    Args:
        annotations: Mapping of image key -> list of annotation dicts.
        img_dir: Directory that the annotation ``'file'`` entries are relative to.
        transform: Optional callable applied to the RGB PIL image.
    """

    def __init__(self, annotations, img_dir, transform=None):
        self.annotations = annotations
        self.img_dir = img_dir
        self.transform = transform
        # Snapshot the key order once so that indexing does not rebuild the
        # whole key list on every __getitem__ call.
        self._keys = list(annotations)

    def __len__(self):
        return len(self._keys)

    def __getitem__(self, idx):
        """Return the sample dict for ``idx``, or ``None`` if it has no usable annotation."""
        annotation_list = self.annotations[self._keys[idx]]
        # Skip the image if there are no annotations
        if not annotation_list:
            return None
        # Use the first annotation for simplicity
        annotation = annotation_list[0]
        keypoints = annotation['landmarks']
        if not keypoints:  # Skip annotations with an empty landmark list
            return None
        img_name = os.path.join(self.img_dir, annotation['file'])
        # Close the underlying file as soon as the pixels are converted;
        # convert() returns an independent in-memory image.
        with Image.open(img_name) as raw_image:
            image = raw_image.convert('RGB')
        original_image_size = image.size
        keypoints_array = np.array(
            [[k['x'], k['y'], k['z'], k['visibility']] for k in keypoints]
        )
        if self.transform:
            image = self.transform(image)
        # No per-sample debug print here: it dumped whole tensors to stdout
        # for every item fetched by the data loaders.
        return {
            'image': image,
            'keypoints': keypoints_array,
            'original_image_size': original_image_size,
        }
| # Custom collate function to filter out None values | |
def custom_collate(batch):
    """Collate ``batch`` after discarding every entry that is ``None``."""
    usable = list(filter(lambda sample: sample is not None, batch))
    return default_collate(usable)
# Load the annotations JSON into a dictionary
annotations_path = '/content/drive/MyDrive/annotations_CNN (3).json'  # Update this path
# JSON text is UTF-8 by specification; state the encoding explicitly so the
# load does not depend on the platform's default locale encoding.
with open(annotations_path, encoding='utf-8') as f:
    annotations_data = json.load(f)
print("Annotations data loaded. Number of images:", len(annotations_data))
# View over the image keys; it is compared with the image directory listing
# further down in the script.
x = annotations_data.keys()
"""# Do data preprocessing: for example, resize to 32x32 and normalize.
"""
img_dir = '/content/drive/MyDrive/CNN_Dataset'
# Training-time preprocessing: resize, convert to a tensor, normalize channels.
transform = transforms.Compose([
    transforms.Resize((32, 32)),  # Resize the images to 32x32
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    # NOTE: RandomHorizontalFlip is deliberately not used here. The transform
    # only sees the image, so a flip would mirror the pixels while the keypoint
    # targets stay unflipped, which corrupts the regression labels. Any
    # geometric augmentation has to transform image and keypoints together.
])
# Preprocessing for the preview/test images.
# NOTE(review): unlike `transform`, this pipeline does not normalize; confirm
# whether these images are only used for display before feeding them to the model.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Resize((32, 32)),
])
# Create the dataset
human_pose_dataset = HumanPoseDataset(annotations_data, img_dir, transform=transform)
testing_pose_dataset = HumanPoseDataset(annotations_data, img_dir, transform=test_transform)
print("Dataset created. Length of dataset:", len(human_pose_dataset))
# Sanity check: the annotation keys should match the files in the image
# directory. In a script a bare comparison is discarded, so report the result.
if sorted(x) != sorted(os.listdir(img_dir)):
    print("Warning: annotation keys do not match the files in", img_dir)
| """#2. Load parameters of a pretrained model. If a pretrained model for the entire network is not available, then load parameters for the backbone network/feature extraction network/encoder. | |
| Pose net model is not available so we will be using an architecture similar to PoseNet, a human pose detection CNN architecture. In the above architecture, we are given a brief description about the PoseNet Architecture. We will be using the Regression Network to find the keypoint coordinates. | |
| """ | |
| import torch | |
| import torch.nn as nn | |
| import torch.optim as optim | |
| import torch.nn.functional as F | |
class SimpleCNN(nn.Module):
    """Four conv + max-pool stages followed by a two-layer regression head.

    The network maps a batch of RGB images to ``num_keypoints * 4`` values per
    image (four numbers per keypoint).

    Args:
        input_size: Side length of the square input images. Defaults to 32,
            which matches the ``Resize((32, 32))`` preprocessing in this file.
        num_keypoints: Number of keypoints to regress. Defaults to 33.

    Raises:
        ValueError: If ``input_size`` is too small to survive four 2x2 poolings.
    """

    def __init__(self, input_size=32, num_keypoints=33):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        # The padded 3x3 convolutions keep the spatial size; each of the four
        # 2x2 poolings halves it (rounding down), so the final feature map is
        # (input_size // 16) on a side with 128 channels. For the default
        # 32x32 input that is 128 * 2 * 2 = 512 features.
        feature_side = input_size // 16
        if feature_side < 1:
            raise ValueError("input_size must be at least 16")
        self.fc1 = nn.Linear(128 * feature_side * feature_side, 1000)
        self.fc2 = nn.Linear(1000, num_keypoints * 4)

    def forward(self, x):
        """Return a ``(batch, num_keypoints * 4)`` tensor for an image batch ``x``."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = torch.flatten(x, 1)  # Flatten everything except the batch dimension
        x = F.relu(self.fc1(x))
        return self.fc2(x)
# Build the network, then report that it exists and show its layer layout
model = SimpleCNN()
print("Model initialized.", model, sep="\n")
| #!pip install mediapipe | |
"""#3 Replace the output layer if necessary and fine-tune the network for your dataset. Use the validation dataset to pick a good learning rate and momentum.
1. Training on very few samples
"""
# Split the dataset into training (4%), validation (10%), and test (rest) sets
dataset_len = len(human_pose_dataset)
train_size = int(0.04 * dataset_len)
validation_size = int(0.1 * dataset_len)
test_size = dataset_len - train_size - validation_size
train_dataset, remaining_dataset = random_split(human_pose_dataset, [train_size, validation_size + test_size])
validation_dataset, test_dataset = random_split(remaining_dataset, [validation_size, test_size])
# Keep up to 6 images aside from the un-augmented dataset. random_split
# requires the lengths to add up to the dataset length, so derive the
# remainder instead of hard-coding it for a 200-image dataset.
num_preview = min(6, len(testing_pose_dataset))
test_pose_dataset, remaining_data = random_split(
    testing_pose_dataset, [num_preview, len(testing_pose_dataset) - num_preview]
)
# Define the batch size
batch_size = 8
# Create data loaders for each set with the custom collate function
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
test_image_loader = DataLoader(test_pose_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
print("Data loaders created.")
# A bare len(...) only displays in a notebook cell; print it so the script
# reports the value too.
print("Training samples:", len(train_dataset))
# Overfit one fixed training batch for ten Adam steps.
# NOTE(review): the original indentation is not recoverable from this file;
# the two progress prints are assumed to sit inside the loop. Confirm.
import mediapipe as mp

model = model.float()  # Make sure the parameters are float32
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# One batch, cast to float; keypoints are flattened to 132 values per sample
sample_batch = next(iter(train_loader))
images, keypoints = (
    sample_batch['image'].float(),
    sample_batch['keypoints'].view(-1, 132).float(),
)

for epochs in range(10):
    optimizer.zero_grad()
    outputs = model(images)
    loss = criterion(outputs, keypoints)
    loss.backward()
    optimizer.step()
    print("Optimization step completed.", loss.item(), sep="\n")

# Keep only the Python float of the last step's loss
loss = loss.item()
| import torch | |
def calculate_accuracy(outputs, targets):
    """Return the mean absolute difference between ``outputs`` and ``targets``.

    NOTE: despite the name, the returned value is an error measure (lower is
    better), not an accuracy.
    """
    return (outputs - targets).abs().mean()
print(outputs.shape)
# Turn the mean absolute error into the reported "accuracy" figure.
# NOTE(review): 1 - MAE/132 is not a true accuracy; it stays near 100% for
# almost any small error. Treat the printed percentage with care.
with torch.no_grad():
    mean_abs_error = calculate_accuracy(outputs, keypoints)
    accuracy = 1 - mean_abs_error / 132
print("Loss:", loss)
print("Accuracy:", accuracy.item()*100, '%')
"""As you can see, the accuracy is very close to 100% (overfitting).
Next, using an 80-10-10 split of the dataset, we create new train, validation, and test loaders.
"""
# Split the dataset into training (80%), validation (10%), and test (rest) sets
dataset_len = len(human_pose_dataset)
train_size = int(0.8 * dataset_len)
validation_size = int(0.1 * dataset_len)
test_size = dataset_len - train_size - validation_size
train_dataset, remaining_dataset = random_split(human_pose_dataset, [train_size, validation_size + test_size])
validation_dataset, test_dataset = random_split(remaining_dataset, [validation_size, test_size])
# Keep up to 6 images aside from the un-augmented dataset. random_split
# requires the lengths to add up to the dataset length, so derive the
# remainder instead of hard-coding it for a 200-image dataset.
num_preview = min(6, len(testing_pose_dataset))
test_pose_dataset, remaining_data = random_split(
    testing_pose_dataset, [num_preview, len(testing_pose_dataset) - num_preview]
)
# Define the batch size
batch_size = 8
# Create data loaders for each set with the custom collate function
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate)
validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
test_image_loader = DataLoader(test_pose_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate)
print("Data loaders created.")
# A bare len(...) only displays in a notebook cell; print it so the script
# reports the value too.
print("Test samples:", len(test_dataset))
| import torch | |
| import torch.nn as nn | |
| import torch.optim as optim | |
| from torch.utils.data import DataLoader, random_split | |
| from torchvision import transforms | |
| import torch.nn.functional as F | |
# Grid search over SGD hyperparameters.
# Each candidate takes one optimisation step on a fixed training batch and is
# then scored on a fixed validation batch, so all candidates are compared on
# the same data. The score used to be `0 / len(train_loader)`, i.e. always 0,
# which made the search select the first combination unconditionally.
import mediapipe as mp

# Define hyperparameters to search over
learning_rates = [0.001, 0.01, 0.1]
momentums = [0.9, 0.95, 0.99]
weight_decays = [0.0001, 0.001, 0.01]
best_loss = float('inf')
best_lr, best_momentum, best_weight_decay = None, None, None

criterion = nn.MSELoss()

# Fixed training batch (floats; keypoints flattened to 132 values per sample)
sample_batch = next(iter(train_loader))
images = sample_batch['image'].float()
keypoints = sample_batch['keypoints'].view(-1, 132).float()

# Fixed validation batch used to score every candidate
validation_sample_batch = next(iter(validation_loader))
validation_images = validation_sample_batch['image'].float()
validation_keypoints = validation_sample_batch['keypoints'].view(-1, 132).float()

for lr in learning_rates:
    for momentum in momentums:
        for weight_decay in weight_decays:
            # Fresh model and optimizer for the current set of hyperparameters
            model = SimpleCNN()
            optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay)

            model.train()
            optimizer.zero_grad()
            outputs = model(images)
            print("Output shape after forward pass:", outputs.shape)
            loss = criterion(outputs, keypoints)
            print("Initial loss:", loss.item())
            loss.backward()
            optimizer.step()
            print("Optimization step completed.")

            # Score the updated model on held-out data
            model.eval()
            with torch.no_grad():
                avg_loss = criterion(model(validation_images), validation_keypoints).item()

            # A diverged run yields NaN/inf, which never compares as smaller,
            # so such candidates are skipped automatically.
            if avg_loss < best_loss:
                best_loss = avg_loss
                best_lr, best_momentum, best_weight_decay = lr, momentum, weight_decay

if best_lr is None:
    raise RuntimeError("Grid search failed: every candidate produced a non-finite validation loss")

# After the grid search, choose the hyperparameters that performed the best
print("Best Hyperparameters - lr: {}, momentum: {}, weight_decay: {}".format(
    best_lr, best_momentum, best_weight_decay))
# Fresh model and optimizer configured with the selected hyperparameters
model = SimpleCNN()
optimizer = optim.SGD(model.parameters(), lr=best_lr, momentum=best_momentum, weight_decay=best_weight_decay)
"""#3. Plotting Training and Validation Loss
The best parameters are:
* Learning Rate: 0.001
* Momentum: 0.9
* Weight Decay: 0.0001
"""
| import torch | |
| import matplotlib.pyplot as plt | |
# Train the already-defined `model` and record per-epoch losses.
# Every epoch makes a full pass over the training loader and then scores the
# model on the full validation loader. Previously a single cached batch was
# reused for every "epoch", so most of the split was never seen.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.MSELoss()
num_epochs = 15
train_loss = []
val_loss = []
for epoch in range(num_epochs):
    model.train()
    train_total, train_count = 0.0, 0
    for batch in train_loader:
        images = batch['image'].float()
        keypoints = batch['keypoints'].reshape(-1, 132).float()
        optimizer.zero_grad()
        outputs = model(images)
        current_loss = criterion(outputs, keypoints)
        current_loss.backward()
        optimizer.step()
        # Weight each batch mean by its size so a short last batch does not
        # skew the epoch average.
        train_total += current_loss.item() * len(images)
        train_count += len(images)

    model.eval()  # Switch to evaluation mode for validation
    val_total, val_count = 0.0, 0
    with torch.no_grad():
        for batch in validation_loader:
            validation_images = batch['image'].float()
            validation_keypoints = batch['keypoints'].reshape(-1, 132).float()
            val_outputs = model(validation_images)
            val_current_loss = criterion(val_outputs, validation_keypoints)
            val_total += val_current_loss.item() * len(validation_images)
            val_count += len(validation_images)

    # max(..., 1) guards against an empty split
    epoch_train_loss = train_total / max(train_count, 1)
    epoch_val_loss = val_total / max(val_count, 1)
    # Report the real epoch total (the message used to claim 100 epochs)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_train_loss:.4f}, Val Loss: {epoch_val_loss:.4f}")
    train_loss.append(epoch_train_loss)
    val_loss.append(epoch_val_loss)
plotting_val_loss = val_loss
plotting_train_loss = train_loss
| import matplotlib.pyplot as plt | |
# Draw the training and validation loss curves on one set of axes
plt.figure(figsize=(8, 4))
for series, colour, tag in (
    (plotting_train_loss, 'b', 'train loss'),
    (plotting_val_loss, 'r', 'val loss'),
):
    plt.plot(series, marker='o', linestyle='-', color=colour, label=tag)
plt.title('Loss vs Epochs')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.grid(True)
# Legend in a small box in the upper-right corner
plt.legend(loc='upper right')
plt.show()
| """#4. Final Run on Test Dataset""" | |
# Run the trained model on one batch from the test loader
sample_batch = next(iter(test_loader))
import mediapipe as mp
test_images = sample_batch['image'].float()  # Convert images to float
test_keypoints = sample_batch['keypoints'].view(-1, 132).float()  # Flatten to 132 values per sample
model.eval()
# Inference only: no gradients are needed, so skip graph construction. The
# optimizer is not involved here, so there is nothing to zero.
with torch.no_grad():
    outputs = model(test_images)
    print("Test loss:", criterion(outputs, test_keypoints).item())
print("Testing Done")
# Keep the first sample of the batch for plotting: its image, its ground-truth
# keypoints and its predicted keypoints, each reshaped to (33, 4).
test_image_tensor = test_images[0]
test_actual_plot = test_keypoints.reshape(len(test_images), 33, 4)[0]
test_predict_plot = outputs.reshape(len(test_images), 33, 4)[0]
| """# 4. Finally, evaluate on the test dataset.""" | |
| import cv2 | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
def plot_human_pose(keypoints, title='Actual Pose', line_color='red'):
    """Draw a 2D stick figure from a sequence of pose keypoints.

    Args:
        keypoints: Indexable sequence of at least 33 rows, each holding four
            values; only the first two (x, y) of each row are drawn.
        title: Title shown above the plot.
        line_color: Colour of the lines connecting the keypoints.

    The joint labels in the comments below assume MediaPipe Pose's
    33-landmark index order. TODO: confirm against the annotation source.
    """
    fig, ax = plt.subplots()
    # Plot keypoints; y is negated because image coordinates grow downwards
    for x, y, _, _ in keypoints:
        ax.scatter(x, -y, color='blue')
    # Pairs of keypoint indices to join with a line
    connect_lines = (
        (0, 2), (2, 7),                        # nose - left eye - left ear
        (0, 5), (5, 8),                        # nose - right eye - right ear
        (9, 10),                               # mouth
        (11, 12), (12, 24), (11, 23), (24, 23),  # torso (shoulders and hips)
        (24, 26), (23, 25),                    # hips - knees
        (26, 28), (25, 27),                    # knees - ankles
        (28, 30), (28, 32), (30, 32),          # right foot
        (27, 29), (27, 31), (31, 29),          # left foot
        (12, 14), (11, 13),                    # shoulders - elbows
        (14, 16), (13, 15),                    # elbows - wrists
        (16, 18), (18, 20), (16, 20), (16, 22),  # right hand
        (15, 17), (15, 19), (17, 19), (15, 21),  # left hand
    )
    for start, end in connect_lines:
        x_vals = [keypoints[start][0], keypoints[end][0]]
        y_vals = [-keypoints[start][1], -keypoints[end][1]]  # Invert y-axis
        ax.plot(x_vals, y_vals, linewidth=2, color=line_color)
    ax.set_aspect('equal', adjustable='datalim')
    plt.title(title)
    plt.axis('off')
    plt.show()


# Example usage:
keypoints = test_actual_plot  # Replace with your 33 key points
plot_human_pose(keypoints)
def plot_human_pose(keypoints, title='Predicted Pose', line_color='green'):
    """Draw a 2D stick figure from a sequence of pose keypoints.

    This definition replaces any earlier ``plot_human_pose`` in the module;
    its defaults render the figure as the predicted pose.

    Args:
        keypoints: Indexable sequence of at least 33 rows, each holding four
            values; only the first two (x, y) of each row are drawn.
        title: Title shown above the plot.
        line_color: Colour of the lines connecting the keypoints.

    The joint labels in the comments below assume MediaPipe Pose's
    33-landmark index order. TODO: confirm against the annotation source.
    """
    fig, ax = plt.subplots()
    # Plot keypoints; y is negated because image coordinates grow downwards
    for x, y, _, _ in keypoints:
        ax.scatter(x, -y, color='blue')
    # Pairs of keypoint indices to join with a line
    connect_lines = (
        (0, 2), (2, 7),                        # nose - left eye - left ear
        (0, 5), (5, 8),                        # nose - right eye - right ear
        (9, 10),                               # mouth
        (11, 12), (12, 24), (11, 23), (24, 23),  # torso (shoulders and hips)
        (24, 26), (23, 25),                    # hips - knees
        (26, 28), (25, 27),                    # knees - ankles
        (28, 30), (28, 32), (30, 32),          # right foot
        (27, 29), (27, 31), (31, 29),          # left foot
        (12, 14), (11, 13),                    # shoulders - elbows
        (14, 16), (13, 15),                    # elbows - wrists
        (16, 18), (18, 20), (16, 20), (16, 22),  # right hand
        (15, 17), (15, 19), (17, 19), (15, 21),  # left hand
    )
    for start, end in connect_lines:
        x_vals = [keypoints[start][0], keypoints[end][0]]
        y_vals = [-keypoints[start][1], -keypoints[end][1]]  # Invert y-axis
        ax.plot(x_vals, y_vals, linewidth=2, color=line_color)
    ax.set_aspect('equal', adjustable='datalim')
    plt.title(title)
    plt.axis('off')
    plt.show()


# Example usage:
keypoints = test_predict_plot.detach().numpy()  # Replace with your 33 key points
plot_human_pose(keypoints)
| """### As you can see, the model predicts the pose of the person very accurately as depicted by its train and validation accuracy""" | |
| from PIL import Image | |
def predict_pose(image_path):
    """Predict the pose for one image and return the rendered skeleton plot.

    Args:
        image_path: A filesystem path, a PIL image, or an image array.
            Gradio's ``gr.Image`` input hands the function a NumPy array by
            default, so arrays are accepted as well as paths.

    Returns:
        A PIL image containing the plot drawn by ``plot_human_pose``.
    """
    import io

    if isinstance(image_path, Image.Image):
        img = image_path
    elif isinstance(image_path, np.ndarray):
        img = Image.fromarray(image_path)
    else:
        img = Image.open(str(image_path))

    # Same deterministic preprocessing as the training transform in this file:
    # 32x32 resize, tensor conversion, channel normalization.
    preprocess = transforms.Compose([
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    # Force three channels (the first conv layer expects RGB) and add the
    # batch dimension. The tensor is what goes to the model, not the PIL image.
    batch = preprocess(img.convert('RGB')).unsqueeze(0)

    model.eval()
    with torch.no_grad():
        outputs = model(batch)
    pred_keypoints = outputs.reshape(1, 33, 4)[0].numpy()

    plot_human_pose(pred_keypoints)
    # plot_human_pose returns nothing, so grab the figure it drew and render
    # it to an image for the Gradio output component.
    # NOTE(review): this relies on plt.show() leaving the figure open, which
    # holds for non-interactive backends; otherwise the result is a blank plot.
    fig = plt.gcf()
    buffer = io.BytesIO()
    fig.savefig(buffer, format='png')
    plt.close(fig)
    buffer.seek(0)
    return Image.open(buffer).convert('RGB')
# Wrap predict_pose in an image-in / image-out Gradio interface
pose_detector = gr.Interface(
    fn=predict_pose,
    inputs=gr.Image(label='input image'),
    outputs=gr.Image(label='output image'),
    title='pose_detector',
)
# Serve it as the single tab of a tabbed app, with request queuing enabled
tabbed_app = gr.TabbedInterface([pose_detector], tab_names=['pose_detection'])
tabbed_app.queue().launch()