Spaces:
Build error
Build error
| # -*- coding: utf-8 -*- | |
| """Yet another copy of Final CNN Pose Notebook.ipynb | |
| Automatically generated by Colaboratory. | |
| Original file is located at | |
| https://colab.research.google.com/drive/1IdEBDyEyKQdRRT9R-GkfrJINmHdf3_pF | |
| """ | |
| # from google.colab import drive | |
| # drive.mount('/content/drive') | |
| # pip install gradio | |
| import gradio as gr | |
| import torch | |
| from torch.utils.data import DataLoader, Dataset, random_split | |
| from torchvision import transforms, utils | |
| import torch.nn as nn | |
| import torch.optim as optim | |
| import torch.nn.functional as F | |
| from PIL import Image | |
| import os | |
| import numpy as np | |
| import json | |
| import matplotlib.pyplot as plt | |
| from torch.utils.data.dataloader import default_collate | |
| # Define the dataset class | |
| # class HumanPoseDataset(Dataset): | |
| # def __init__(self, annotations, img_dir, transform=None): | |
| # self.annotations = annotations | |
| # self.img_dir = img_dir | |
| # self.transform = transform | |
| # def __len__(self): | |
| # return len(self.annotations) | |
| # def __getitem__(self, idx): | |
| # img_key = list(self.annotations.keys())[idx] | |
| # annotation_list = self.annotations[img_key] | |
| # # Skip the image if there are no annotations | |
| # if not annotation_list: | |
| # return None | |
| # # Use the first annotation for simplicity | |
| # annotation = annotation_list[0] | |
| # if not annotation['landmarks']: # Check if landmarks are not empty | |
| # return None | |
| # img_name = os.path.join(self.img_dir, annotation['file']) | |
| # image = Image.open(img_name).convert('RGB') | |
| # original_image_size = image.size | |
| # keypoints = annotation['landmarks'] | |
| # keypoints_array = np.array([[k['x'], k['y'], k['z'], k['visibility']] for k in keypoints]) | |
| # if self.transform: | |
| # image = self.transform(image) | |
| # sample = {'image': image, 'keypoints': keypoints_array, 'original_image_size': original_image_size} | |
| # print(sample) | |
| # return sample | |
| # # Custom collate function to filter out None values | |
| # def custom_collate(batch): | |
| # batch = [b for b in batch if b is not None] | |
| # return default_collate(batch) | |
| # # Load the annotations JSON into a dictionary | |
| # annotations_path = '/content/drive/MyDrive/annotations_CNN (3).json' # Update this path | |
| # with open(annotations_path) as f: | |
| # annotations_data = json.load(f) | |
| # print("Annotations data loaded. Number of images:", len(annotations_data)) | |
| # x = annotations_data.keys() | |
| """# Data preprocessing: for example, resize the images to 32x32 and normalize them. | |
| """ | |
| # img_dir = '/content/drive/MyDrive/CNN_Dataset' | |
| # # Define the transformations with resizing and augmentation | |
| # transform = transforms.Compose([ | |
| # transforms.Resize((32, 32)), # Resize the images to 32x32 | |
| # transforms.ToTensor(), | |
| # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), | |
| # transforms.RandomHorizontalFlip(), # Example augmentation | |
| # # Add more augmentations if needed | |
| # ]) | |
| # test_transform=transforms.Compose([ | |
| # transforms.ToTensor(), | |
| # transforms.Resize((32,32)), | |
| # ]) | |
| # # Create the dataset | |
| # human_pose_dataset = HumanPoseDataset(annotations_data, img_dir, transform=transform) | |
| # testing_pose_dataset = HumanPoseDataset(annotations_data, img_dir, transform=test_transform) | |
| # print("Dataset created. Length of dataset:", len(human_pose_dataset)) | |
| # sorted(x) == sorted(os.listdir('/content/drive/MyDrive/CNN_Dataset')) | |
| """#2. Load the parameters of a pretrained model. If a pretrained model for the entire network is not available, load the parameters for the backbone network / feature-extraction network / encoder instead. | |
| A pretrained PoseNet model is not available, so we use an architecture similar to PoseNet, a CNN architecture for human pose detection. The architecture shown above gives a brief description of PoseNet. We use the regression network to predict the keypoint coordinates. | |
| """ | |
| import torch | |
| import torch.nn as nn | |
| import torch.optim as optim | |
| import torch.nn.functional as F | |
class SimpleCNN(nn.Module):
    """Small convolutional regressor mapping an RGB image to 33 keypoints.

    Four ``conv -> ReLU -> 2x2 max-pool`` stages are followed by two fully
    connected layers. The network emits ``33 * 4 = 132`` values per image,
    i.e. four numbers for each of 33 keypoints.

    The first fully connected layer expects 512 input features, which
    corresponds to a 32x32 input image: every pooling stage halves the spatial
    size (32 -> 16 -> 8 -> 4 -> 2), leaving 128 channels of 2x2.
    """

    def __init__(self):
        super().__init__()
        # 3x3 convolutions with padding=1 keep the spatial size unchanged;
        # only the shared 2x2 max-pool shrinks the feature map.
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        # For a 32x32 input the feature map after four pooling stages is
        # 128 channels x 2 x 2 = 512 features.
        self.fc1 = nn.Linear(128 * 2 * 2, 1000)
        self.fc2 = nn.Linear(1000, 33 * 4)  # 33 keypoints, 4 values each

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on a batch of images.

        Args:
            x: Float tensor of shape ``(batch, 3, 32, 32)``.

        Returns:
            Tensor of shape ``(batch, 132)`` with the flattened keypoints.
        """
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        x = self.pool(F.relu(self.conv4(x)))
        x = torch.flatten(x, 1)  # keep the batch axis, flatten the rest
        x = F.relu(self.fc1(x))
        return self.fc2(x)
| # Instantiate the network with its default (untrained) weights; the trained weights are loaded into a fresh instance further below. | |
| model = SimpleCNN() | |
| # print("Model initialized.") | |
| # print(model) # Print the model architecture | |
| #!pip install mediapipe | |
| """#3 Replace the output layer if necessary and fine-tune the network for your dataset. Use the validation dataset to pick a good learning rate and momentum. | |
| 1. Training on a very small number of samples | |
| """ | |
| # Split the dataset into training, validation, and test sets | |
| # train_size = int(0.04* len(human_pose_dataset)) | |
| # validation_size = int(0.1 * len(human_pose_dataset)) | |
| # test_size = len(human_pose_dataset) - train_size - validation_size | |
| # train_dataset, remaining_dataset = random_split(human_pose_dataset, [train_size, validation_size + test_size]) | |
| # validation_dataset, test_dataset = random_split(remaining_dataset, [validation_size, test_size]) | |
| # test_pose_dataset , remaining_data = random_split(testing_pose_dataset,[6,194]) | |
| # # Define the batch size | |
| # batch_size = 8 | |
| # # Create data loaders for each set with the custom collate function | |
| # train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate) | |
| # validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate) | |
| # test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate) | |
| # test_image_loader = DataLoader(test_pose_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate) | |
| # print("Data loaders created.") | |
| # len(train_dataset) | |
| # # Loss function | |
| # criterion = nn.MSELoss() | |
| # # Optimizer | |
| # optimizer = optim.Adam(model.parameters(), lr=1e-4) | |
| # # Convert the model parameters to float | |
| # model = model.float() | |
| # # Ensure that the tensors are also floats | |
| # sample_batch = next(iter(train_loader)) | |
| # #import mediapipe as mp | |
| # images = sample_batch['image'].float() # Convert images to float | |
| # keypoints = sample_batch['keypoints'].view(-1, 132).float() # Convert keypoints to float and reshape | |
| # # Now proceed with the optimization loop | |
| # loss=0 | |
| # for epochs in range(10): | |
| # optimizer.zero_grad() | |
| # outputs = model(images) | |
| # loss = criterion(outputs, keypoints) | |
| # loss.backward() | |
| # optimizer.step() | |
| # print("Optimization step completed.") | |
| # print(loss.item()) | |
| # loss=loss.item() | |
| # import torch | |
| # def calculate_accuracy(outputs, targets): | |
| # accuracy = torch.mean(torch.abs(outputs - targets)) | |
| # return accuracy | |
| # print(outputs.shape) | |
| # # Calculate accuracy | |
| # with torch.no_grad(): | |
| # accuracy = calculate_accuracy(outputs, keypoints) | |
| # accuracy= 1- accuracy/132 | |
| # print("Loss:", loss) | |
| # print("Accuracy:", accuracy.item()*100, '%') | |
| # """As you can see, the accuracy is very close to 100% (Overfitting) | |
| # Now taking 80-10-10 split on the dataset, we create new train, val and test loaders | |
| # """ | |
| # # Split the dataset into training, validation, and test sets | |
| # train_size = int(0.8* len(human_pose_dataset)) | |
| # validation_size = int(0.1 * len(human_pose_dataset)) | |
| # test_size = len(human_pose_dataset) - train_size - validation_size | |
| # train_dataset, remaining_dataset = random_split(human_pose_dataset, [train_size, validation_size + test_size]) | |
| # validation_dataset, test_dataset = random_split(remaining_dataset, [validation_size, test_size]) | |
| # test_pose_dataset , remaining_data = random_split(testing_pose_dataset,[6,194]) | |
| # # Define the batch size | |
| # batch_size = 8 | |
| # # Create data loaders for each set with the custom collate function | |
| # train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, collate_fn=custom_collate) | |
| # validation_loader = DataLoader(validation_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate) | |
| # test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate) | |
| # test_image_loader = DataLoader(test_pose_dataset, batch_size=batch_size, shuffle=False, collate_fn=custom_collate) | |
| # print("Data loaders created.") | |
| # len(test_dataset) | |
| # import torch | |
| # import torch.nn as nn | |
| # import torch.optim as optim | |
| # from torch.utils.data import DataLoader, random_split | |
| # from torchvision import transforms | |
| # import torch.nn.functional as F | |
| # class SimpleCNN(nn.Module): | |
| # # Define hyperparameters to search over | |
| # learning_rates = [0.001, 0.01, 0.1] | |
| # momentums = [0.9, 0.95, 0.99] | |
| # weight_decays = [0.0001, 0.001, 0.01] | |
| # best_loss = float('inf') | |
| # best_lr, best_momentum, best_weight_decay = None, None, None | |
| # # Grid search over hyperparameters | |
| # for lr in learning_rates: | |
| # for momentum in momentums: | |
| # for weight_decay in weight_decays: | |
| # # Initialize the model with the current set of hyperparameters | |
| # model = SimpleCNN() | |
| # # Define loss function and optimizer | |
| # criterion = nn.MSELoss() | |
| # optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum, weight_decay=weight_decay) | |
| # # Ensure that the tensors are also floats | |
| # sample_batch = next(iter(train_loader)) | |
| # images = sample_batch['image'].float() # Convert images to float | |
| # keypoints = sample_batch['keypoints'].view(-1, 132).float() # Convert keypoints to float and reshape | |
| # # Now proceed with the optimization loop | |
| # optimizer.zero_grad() | |
| # outputs = model(images) | |
| # print("Output shape after forward pass:", outputs.shape) | |
| # outputs = model(images) | |
| # loss = criterion(outputs, keypoints) | |
| # print("Initial loss:", loss.item()) | |
| # loss.backward() | |
| # optimizer.step() | |
| # print("Optimization step completed.") | |
| # total_loss = 0 | |
| # avg_loss = total_loss / len(train_loader) | |
| # model.train() | |
| # # Check if the current set of hyperparameters resulted in a better performance | |
| # if avg_loss < best_loss: | |
| # best_loss = avg_loss | |
| # best_lr, best_momentum, best_weight_decay = lr, momentum, weight_decay | |
| # # After the grid search, choose the hyperparameters that performed the best | |
| # print("Best Hyperparameters - lr: {}, momentum: {}, weight_decay: {}".format( | |
| # best_lr, best_momentum, best_weight_decay)) | |
| # # Train the final model with the selected hyperparameters on the full dataset | |
| # model = SimpleCNN() | |
| # optimizer = optim.SGD(model.parameters(), lr=best_lr, momentum=best_momentum, weight_decay=best_weight_decay) | |
| # """#3. Plotting Validation and Test Loss | |
| # The best parameters are: | |
| # * Learning Rate: 0.001 | |
| # * Momentum: 0.9 | |
| # * Weight Decay: 0.0001 | |
| # """ | |
| # import torch | |
| # import matplotlib.pyplot as plt | |
| # # Assuming you have already defined your model, optimizer, and criterion | |
| # # Ensure that the tensors are also floats for training | |
| # sample_batch = next(iter(train_loader)) | |
| # images = sample_batch['image'].float() | |
| # keypoints = sample_batch['keypoints'].view(-1, 132).float() | |
| # # Ensure that the tensors are also floats for validation | |
| # validation_sample_batch = next(iter(validation_loader)) | |
| # validation_images = validation_sample_batch['image'].float() | |
| # validation_keypoints = validation_sample_batch['keypoints'].view(-1, 132).float() | |
| # # Now proceed with the optimization loop | |
| # optimizer = torch.optim.Adam(model.parameters(), lr=0.001) | |
| # criterion = torch.nn.MSELoss() | |
| # train_loss = [] | |
| # val_loss = [] | |
| # for epoch in range(20): | |
| # model.train() | |
| # optimizer.zero_grad() | |
| # outputs = model(images) | |
| # current_loss = criterion(outputs, keypoints) | |
| # current_loss.backward() | |
| # optimizer.step() | |
| # model.eval() # Switch to evaluation mode for validation | |
| # with torch.no_grad(): | |
| # # Calculate validation loss | |
| # val_outputs = model(validation_images) | |
| # val_current_loss = criterion(val_outputs, validation_keypoints) | |
| # print(f"Epoch [{epoch + 1}/100], Loss: {current_loss.item():.4f}, Val Loss: {val_current_loss.item():.4f}") | |
| # train_loss.append(current_loss.item()) | |
| # val_loss.append(val_current_loss.item()) | |
| # plotting_val_loss = val_loss | |
| # plotting_train_loss = train_loss | |
| # import matplotlib.pyplot as plt | |
| # # Plotting | |
| # plt.figure(figsize=(8, 4)) | |
| # plt.plot( plotting_train_loss, marker='o', linestyle='-', color='b',label='train loss') | |
| # plt.plot( plotting_val_loss, marker='o', linestyle= '-', color='r', label='val loss') | |
| # plt.title('Loss vs Epochs') | |
| # plt.xlabel('Epochs') | |
| # plt.ylabel('Loss') | |
| # plt.grid(True) | |
| # plt.legend() | |
| # # Show the legend in a small box | |
| # plt.legend(loc='upper right') | |
| # plt.show() | |
| # """#4. Final Run on Test Dataset""" | |
| # # Ensure that the tensors are also floats | |
| # sample_batch = next(iter(test_loader)) | |
| # #import mediapipe as mp | |
| # test_images = sample_batch['image'].float() # Convert images to float | |
| # test_keypoints = sample_batch['keypoints'].view(-1, 132).float() # Convert keypoints to float and reshape | |
| # model.eval() | |
| # optimizer.zero_grad() | |
| # outputs = model(test_images) | |
| # print("Testing Done") | |
| # test_images.shape | |
| # test_actual_plot = test_keypoints.reshape(len(test_images),33,4)[0] | |
| # test_predict_plot = outputs.reshape(len(test_images),33,4)[0] | |
| # test_predict_plot.shape | |
| # """# 4. Finally, evaluate on the test dataset.""" | |
| # import cv2 | |
| # import matplotlib.pyplot as plt | |
| # import numpy as np | |
| # def plot_human_pose(keypoints): | |
| # # Create a figure and axis | |
| # fig, ax = plt.subplots() | |
| # # Plot keypoints | |
| # for i in range(len(keypoints)): | |
| # x, y, _, _ = keypoints[i] | |
| # ax.scatter(x, -y, color='blue') # Invert y-axis | |
| # # Connect body parts | |
| # connect_lines = [(0, 2), (2, 7), # Left eye | |
| # (0, 5), (5, 8), # Right eye | |
| # (9,10), # Left side | |
| # (11, 12), (12, 24), (11, 23), # Right side | |
| # (24,23), (24,26), (23,25), # Connect ears and wrists | |
| # (26, 28), (25, 27), | |
| # (28, 30), (28, 32), (30,32),# Connect left and right pinky fingers | |
| # (27, 29), (27, 31), (31,29), # Connect left and right index fingers | |
| # (12, 14), (11, 13), # Connect left and right thumbs | |
| # (14, 16), (13, 15), # Connect left and right hips | |
| # (16, 18), (18, 20), (16,20), (16,22), # Connect left and right knees | |
| # (15, 17), (15, 19), # Connect left and right ankles | |
| # (17, 19), (15, 21)] # Connect left and right heels | |
| # for line in connect_lines: | |
| # start, end = line | |
| # x_vals = [keypoints[start][0], keypoints[end][0]] | |
| # y_vals = [-keypoints[start][1], -keypoints[end][1]] # Invert y-axis | |
| # ax.plot(x_vals, y_vals, linewidth=2, color='red') | |
| # ax.set_aspect('equal', adjustable='datalim') | |
| # plt.title('Actual Pose') | |
| # plt.axis('off') | |
| # plt.show() | |
| # # Example usage: | |
| # keypoints = test_actual_plot # Replace with your 33 key points | |
| # plot_human_pose(keypoints) | |
| from io import BytesIO | |
| from PIL import Image | |
# Pairs of keypoint indices joined by a line segment when drawing the skeleton.
# NOTE(review): the group labels assume the 33-landmark MediaPipe Pose
# numbering (0 = nose, 11/12 = shoulders, 23/24 = hips, ...) - confirm against
# the annotations the model was trained on.
_POSE_CONNECTIONS = [
    (0, 2), (2, 7),                          # nose - left eye - left ear
    (0, 5), (5, 8),                          # nose - right eye - right ear
    (9, 10),                                 # mouth
    (11, 12), (12, 24), (11, 23),            # shoulders and sides of the torso
    (24, 23), (24, 26), (23, 25),            # hips and upper legs
    (26, 28), (25, 27),                      # lower legs
    (28, 30), (28, 32), (30, 32),            # right foot
    (27, 29), (27, 31), (31, 29),            # left foot
    (12, 14), (11, 13),                      # upper arms
    (14, 16), (13, 15),                      # forearms
    (16, 18), (18, 20), (16, 20), (16, 22),  # right hand
    (15, 17), (15, 19),                      # left hand
    (17, 19), (15, 21),                      # left hand (continued)
]


def plot_human_pose(keypoints):
    """Draw pose keypoints as a stick figure and return it as an image.

    Args:
        keypoints: Indexable sequence of keypoints, each unpackable into
            exactly four values whose first two are the x and y coordinates
            (the remaining two are ignored). It must hold at least 33 entries
            because the skeleton references indices up to 32.

    Returns:
        PIL.Image.Image: The figure titled "Predicted Pose", decoded from an
        in-memory PNG.

    Raises:
        IndexError: If ``keypoints`` has fewer entries than the skeleton uses.
    """
    fig, ax = plt.subplots()
    try:
        # y is negated throughout so that image-style coordinates (y growing
        # downwards) are drawn upright.
        xs = []
        ys = []
        for x, y, _, _ in keypoints:
            xs.append(x)
            ys.append(-y)
        ax.scatter(xs, ys, color='blue')

        for start, end in _POSE_CONNECTIONS:
            x_vals = [keypoints[start][0], keypoints[end][0]]
            y_vals = [-keypoints[start][1], -keypoints[end][1]]
            ax.plot(x_vals, y_vals, linewidth=2, color='green')

        ax.set_aspect('equal', adjustable='datalim')
        ax.set_title('Predicted Pose')
        ax.axis('off')

        buffer = BytesIO()
        fig.savefig(buffer, format="png")
        buffer.seek(0)  # rewind so the PNG can be read from the start
    finally:
        # Always release the figure, even if drawing or saving failed.
        plt.close(fig)

    return Image.open(buffer)
| # Example usage: | |
| # keypoints = test_predict_plot.detach().numpy() # Replace with your 33 key points | |
| # plot_human_pose(keypoints) | |
| """### As you can see, the model predicts the person's pose very accurately, as reflected in its training and validation accuracy""" | |
| # torch.save(model.state_dict(), '/content/drive/MyDrive/Ayush sarangi/model.pth') | |
| # torch.save( model, '/content/drive/MyDrive/Ayush sarangi/entire_model.pt') | |
| import cv2 | |
| from torchvision import transforms, utils | |
| from matplotlib import pyplot as plt | |
| import numpy as np | |
# Rebuild the network and restore the trained weights for inference.
# map_location="cpu" lets a checkpoint that was saved from a GPU session load
# on a CPU-only host instead of failing while deserializing CUDA tensors.
model = SimpleCNN()
model.load_state_dict(torch.load("model.pth", map_location="cpu"))
model.eval()  # switch the module to evaluation mode
def predict_pose(img):
    """Predict pose keypoints for an image and return the rendered skeleton.

    Args:
        img: Image array as delivered by the ``gr.Image()`` input component
            (presumably an H x W x 3 uint8 RGB array - TODO confirm), or
            ``None`` when no image was supplied.

    Returns:
        The image produced by ``plot_human_pose`` for the predicted keypoints,
        or ``None`` if ``img`` is ``None``.
    """
    if img is None:
        # Nothing to predict on (e.g. the form was submitted without an
        # image); return an empty result instead of crashing in cv2.resize.
        return None

    # The network's first linear layer is sized for 32x32 inputs.
    resized = cv2.resize(img, (32, 32))
    # ToTensor yields a (C, H, W) float tensor; add the batch axis in front.
    tensor_img = transforms.ToTensor()(resized).unsqueeze(0)
    # NOTE(review): the (commented-out) training transform also applied
    # transforms.Normalize with ImageNet statistics; confirm whether the saved
    # weights expect normalized inputs here as well.

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(tensor_img)

    # One image in the batch -> 33 keypoints with 4 values each.
    pred_keypoints = outputs.reshape(33, 4).numpy()
    return plot_human_pose(pred_keypoints)
| # predict_pose(test_image) | |
| # input_image = [ | |
| # gr.components.Image(type = "pil"), | |
| # ] | |
| # output_image = [ | |
| # gr.components.Image(type = "pil"), | |
| # ] | |
# Gradio front end: one image in, the rendered pose image out.
pose_detector = gr.Interface(
    fn=predict_pose,
    inputs=gr.Image(),
    outputs=gr.Image(),
)
pose_detector.launch(share=True)