Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python | |
| # coding: utf-8 | |
| # In[1]: | |
| import torch.nn as nn | |
| import torchvision.transforms as transforms | |
| # First Model | |
| # In[ ]: | |
class PoseNetV1(nn.Module):
    """Baseline keypoint-regression CNN.

    Three conv/ReLU/max-pool stages followed by a two-layer MLP head that
    emits 32 values per sample (presumably 16 (x, y) keypoints — confirm
    against the training labels). Spatial-size comments assume a 224x224
    RGB input.
    """

    def __init__(self):
        super(PoseNetV1, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 224 -> 112
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 112 -> 56
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),  # 56 -> 28
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            # FIX: the original wrote 512 * 14 * 14, which only worked by
            # numeric coincidence (512*14*14 == 128*28*28 == 100352). The
            # conv stack actually ends at 128 channels x 28 x 28, so spell
            # the true flattened shape. Same value, so weights/state_dict
            # are unchanged.
            nn.Linear(128 * 28 * 28, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 32)
        )

    def forward(self, x):
        """Run the feature extractor then the regression head; returns (N, 32)."""
        x = self.conv(x)
        x = self.fc(x)
        return x
| # Here, we added one more layer and we added Dropout to the fully connected layer. We also added a Flatten layer to flatten the output of the convolutional layers before passing it to the fully connected layers. | |
| # In[ ]: | |
class PoseNetV2(nn.Module):
    """Four-stage conv feature extractor with an MLP regression head.

    Emits 32 values per sample; stage comments assume a 224x224 input.
    """

    def __init__(self):
        super(PoseNetV2, self).__init__()

        def stage(c_in, c_out):
            # One downsampling stage: 3x3 conv, ReLU, 2x2 max-pool
            # (halves height and width).
            return [
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]

        layers = []
        # 224 -> 112 -> 56 -> 28 -> 14 spatially; 3 -> 256 channels.
        for c_in, c_out in ((3, 32), (32, 64), (64, 128), (128, 256)):
            layers.extend(stage(c_in, c_out))
        self.conv = nn.Sequential(*layers)

        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 14 * 14, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 32)
        )

    def forward(self, x):
        """Extract features, then regress the 32 keypoint values."""
        return self.fc(self.conv(x))
| # In[ ]: | |
class PoseNetV3(nn.Module):
    """Conv backbone with four downsampling stages and a dropout-regularized head.

    NOTE(review): architecturally identical to PoseNetV2 in this file —
    likely kept as a separate notebook cell for experiment bookkeeping.
    """

    def __init__(self):
        super(PoseNetV3, self).__init__()
        channels = (3, 32, 64, 128, 256)
        blocks = []
        for i in range(len(channels) - 1):
            blocks.append(nn.Conv2d(channels[i], channels[i + 1],
                                    kernel_size=3, stride=1, padding=1))
            blocks.append(nn.ReLU())
            blocks.append(nn.MaxPool2d(2, 2))  # halves H and W each stage
        # With a 224x224 input the final feature map is 256 x 14 x 14.
        self.conv = nn.Sequential(*blocks)
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 14 * 14, 512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 32)
        )

    def forward(self, x):
        features = self.conv(x)
        return self.fc(features)
| # We added batch normalization in each layer, adaptive pooling, and a Tanh function at the end of the fully connected layers. | |
| # In[ ]: | |
class PoseNetV4(nn.Module):
    """Batch-normalized conv backbone with adaptive pooling and a Tanh head.

    The final Tanh squashes the 32 outputs into [-1, 1] (keypoints are
    presumably normalized to that range — confirm against the labels).
    """

    def __init__(self):
        super(PoseNetV4, self).__init__()
        feature_layers = []
        # Three conv/BN/ReLU/pool stages, each halving the spatial size.
        for c_in, c_out in ((3, 32), (32, 64), (64, 128)):
            feature_layers += [
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        feature_layers += [
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(),
            # Adaptive pooling fixes the output at 7x7 regardless of input size.
            nn.AdaptiveAvgPool2d((7, 7))
        ]
        self.conv = nn.Sequential(*feature_layers)
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(256 * 7 * 7, 512),
            nn.ReLU(),
            nn.Dropout(0.4),  # heavier dropout than earlier versions
            nn.Linear(512, 32),
            nn.Tanh()
        )

    def forward(self, x):
        return self.fc(self.conv(x))
| # 4 Layers -> 5 Layers | |
| # | |
| # Tanh() -> Sigmoid() | |
| # In[ ]: | |
class PoseNetV5(nn.Module):
    """Five-stage batch-normalized conv backbone with a Sigmoid head.

    The Sigmoid maps the 32 outputs into [0, 1] (keypoints presumably
    normalized by image size — confirm against the training labels).
    """

    def __init__(self):
        super(PoseNetV5, self).__init__()
        # Four conv/BN/ReLU/pool stages (224 -> 112 -> 56 -> 28 -> 14 for
        # a 224x224 input; the original comment claiming 28x28 after the
        # fourth pool was wrong), then a fifth conv stage and adaptive
        # pooling down to a fixed 7x7.
        feature_layers = []
        for c_in, c_out in ((3, 32), (32, 64), (64, 128), (128, 256)):
            feature_layers += [
                nn.Conv2d(c_in, c_out, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        feature_layers += [
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.AdaptiveAvgPool2d((7, 7))  # input-size-independent 7x7 map
        ]
        self.conv = nn.Sequential(*feature_layers)
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512 * 7 * 7, 512),
            nn.ReLU(),
            nn.Dropout(0.50),  # strongest regularization of the series
            nn.Linear(512, 32),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.fc(self.conv(x))
| # In[ ]: | |
class ResidualBlock(nn.Module):
    """Two 3x3 convs with batch norm and an additive skip connection.

    When the input and output channel counts differ, the identity path is
    replaced by a 1x1 conv + batch norm so the two tensors can be summed.
    """

    def __init__(self, in_channels, out_channels):
        super(ResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels,
                               kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channels, out_channels,
                               kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        if in_channels == out_channels:
            # Plain identity mapping — nothing to project.
            self.shortcut = nn.Sequential()
        else:
            # Channel-matching projection for the skip path.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels,
                          kernel_size=1, stride=1, padding=0),
                nn.BatchNorm2d(out_channels)
            )

    def forward(self, x):
        residual = self.shortcut(x)
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        return self.relu(out + residual)
class ResPoseNet(nn.Module):
    """Keypoint regressor built from residual blocks.

    Five ResidualBlock stages — the first four each followed by a 2x2
    max-pool — feed an adaptive 7x7 average pool (the original comment
    saying 14x14 was wrong), then an MLP head ending in Sigmoid so the
    32 outputs (16 keypoints x (x, y)) land in [0, 1].
    """

    def __init__(self):
        super(ResPoseNet, self).__init__()
        # For a 224x224 input: 224 -> 112 -> 56 -> 28 -> 14 spatially,
        # then a fixed 7x7 via adaptive pooling.
        self.conv = nn.Sequential(
            ResidualBlock(3, 32),
            nn.MaxPool2d(2, 2),
            ResidualBlock(32, 64),
            nn.MaxPool2d(2, 2),
            ResidualBlock(64, 128),
            nn.MaxPool2d(2, 2),
            ResidualBlock(128, 256),
            nn.MaxPool2d(2, 2),
            ResidualBlock(256, 512),
            nn.AdaptiveAvgPool2d((7, 7))
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512 * 7 * 7, 1024),
            nn.ReLU(),
            nn.Dropout(0.40),
            nn.Linear(1024, 32),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.fc(self.conv(x))
# Shared preprocessing pipeline: convert a PIL image / ndarray to a CHW
# float tensor in [0, 1], then shift each RGB channel to [-1, 1] via
# (x - 0.5) / 0.5. NOTE(review): there is no Resize here — the 224x224
# inputs the models assume must be produced upstream.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)