# Provenance: Hugging Face upload "Upload 16 files" by Abs6187 (commit e2cffd9, verified)
"""
ISL Sign Language Translation - TechMatrix Solvers Initiative
Model definitions for body pose and hand pose estimation
Developed by: TechMatrix Solvers Team
"""
import torch
from collections import OrderedDict
import torch.nn as nn
def construct_layers(layer_config, no_relu_layers, prelu_layers=()):
    """
    Construct a sequential stack of conv/pool layers from a configuration dict.

    Args:
        layer_config: OrderedDict mapping layer name -> parameter list.
            Entries whose name contains 'pool' are max-pool layers with
            params [kernel_size, stride, padding]; all others are conv
            layers with params [in_channels, out_channels, kernel_size,
            stride, padding].
        no_relu_layers: names of conv layers that get no activation
            (typically the final prediction layers of a stage).
        prelu_layers: names of conv layers that use PReLU instead of ReLU.
            Defaults to an empty tuple — an immutable default replaces the
            previous mutable `[]` default (shared-mutable-default pitfall);
            callers passing a list are unaffected.

    Returns:
        nn.Sequential preserving the config's order and layer names.
        Names must stay exactly as configured so state_dict keys match
        pretrained checkpoints.
    """
    layers = []
    for layer_name, params in layer_config.items():
        if 'pool' in layer_name:
            # Pooling layer: params = [kernel_size, stride, padding]
            layer = nn.MaxPool2d(kernel_size=params[0], stride=params[1], padding=params[2])
            layers.append((layer_name, layer))
        else:
            # Conv layer: params = [in, out, kernel, stride, padding]
            conv2d = nn.Conv2d(
                in_channels=params[0],
                out_channels=params[1],
                kernel_size=params[2],
                stride=params[3],
                padding=params[4]
            )
            layers.append((layer_name, conv2d))
            if layer_name not in no_relu_layers:
                if layer_name not in prelu_layers:
                    layers.append(('relu_' + layer_name, nn.ReLU(inplace=True)))
                else:
                    # e.g. 'conv4_2' -> 'prelu4_2' (checkpoint naming convention);
                    # PReLU has one learnable slope per output channel.
                    layers.append(('prelu' + layer_name[4:], nn.PReLU(params[1])))
    return nn.Sequential(OrderedDict(layers))
def construct_multi_conv_layers(layer_config, no_relu_layers):
    """
    Build a ModuleList in which every config entry becomes its own
    single-layer nn.Sequential (the caller concatenates the per-module
    outputs, so each layer must be individually addressable).

    Args:
        layer_config: OrderedDict mapping layer name -> parameter list
            ('pool' names: [kernel, stride, padding]; conv names:
            [in, out, kernel, stride, padding]).
        no_relu_layers: conv layer names that get no PReLU activation.

    Returns:
        nn.ModuleList of nn.Sequential blocks, one per config entry,
        with checkpoint-compatible layer names.
    """
    blocks = []
    for name, cfg in layer_config.items():
        if 'pool' in name:
            stage = [(name, nn.MaxPool2d(kernel_size=cfg[0], stride=cfg[1], padding=cfg[2]))]
        else:
            conv = nn.Conv2d(cfg[0], cfg[1], kernel_size=cfg[2], stride=cfg[3], padding=cfg[4])
            stage = [(name, conv)]
            if name not in no_relu_layers:
                # 'MconvX...' -> 'MpreluX...' to match pretrained weight keys.
                stage.append(('Mprelu' + name[5:], nn.PReLU(cfg[1])))
        blocks.append(nn.Sequential(OrderedDict(stage)))
    return nn.ModuleList(blocks)
class BodyPose25Model(nn.Module):
    """
    Body pose estimation model using 25-point skeleton
    Developed by TechMatrix Solvers for ISL translation

    Two-branch multi-stage architecture: an L2 branch (52-channel output)
    refined over 4 stages, followed by an L1 branch (26-channel output)
    refined over 2 stages. Layer names must stay exactly as configured so
    state_dict keys match pretrained checkpoints.
    """
    def __init__(self):
        super(BodyPose25Model, self).__init__()
        # Define layers without ReLU activation
        # (final per-stage prediction convs output raw maps)
        no_relu_layers = [
            'Mconv7_stage0_L1', 'Mconv7_stage0_L2',
            'Mconv7_stage1_L1', 'Mconv7_stage1_L2',
            'Mconv7_stage2_L2', 'Mconv7_stage3_L2'
        ]
        # These backbone layers use PReLU instead of ReLU
        prelu_layers = ['conv4_2', 'conv4_3_CPM', 'conv4_4_CPM']
        # Initial feature extraction layers (VGG-style backbone).
        # Conv params: [in, out, kernel, stride, padding]; pool params: [kernel, stride, padding].
        # Three 2x2 pools -> feature maps at 1/8 input resolution, 128 channels out.
        base_layers = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
            ('conv4_4_CPM', [256, 128, 3, 1, 1])
        ])
        self.base_model = construct_layers(base_layers, no_relu_layers, prelu_layers)
        # Multi-stage refinement blocks (built as multi-conv modules whose
        # outputs are concatenated; 3 x 96 = 288 or 3 x 128 = 384 channels in).
        stage_blocks = {}
        # L2 branch - Stage 0 (input: 128 backbone channels)
        stage_blocks['Mconv1_stage0_L2'] = OrderedDict([
            ('Mconv1_stage0_L2_0', [128, 96, 3, 1, 1]),
            ('Mconv1_stage0_L2_1', [96, 96, 3, 1, 1]),
            ('Mconv1_stage0_L2_2', [96, 96, 3, 1, 1])
        ])
        for i in range(2, 6):
            stage_blocks[f'Mconv{i}_stage0_L2'] = OrderedDict([
                (f'Mconv{i}_stage0_L2_0', [288, 96, 3, 1, 1]),
                (f'Mconv{i}_stage0_L2_1', [96, 96, 3, 1, 1]),
                (f'Mconv{i}_stage0_L2_2', [96, 96, 3, 1, 1])
            ])
        # Stage 0 L2 prediction head: 52-channel output map
        stage_blocks['Mconv6_7_stage0_L2'] = OrderedDict([
            ('Mconv6_stage0_L2', [288, 256, 1, 1, 0]),
            ('Mconv7_stage0_L2', [256, 52, 1, 1, 0])
        ])
        # L2 branch - Stages 1-3 (input: 128 backbone + 52 previous L2 = 180)
        for stage in range(1, 4):
            stage_blocks[f'Mconv1_stage{stage}_L2'] = OrderedDict([
                (f'Mconv1_stage{stage}_L2_0', [180, 128, 3, 1, 1]),
                (f'Mconv1_stage{stage}_L2_1', [128, 128, 3, 1, 1]),
                (f'Mconv1_stage{stage}_L2_2', [128, 128, 3, 1, 1])
            ])
            for i in range(2, 6):
                stage_blocks[f'Mconv{i}_stage{stage}_L2'] = OrderedDict([
                    (f'Mconv{i}_stage{stage}_L2_0', [384, 128, 3, 1, 1]),
                    (f'Mconv{i}_stage{stage}_L2_1', [128, 128, 3, 1, 1]),
                    (f'Mconv{i}_stage{stage}_L2_2', [128, 128, 3, 1, 1])
                ])
            stage_blocks[f'Mconv6_7_stage{stage}_L2'] = OrderedDict([
                (f'Mconv6_stage{stage}_L2', [384, 512, 1, 1, 0]),
                (f'Mconv7_stage{stage}_L2', [512, 52, 1, 1, 0])
            ])
        # L1 branch configurations
        # Stage 0 (input: 128 backbone + 52 final L2 = 180)
        stage_blocks['Mconv1_stage0_L1'] = OrderedDict([
            ('Mconv1_stage0_L1_0', [180, 96, 3, 1, 1]),
            ('Mconv1_stage0_L1_1', [96, 96, 3, 1, 1]),
            ('Mconv1_stage0_L1_2', [96, 96, 3, 1, 1])
        ])
        for i in range(2, 6):
            stage_blocks[f'Mconv{i}_stage0_L1'] = OrderedDict([
                (f'Mconv{i}_stage0_L1_0', [288, 96, 3, 1, 1]),
                (f'Mconv{i}_stage0_L1_1', [96, 96, 3, 1, 1]),
                (f'Mconv{i}_stage0_L1_2', [96, 96, 3, 1, 1])
            ])
        # Stage 0 L1 prediction head: 26-channel output map
        stage_blocks['Mconv6_7_stage0_L1'] = OrderedDict([
            ('Mconv6_stage0_L1', [288, 256, 1, 1, 0]),
            ('Mconv7_stage0_L1', [256, 26, 1, 1, 0])
        ])
        # Stage 1 (input: 128 backbone + 26 stage-0 L1 + 52 L2 = 206)
        stage_blocks['Mconv1_stage1_L1'] = OrderedDict([
            ('Mconv1_stage1_L1_0', [206, 128, 3, 1, 1]),
            ('Mconv1_stage1_L1_1', [128, 128, 3, 1, 1]),
            ('Mconv1_stage1_L1_2', [128, 128, 3, 1, 1])
        ])
        for i in range(2, 6):
            stage_blocks[f'Mconv{i}_stage1_L1'] = OrderedDict([
                (f'Mconv{i}_stage1_L1_0', [384, 128, 3, 1, 1]),
                (f'Mconv{i}_stage1_L1_1', [128, 128, 3, 1, 1]),
                (f'Mconv{i}_stage1_L1_2', [128, 128, 3, 1, 1])
            ])
        stage_blocks['Mconv6_7_stage1_L1'] = OrderedDict([
            ('Mconv6_stage1_L1', [384, 512, 1, 1, 0]),
            ('Mconv7_stage1_L1', [512, 26, 1, 1, 0])
        ])
        # Build multi-conv modules (each config dict -> nn.ModuleList)
        for block_name in stage_blocks.keys():
            stage_blocks[block_name] = construct_multi_conv_layers(stage_blocks[block_name], no_relu_layers)
        self.stage_models = nn.ModuleDict(stage_blocks)
        # Freeze parameters for efficiency (inference-only model)
        for param in self.parameters():
            param.requires_grad = False
    def _multi_conv_forward(self, x, models):
        """Forward pass through multi-convolution blocks.

        Runs the modules sequentially, collecting each intermediate
        output, and returns all outputs concatenated on the channel dim
        (dense-connection pattern: 3 modules x C channels -> 3C channels).
        """
        outputs = []
        current_output = x
        for model in models:
            current_output = model(current_output)
            outputs.append(current_output)
        return torch.cat(outputs, 1)
    def forward(self, x):
        """Forward pass through the body pose model.

        Args:
            x: input image batch; assumes shape (N, 3, H, W) with H and W
               divisible by 8 — TODO confirm against caller preprocessing.

        Returns:
            Tuple (l2_output, stage1_l1_output): the final 52-channel L2
            map and the final 26-channel L1 map, both at 1/8 resolution.
        """
        base_features = self.base_model(x)
        # L2 branch processing: 4 refinement stages; stages 1-3 re-consume
        # the backbone features concatenated with the previous stage's map.
        current_features = base_features
        for stage in range(4):
            current_features = self._multi_conv_forward(
                current_features, self.stage_models[f'Mconv1_stage{stage}_L2']
            )
            for layer in range(2, 6):
                current_features = self._multi_conv_forward(
                    current_features, self.stage_models[f'Mconv{layer}_stage{stage}_L2']
                )
            current_features = self.stage_models[f'Mconv6_7_stage{stage}_L2'][0](current_features)
            current_features = self.stage_models[f'Mconv6_7_stage{stage}_L2'][1](current_features)
            l2_output = current_features
            current_features = torch.cat([base_features, current_features], 1)
        # L1 branch - Stage 0 (consumes backbone features + final L2 map)
        current_features = self._multi_conv_forward(
            current_features, self.stage_models['Mconv1_stage0_L1']
        )
        for layer in range(2, 6):
            current_features = self._multi_conv_forward(
                current_features, self.stage_models[f'Mconv{layer}_stage0_L1']
            )
        current_features = self.stage_models['Mconv6_7_stage0_L1'][0](current_features)
        current_features = self.stage_models['Mconv6_7_stage0_L1'][1](current_features)
        stage0_l1_output = current_features
        current_features = torch.cat([base_features, stage0_l1_output, l2_output], 1)
        # L1 branch - Stage 1 (final refinement)
        current_features = self._multi_conv_forward(
            current_features, self.stage_models['Mconv1_stage1_L1']
        )
        for layer in range(2, 6):
            current_features = self._multi_conv_forward(
                current_features, self.stage_models[f'Mconv{layer}_stage1_L1']
            )
        current_features = self.stage_models['Mconv6_7_stage1_L1'][0](current_features)
        stage1_l1_output = self.stage_models['Mconv6_7_stage1_L1'][1](current_features)
        return l2_output, stage1_l1_output
class HandPoseModel(nn.Module):
    """
    Hand pose estimation model using 21-point hand landmarks
    Developed by TechMatrix Solvers for ISL translation

    Six-stage CPM-style network: a VGG-style backbone plus stage-1 head,
    then five refinement stages that each re-consume the backbone features
    concatenated with the previous stage's 22-channel prediction. Layer
    names must stay exactly as configured so state_dict keys match
    pretrained checkpoints.
    """
    def __init__(self):
        super(HandPoseModel, self).__init__()
        # Layers without ReLU activation (per-stage prediction convs)
        no_relu_layers = [
            'conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',
            'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6'
        ]
        # Stage 1 - Feature extraction.
        # Conv params: [in, out, kernel, stride, padding]; pool: [kernel, stride, padding].
        # Three 2x2 pools -> features at 1/8 input resolution, 128 channels out.
        stage1_base = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3', [512, 512, 3, 1, 1]),
            ('conv4_4', [512, 512, 3, 1, 1]),
            ('conv5_1', [512, 512, 3, 1, 1]),
            ('conv5_2', [512, 512, 3, 1, 1]),
            ('conv5_3_CPM', [512, 128, 3, 1, 1])
        ])
        # Stage 1 prediction head: 22-channel output map
        stage1_prediction = OrderedDict([
            ('conv6_1_CPM', [128, 512, 1, 1, 0]),
            ('conv6_2_CPM', [512, 22, 1, 1, 0])
        ])
        stage_blocks = {}
        stage_blocks['stage1_base'] = stage1_base
        stage_blocks['stage1_prediction'] = stage1_prediction
        # Stages 2-6 refinement: input 150 = 128 backbone + 22 previous prediction
        for i in range(2, 7):
            stage_blocks[f'stage{i}'] = OrderedDict([
                (f'Mconv1_stage{i}', [150, 128, 7, 1, 3]),
                (f'Mconv2_stage{i}', [128, 128, 7, 1, 3]),
                (f'Mconv3_stage{i}', [128, 128, 7, 1, 3]),
                (f'Mconv4_stage{i}', [128, 128, 7, 1, 3]),
                (f'Mconv5_stage{i}', [128, 128, 7, 1, 3]),
                (f'Mconv6_stage{i}', [128, 128, 1, 1, 0]),
                (f'Mconv7_stage{i}', [128, 22, 1, 1, 0])
            ])
        # Build all stage models (config dict -> nn.Sequential)
        for block_name in stage_blocks.keys():
            stage_blocks[block_name] = construct_layers(stage_blocks[block_name], no_relu_layers)
        self.stage1_base_model = stage_blocks['stage1_base']
        self.stage1_prediction_model = stage_blocks['stage1_prediction']
        self.stage2_model = stage_blocks['stage2']
        self.stage3_model = stage_blocks['stage3']
        self.stage4_model = stage_blocks['stage4']
        self.stage5_model = stage_blocks['stage5']
        self.stage6_model = stage_blocks['stage6']
        # Freeze parameters for efficiency (inference-only model)
        for param in self.parameters():
            param.requires_grad = False
    def forward(self, x):
        """Forward pass through the hand pose model.

        Args:
            x: input image batch; assumes shape (N, 3, H, W) with H and W
               divisible by 8 — TODO confirm against caller preprocessing.

        Returns:
            Final 22-channel stage-6 prediction map at 1/8 resolution
            (intermediate stage outputs are discarded).
        """
        base_features = self.stage1_base_model(x)
        stage1_output = self.stage1_prediction_model(base_features)
        # Each refinement stage consumes [previous prediction, backbone features]
        # Stage 2
        stage2_input = torch.cat([stage1_output, base_features], 1)
        stage2_output = self.stage2_model(stage2_input)
        # Stage 3
        stage3_input = torch.cat([stage2_output, base_features], 1)
        stage3_output = self.stage3_model(stage3_input)
        # Stage 4
        stage4_input = torch.cat([stage3_output, base_features], 1)
        stage4_output = self.stage4_model(stage4_input)
        # Stage 5
        stage5_input = torch.cat([stage4_output, base_features], 1)
        stage5_output = self.stage5_model(stage5_input)
        # Stage 6
        stage6_input = torch.cat([stage5_output, base_features], 1)
        stage6_output = self.stage6_model(stage6_input)
        return stage6_output
# Factory functions for easy model instantiation
def create_bodypose_model():
    """Factory: instantiate and return the 25-keypoint body pose network."""
    model = BodyPose25Model()
    return model
def create_handpose_model():
    """Factory: instantiate and return the 21-landmark hand pose network."""
    model = HandPoseModel()
    return model