Spaces:

Abs6187
/

ISL_Sign_Language_Translation

Sleeping

App Files Files Community

ISL_Sign_Language_Translation / pose_models.py

Abs6187

Upload 16 files

e2cffd9 verified 3 months ago

raw

history blame contribute delete

14.5 kB

	"""
	ISL Sign Language Translation - TechMatrix Solvers Initiative
	Model definitions for body pose and hand pose estimation
	Developed by: TechMatrix Solvers Team
	"""

	import torch
	from collections import OrderedDict
	import torch.nn as nn


	def construct_layers(layer_config, no_relu_layers, prelu_layers=None):
	    """
	    Build an ``nn.Sequential`` from an ordered layer specification.

	    Args:
	        layer_config: Ordered mapping of layer name -> parameter list.
	            Names containing ``'pool'`` become ``nn.MaxPool2d`` and read
	            ``[kernel_size, stride, padding]``; every other name becomes
	            ``nn.Conv2d`` and reads
	            ``[in_channels, out_channels, kernel_size, stride, padding]``.
	        no_relu_layers: Names of convolution layers that get no activation.
	        prelu_layers: Names of convolution layers followed by ``nn.PReLU``
	            instead of ``nn.ReLU``. ``None`` (the default) means none.

	    Returns:
	        ``nn.Sequential`` whose sub-modules are registered under the layer
	        names, with ``'relu_<name>'`` / ``'prelu<name[4:]>'`` activation
	        entries inserted directly after the convolution they belong to.
	    """
	    # A ``None`` sentinel replaces the former mutable ``[]`` default so no
	    # list object is shared between calls.
	    if prelu_layers is None:
	        prelu_layers = ()

	    layers = []
	    for layer_name, params in layer_config.items():
	        if 'pool' in layer_name:
	            pooling = nn.MaxPool2d(
	                kernel_size=params[0], stride=params[1], padding=params[2]
	            )
	            layers.append((layer_name, pooling))
	            # Pooling layers never receive an activation.
	            continue

	        conv2d = nn.Conv2d(
	            in_channels=params[0],
	            out_channels=params[1],
	            kernel_size=params[2],
	            stride=params[3],
	            padding=params[4],
	        )
	        layers.append((layer_name, conv2d))

	        if layer_name in no_relu_layers:
	            continue

	        if layer_name in prelu_layers:
	            # The first four characters of the layer name are replaced by
	            # 'prelu' (e.g. 'conv4_2' -> 'prelu4_2'); one learnable slope
	            # per output channel.
	            layers.append(('prelu' + layer_name[4:], nn.PReLU(params[1])))
	        else:
	            layers.append(('relu_' + layer_name, nn.ReLU(inplace=True)))

	    return nn.Sequential(OrderedDict(layers))


	def construct_multi_conv_layers(layer_config, no_relu_layers):
	    """
	    Turn each entry of a layer specification into its own small Sequential.

	    Args:
	        layer_config: Ordered mapping of layer name -> parameter list, using
	            ``[kernel_size, stride, padding]`` for names containing
	            ``'pool'`` and ``[in_channels, out_channels, kernel_size,
	            stride, padding]`` for every other (convolution) name.
	        no_relu_layers: Names of convolution layers that get no PReLU.

	    Returns:
	        ``nn.ModuleList`` with one ``nn.Sequential`` per entry, in the
	        iteration order of ``layer_config``.
	    """
	    blocks = []
	    for name, spec in layer_config.items():
	        if 'pool' in name:
	            named_modules = [
	                (name, nn.MaxPool2d(kernel_size=spec[0], stride=spec[1], padding=spec[2])),
	            ]
	        else:
	            convolution = nn.Conv2d(
	                in_channels=spec[0],
	                out_channels=spec[1],
	                kernel_size=spec[2],
	                stride=spec[3],
	                padding=spec[4],
	            )
	            named_modules = [(name, convolution)]
	            if name not in no_relu_layers:
	                # Activation name: 'Mprelu' followed by the layer name minus
	                # its first five characters; one slope per output channel.
	                named_modules.append(('Mprelu' + name[5:], nn.PReLU(spec[1])))
	        blocks.append(nn.Sequential(OrderedDict(named_modules)))
	    return nn.ModuleList(blocks)


	class BodyPose25Model(nn.Module):
	    """
	    Two-branch, multi-stage body pose network (25-point skeleton variant).

	    A convolutional backbone with three stride-2 max-pools produces
	    128-channel features. Four "L2" stages refine a 52-channel map, then
	    two "L1" stages produce a 26-channel map. Every stage is a chain of
	    five triple-convolution blocks whose three intermediate outputs are
	    concatenated, followed by two 1x1 convolutions. All parameters are
	    created with ``requires_grad = False``.
	    """

	    def __init__(self):
	        super(BodyPose25Model, self).__init__()

	        # Last convolution of every stage: emitted without an activation.
	        linear_outputs = [
	            'Mconv7_stage0_L1', 'Mconv7_stage0_L2',
	            'Mconv7_stage1_L1', 'Mconv7_stage1_L2',
	            'Mconv7_stage2_L2', 'Mconv7_stage3_L2'
	        ]
	        prelu_names = ['conv4_2', 'conv4_3_CPM', 'conv4_4_CPM']

	        def conv3x3(in_channels, out_channels):
	            # [in_channels, out_channels, kernel_size, stride, padding]
	            return [in_channels, out_channels, 3, 1, 1]

	        # Backbone: plain 3x3 convolutions with 2x2 stride-2 pooling.
	        backbone_spec = OrderedDict()
	        backbone_spec['conv1_1'] = conv3x3(3, 64)
	        backbone_spec['conv1_2'] = conv3x3(64, 64)
	        backbone_spec['pool1_stage1'] = [2, 2, 0]
	        backbone_spec['conv2_1'] = conv3x3(64, 128)
	        backbone_spec['conv2_2'] = conv3x3(128, 128)
	        backbone_spec['pool2_stage1'] = [2, 2, 0]
	        backbone_spec['conv3_1'] = conv3x3(128, 256)
	        backbone_spec['conv3_2'] = conv3x3(256, 256)
	        backbone_spec['conv3_3'] = conv3x3(256, 256)
	        backbone_spec['conv3_4'] = conv3x3(256, 256)
	        backbone_spec['pool3_stage1'] = [2, 2, 0]
	        backbone_spec['conv4_1'] = conv3x3(256, 512)
	        backbone_spec['conv4_2'] = conv3x3(512, 512)
	        backbone_spec['conv4_3_CPM'] = conv3x3(512, 256)
	        backbone_spec['conv4_4_CPM'] = conv3x3(256, 128)
	        self.base_model = construct_layers(backbone_spec, linear_outputs, prelu_names)

	        # Plain dict on purpose: this is what gets handed to nn.ModuleDict.
	        specs = {}

	        def add_stage(stage, branch, first_in, width, hidden, out_channels):
	            """Register the seven spec entries of one refinement stage."""
	            for idx in range(1, 6):
	                key = f'Mconv{idx}_stage{stage}_{branch}'
	                # Blocks 2-5 consume the concatenation of the previous
	                # block's three outputs, hence 3 * width input channels.
	                block_in = first_in if idx == 1 else 3 * width
	                specs[key] = OrderedDict([
	                    (f'{key}_0', conv3x3(block_in, width)),
	                    (f'{key}_1', conv3x3(width, width)),
	                    (f'{key}_2', conv3x3(width, width))
	                ])
	            specs[f'Mconv6_7_stage{stage}_{branch}'] = OrderedDict([
	                (f'Mconv6_stage{stage}_{branch}', [3 * width, hidden, 1, 1, 0]),
	                (f'Mconv7_stage{stage}_{branch}', [hidden, out_channels, 1, 1, 0])
	            ])

	        # L2 branch: stage 0 reads the 128-channel backbone features,
	        # stages 1-3 read backbone + previous L2 output (128 + 52 = 180).
	        add_stage(0, 'L2', 128, 96, 256, 52)
	        for stage in range(1, 4):
	            add_stage(stage, 'L2', 180, 128, 512, 52)

	        # L1 branch: stage 0 reads backbone + final L2 output (180),
	        # stage 1 additionally reads the stage-0 L1 output (180 + 26 = 206).
	        add_stage(0, 'L1', 180, 96, 256, 26)
	        add_stage(1, 'L1', 206, 128, 512, 26)

	        # Instantiate the modules in the order the specs were registered.
	        built = {
	            key: construct_multi_conv_layers(spec, linear_outputs)
	            for key, spec in specs.items()
	        }
	        self.stage_models = nn.ModuleDict(built)

	        # Every parameter is frozen at construction time.
	        for weight in self.parameters():
	            weight.requires_grad = False

	    def _multi_conv_forward(self, x, models):
	        """Chain ``models`` over ``x`` and concatenate every intermediate output on dim 1."""
	        collected = []
	        for block in models:
	            x = block(x)
	            collected.append(x)
	        return torch.cat(collected, 1)

	    def _run_stage(self, features, stage, branch):
	        """Apply the five multi-conv blocks and the two 1x1 convolutions of one stage."""
	        for idx in range(1, 6):
	            features = self._multi_conv_forward(
	                features, self.stage_models[f'Mconv{idx}_stage{stage}_{branch}']
	            )
	        head = self.stage_models[f'Mconv6_7_stage{stage}_{branch}']
	        return head[1](head[0](features))

	    def forward(self, x):
	        """
	        Run the backbone, the four L2 stages and the two L1 stages.

	        Returns:
	            Tuple ``(l2_output, stage1_l1_output)``: the output of the last
	            L2 stage (52 channels) followed by the output of the last L1
	            stage (26 channels).
	        """
	        backbone = self.base_model(x)

	        # L2 branch: each stage's output is re-joined with the backbone
	        # features to form the next stage's input.
	        stage_input = backbone
	        for stage in range(4):
	            l2_output = self._run_stage(stage_input, stage, 'L2')
	            stage_input = torch.cat([backbone, l2_output], 1)

	        # L1 branch - stage 0 consumes backbone + final L2 output.
	        stage0_l1_output = self._run_stage(stage_input, 0, 'L1')

	        # L1 branch - stage 1 also sees the stage-0 L1 output.
	        stage1_input = torch.cat([backbone, stage0_l1_output, l2_output], 1)
	        stage1_l1_output = self._run_stage(stage1_input, 1, 'L1')

	        return l2_output, stage1_l1_output


	class HandPoseModel(nn.Module):
	    """
	    Six-stage hand pose network (21-point hand landmark variant).

	    A convolutional backbone with three stride-2 max-pools yields
	    128-channel features; a first prediction head maps them to 22
	    channels. Stages 2-6 each take the previous 22-channel prediction
	    concatenated with the backbone features (150 channels) and emit a new
	    22-channel prediction. All parameters are created with
	    ``requires_grad = False``.
	    """

	    def __init__(self):
	        super(HandPoseModel, self).__init__()

	        # Prediction layers of every stage: emitted without a ReLU.
	        linear_outputs = [
	            'conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',
	            'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6'
	        ]

	        def conv3x3(in_channels, out_channels):
	            # [in_channels, out_channels, kernel_size, stride, padding]
	            return [in_channels, out_channels, 3, 1, 1]

	        # Stage 1 - feature extraction backbone.
	        backbone_spec = OrderedDict()
	        backbone_spec['conv1_1'] = conv3x3(3, 64)
	        backbone_spec['conv1_2'] = conv3x3(64, 64)
	        backbone_spec['pool1_stage1'] = [2, 2, 0]
	        backbone_spec['conv2_1'] = conv3x3(64, 128)
	        backbone_spec['conv2_2'] = conv3x3(128, 128)
	        backbone_spec['pool2_stage1'] = [2, 2, 0]
	        backbone_spec['conv3_1'] = conv3x3(128, 256)
	        backbone_spec['conv3_2'] = conv3x3(256, 256)
	        backbone_spec['conv3_3'] = conv3x3(256, 256)
	        backbone_spec['conv3_4'] = conv3x3(256, 256)
	        backbone_spec['pool3_stage1'] = [2, 2, 0]
	        backbone_spec['conv4_1'] = conv3x3(256, 512)
	        backbone_spec['conv4_2'] = conv3x3(512, 512)
	        backbone_spec['conv4_3'] = conv3x3(512, 512)
	        backbone_spec['conv4_4'] = conv3x3(512, 512)
	        backbone_spec['conv5_1'] = conv3x3(512, 512)
	        backbone_spec['conv5_2'] = conv3x3(512, 512)
	        backbone_spec['conv5_3_CPM'] = conv3x3(512, 128)

	        # Stage 1 - prediction head (two 1x1 convolutions).
	        head_spec = OrderedDict([
	            ('conv6_1_CPM', [128, 512, 1, 1, 0]),
	            ('conv6_2_CPM', [512, 22, 1, 1, 0])
	        ])

	        def refinement_spec(stage):
	            """Spec of one refinement stage: five 7x7 convs, then two 1x1 convs."""
	            spec = OrderedDict()
	            spec[f'Mconv1_stage{stage}'] = [150, 128, 7, 1, 3]
	            for idx in range(2, 6):
	                spec[f'Mconv{idx}_stage{stage}'] = [128, 128, 7, 1, 3]
	            spec[f'Mconv6_stage{stage}'] = [128, 128, 1, 1, 0]
	            spec[f'Mconv7_stage{stage}'] = [128, 22, 1, 1, 0]
	            return spec

	        # Modules are created and registered in stage order.
	        self.stage1_base_model = construct_layers(backbone_spec, linear_outputs)
	        self.stage1_prediction_model = construct_layers(head_spec, linear_outputs)
	        self.stage2_model = construct_layers(refinement_spec(2), linear_outputs)
	        self.stage3_model = construct_layers(refinement_spec(3), linear_outputs)
	        self.stage4_model = construct_layers(refinement_spec(4), linear_outputs)
	        self.stage5_model = construct_layers(refinement_spec(5), linear_outputs)
	        self.stage6_model = construct_layers(refinement_spec(6), linear_outputs)

	        # Every parameter is frozen at construction time.
	        for weight in self.parameters():
	            weight.requires_grad = False

	    def forward(self, x):
	        """
	        Run the backbone, the first prediction head and the five refinement stages.

	        Returns:
	            The 22-channel output of stage 6.
	        """
	        features = self.stage1_base_model(x)
	        prediction = self.stage1_prediction_model(features)

	        refinement_stages = (
	            self.stage2_model,
	            self.stage3_model,
	            self.stage4_model,
	            self.stage5_model,
	            self.stage6_model,
	        )
	        for refine in refinement_stages:
	            # Each stage sees the previous prediction stacked on top of the
	            # shared backbone features.
	            prediction = refine(torch.cat([prediction, features], 1))

	        return prediction


	# Factory functions for easy model instantiation
	def create_bodypose_model():
	    """Instantiate a new ``BodyPose25Model`` and return it."""
	    body_model = BodyPose25Model()
	    return body_model


	def create_handpose_model():
	    """Instantiate a new ``HandPoseModel`` and return it."""
	    hand_model = HandPoseModel()
	    return hand_model