|
|
"""
|
|
|
ISL Sign Language Translation - TechMatrix Solvers Initiative
|
|
|
Utility functions for pose processing and visualization
|
|
|
Developed by: TechMatrix Solvers Team
|
|
|
"""
|
|
|
|
|
|
import numpy as np
|
|
|
import math
|
|
|
import cv2
|
|
|
import matplotlib
|
|
|
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
|
|
|
from matplotlib.figure import Figure
|
|
|
import matplotlib.pyplot as plt
|
|
|
import copy
|
|
|
import seaborn as sns
|
|
|
|
|
|
|
|
|
def pad_image_corner(img, stride, pad_value):
    """
    Pad an image on the bottom and right so both dimensions are divisible by stride.

    Only the bottom/right edges are padded; the top/left amounts stay 0 so the
    pixel coordinates of the original content are preserved.

    Args:
        img: Input image array of shape (H, W, C).
        stride: Stride value the padded dimensions must be divisible by.
        pad_value: Value to fill the padded regions with.

    Returns:
        tuple: (img_padded, pad) where pad is [up, left, down, right];
        up and left are always 0.
    """
    h, w = img.shape[:2]

    pad = [0, 0, 0, 0]
    pad[2] = 0 if (h % stride == 0) else stride - (h % stride)
    pad[3] = 0 if (w % stride == 0) else stride - (w % stride)

    img_padded = img

    # NOTE: the original code also carried top/left padding branches, but
    # pad[0] and pad[1] are always 0, so those branches were unreachable
    # dead code and have been removed.

    if pad[2] > 0:
        # A single existing row (zeroed, then offset by pad_value) serves as
        # a shape/dtype template so the fill follows numpy's promotion rules.
        pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + pad_value, (pad[2], 1, 1))
        img_padded = np.concatenate((img_padded, pad_down), axis=0)

    if pad[3] > 0:
        pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + pad_value, (1, pad[3], 1))
        img_padded = np.concatenate((img_padded, pad_right), axis=1)

    return img_padded, pad
|
|
|
|
|
|
|
|
|
def transfer_model_weights(model, model_weights):
    """
    Map caffe-style weight names onto a PyTorch model's state-dict keys.

    Args:
        model: PyTorch model whose state_dict() keys define the targets.
        model_weights: Dictionary of weights keyed by caffe-style names.

    Returns:
        dict: state-dict-compatible mapping of PyTorch key -> weight tensor.
    """
    def _caffe_key(pytorch_name):
        # Deeply nested keys (wrapped models) drop the first three
        # components; everything else drops only the leading module name.
        parts = pytorch_name.split('.')
        skip = 3 if len(parts) > 4 else 1
        return '.'.join(parts[skip:])

    return {
        name: model_weights[_caffe_key(name)]
        for name in model.state_dict().keys()
    }
|
|
|
|
|
|
|
|
|
def draw_body_pose_visualization(canvas, candidate, subset, model_type='body25'):
    """
    Draw body pose keypoints and connections on image.

    Args:
        canvas: Image (H, W, 3) to draw on; circles are drawn in place,
            limbs are alpha-blended onto successive copies.
        candidate: Array of detected keypoints; each row starts with
            (x, y, ...) and is indexed by the ids stored in ``subset``.
        subset: One row per detected person; entry ``i`` is the index into
            ``candidate`` for joint ``i``, or -1 if the joint was not found.
        model_type: Type of pose model ('body25' or 'coco').

    Returns:
        Canvas with keypoint circles and limb ellipses rendered.
    """
    stick_width = 4  # half-thickness of each limb ellipse, in pixels

    if model_type == 'body25':
        # Joint-index pairs defining each limb of the 25-keypoint model.
        limb_sequence = [
            [1,0],[1,2],[2,3],[3,4],[1,5],[5,6],[6,7],[1,8],[8,9],[9,10],
            [10,11],[8,12],[12,13],[13,14],[0,15],[0,16],[15,17],[16,18],
            [11,24],[11,22],[14,21],[14,19],[22,23],[19,20]
        ]
        num_joints = 25
    else:
        # COCO 18-keypoint limb pairs. The last two entries (ear-shoulder
        # links) are never reached by the limb loop below.
        limb_sequence = [
            [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
            [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
            [0, 15], [15, 17], [2, 16], [5, 17]
        ]
        num_joints = 18

    # One color per joint / limb index (25 entries, enough for body25).
    colors = [
        [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
        [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
        [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
        [255, 0, 255], [255, 0, 170], [255, 0, 85], [255,255,0], [255,255,85],
        [255,255,170], [255,255,255], [170,255,255], [85,255,255], [0,255,255]
    ]

    # Draw a filled circle at every detected joint of every person.
    for i in range(num_joints):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                # Joint not detected for this person.
                continue
            x, y = candidate[index][0:2]
            cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)

    # Draw each limb as a rotated filled ellipse, alpha-blended in.
    # NOTE(review): only num_joints - 1 limbs are iterated; for 'coco' this
    # skips the final two limb_sequence entries — presumably intentional
    # (matches reference OpenPose drawing code), confirm before changing.
    for i in range(num_joints - 1):
        for n in range(len(subset)):
            index = subset[n][np.array(limb_sequence[i])]
            if -1 in index:
                # One endpoint of the limb is missing; skip it.
                continue
            current_canvas = canvas.copy()
            # Column 0 of candidate is the image x coordinate and column 1
            # is y; the local names Y and X are swapped on purpose so the
            # ellipse center below is passed to OpenCV as (x, y).
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mean_x = np.mean(X)
            mean_y = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((int(mean_y), int(mean_x)),
                                       (int(length / 2), stick_width),
                                       int(angle), 0, 360, 1)
            cv2.fillConvexPoly(current_canvas, polygon, colors[i])
            # 40/60 blend keeps underlying image visible through the limb.
            canvas = cv2.addWeighted(canvas, 0.4, current_canvas, 0.6, 0)

    return canvas
|
|
|
|
|
|
|
|
|
def extract_body_pose_data(candidate, subset, model_type='body25'):
    """
    Collect body-pose keypoint positions and limb geometry without rendering.

    Args:
        candidate: Array of detected keypoints; each row starts with
            (x, y, ...) and is indexed by the ids stored in ``subset``.
        subset: One row per detected person; entry ``i`` is the index into
            ``candidate`` for joint ``i``, or -1 if the joint was missing.
        model_type: Pose model variant ('body25' or 'coco').

    Returns:
        tuple: (keypoint_circles, limb_sticks) where keypoint_circles is a
        list of (x, y) tuples and limb_sticks is a list of
        (center_x, center_y, angle_deg, length) tuples, matching the
        geometry used by draw_body_pose_visualization.
    """
    if model_type == 'body25':
        limb_sequence = [
            [1,0],[1,2],[2,3],[3,4],[1,5],[5,6],[6,7],[1,8],[8,9],[9,10],
            [10,11],[8,12],[12,13],[13,14],[0,15],[0,16],[15,17],[16,18],
            [11,24],[11,22],[14,21],[14,19],[22,23],[19,20]
        ]
        num_joints = 25
    else:
        limb_sequence = [
            [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
            [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
            [0, 15], [15, 17], [2, 16], [5, 17]
        ]
        num_joints = 18

    # (x, y) of every detected joint, ordered joint-major then person-major.
    keypoint_circles = [
        tuple(candidate[int(person[joint])][0:2])
        for joint in range(num_joints)
        for person in subset
        if int(person[joint]) != -1
    ]

    # Geometry of each fully-detected limb. Only the first num_joints - 1
    # limbs are considered, mirroring the drawing routine.
    limb_sticks = []
    for limb in limb_sequence[:num_joints - 1]:
        for person in subset:
            joint_ids = person[np.array(limb)]
            if -1 in joint_ids:
                continue
            # Column 0 holds the image x coordinate, column 1 holds y.
            xs = candidate[joint_ids.astype(int), 0]
            ys = candidate[joint_ids.astype(int), 1]
            span = ((ys[0] - ys[1]) ** 2 + (xs[0] - xs[1]) ** 2) ** 0.5
            tilt = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
            limb_sticks.append((np.mean(xs), np.mean(ys), tilt, span))

    return keypoint_circles, limb_sticks
|
|
|
|
|
|
|
|
|
def draw_hand_pose_visualization(canvas, all_hand_peaks, show_numbers=False):
    """
    Draw hand pose keypoints and connections.

    Args:
        canvas: Image used as the matplotlib background to draw over.
        all_hand_peaks: Iterable of (21, 2) keypoint arrays, one per hand;
            a keypoint of (0, 0) is treated as "not detected".
        show_numbers: Whether to annotate each keypoint with its index.

    Returns:
        RGB uint8 image with the hand skeleton rendered on top of canvas.
    """
    # Finger-bone connections between the 21 hand keypoints.
    edges = [
        [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
        [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]
    ]

    fig = Figure(figsize=plt.figaspect(canvas))
    fig.subplots_adjust(0, 0, 1, 1)
    bg = FigureCanvas(fig)
    ax = fig.subplots()
    ax.axis('off')
    ax.imshow(canvas)

    width, height = ax.figure.get_size_inches() * ax.figure.get_dpi()

    for peaks in all_hand_peaks:
        for ie, e in enumerate(edges):
            # Draw the bone only when neither endpoint is the (0, 0)
            # "not detected" sentinel.
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                ax.plot([x1, x2], [y1, y2],
                        color=matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0]))

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            ax.plot(x, y, 'r.')
            if show_numbers:
                ax.text(x, y, str(i))

    bg.draw()
    # np.fromstring and FigureCanvasAgg.tostring_rgb are deprecated/removed
    # in current numpy/matplotlib; read the RGBA buffer and drop the alpha
    # channel instead (same approach as render_stick_model). copy() makes
    # the result writable for downstream drawing.
    canvas = np.frombuffer(bg.buffer_rgba(), dtype=np.uint8).reshape(
        int(height), int(width), 4)[:, :, :3].copy()
    return canvas
|
|
|
|
|
|
|
|
|
def extract_hand_pose_data(all_hand_peaks, show_numbers=False):
    """
    Extract hand pose data without drawing.

    Args:
        all_hand_peaks: Iterable of (21, 2) keypoint arrays, one per hand;
            a keypoint of (0, 0) is treated as "not detected".
        show_numbers: Unused; kept for signature parity with
            draw_hand_pose_visualization.

    Returns:
        tuple: (hand_edges, hand_peaks) where hand_edges[i] is a list of
        (edge_index, (x1, y1), (x2, y2)) triples for hand i and
        hand_peaks[i] is a list of (x, y, label) triples for hand i.
        At least two slots are always returned for backward compatibility.
    """
    # Finger-bone connections between the 21 hand keypoints.
    edges = [
        [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
        [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]
    ]

    # Size the output to the actual number of hands (minimum two, matching
    # the original fixed two-slot layout). The previous hard-coded
    # [[], []] raised IndexError when more than two hands were supplied.
    num_slots = max(2, len(all_hand_peaks))
    export_edges = [[] for _ in range(num_slots)]
    export_peaks = [[] for _ in range(num_slots)]

    for idx, peaks in enumerate(all_hand_peaks):
        for ie, e in enumerate(edges):
            # Keep the edge only when neither endpoint is the (0, 0)
            # "not detected" sentinel.
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                export_edges[idx].append((ie, (x1, y1), (x2, y2)))

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            export_peaks[idx].append((x, y, str(i)))

    return export_edges, export_peaks
|
|
|
|
|
|
|
|
|
def detect_hand_regions(candidate, subset, original_image):
    """
    Locate square hand crop regions from body-pose arm keypoints.

    The hand center is extrapolated past the wrist along the elbow-to-wrist
    direction, and the crop size is derived from forearm / upper-arm length.

    Args:
        candidate: Body pose keypoint array; each row starts with (x, y).
        subset: One row per person; entry i indexes ``candidate`` for joint
            i, or -1 when the joint is missing.
        original_image: Original input image (used only for its size).

    Returns:
        List of detected hand regions [x, y, width, is_left_hand].
    """
    wrist_extension_ratio = 0.33
    img_h, img_w = original_image.shape[0:2]
    regions = []

    for person in subset.astype(int):
        arm_specs = []
        # Left arm: shoulder/elbow/wrist are joints 5/6/7.
        if not np.any(person[[5, 6, 7]] == -1):
            arm_specs.append((person[[5, 6, 7]], True))
        # Right arm: shoulder/elbow/wrist are joints 2/3/4.
        if not np.any(person[[2, 3, 4]] == -1):
            arm_specs.append((person[[2, 3, 4]], False))

        for joint_ids, is_left in arm_specs:
            (sx, sy), (ex, ey), (wx, wy) = (candidate[j][:2] for j in joint_ids)

            # Extrapolate beyond the wrist along the forearm direction to
            # land near the palm center.
            cx = wx + wrist_extension_ratio * (wx - ex)
            cy = wy + wrist_extension_ratio * (wy - ey)

            forearm = math.sqrt((wx - ex) ** 2 + (wy - ey) ** 2)
            upper_arm = math.sqrt((ex - sx) ** 2 + (ey - sy) ** 2)
            box = 1.5 * max(forearm, 0.9 * upper_arm)

            # Center the square on the palm, then clamp to the image.
            cx = max(0, cx - box / 2)
            cy = max(0, cy - box / 2)
            box = min(box if cx + box <= img_w else img_w - cx,
                      box if cy + box <= img_h else img_h - cy)

            # Reject crops too small to contain a usable hand image.
            if box >= 20:
                regions.append([int(cx), int(cy), int(box), is_left])

    return regions
|
|
|
|
|
|
|
|
|
def render_stick_model(original_img, keypoint_circles, limb_sticks, hand_edges, hand_peaks):
    """
    Render complete stick model with body and hand poses.

    Consumes the data produced by extract_body_pose_data and
    extract_hand_pose_data and draws it over a copy of the original image.

    Args:
        original_img: Original image (not modified; a deep copy is drawn on).
        keypoint_circles: List of (x, y) body keypoint coordinates.
        limb_sticks: List of (center_x, center_y, angle_deg, length) body
            limb stick data, as produced by extract_body_pose_data.
        hand_edges: Per-hand lists of (edge_index, (x1, y1), (x2, y2)).
        hand_peaks: Per-hand lists of (x, y, label) keypoints.

    Returns:
        RGB uint8 image with the rendered stick figure.
    """
    canvas = copy.deepcopy(original_img)

    # One color per stick / circle index (25 entries).
    # NOTE(review): colors is indexed by the flat enumeration index below,
    # so more than 25 sticks or circles (e.g. multiple people) would raise
    # IndexError — presumably single-person input is assumed; confirm.
    colors = [
        [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
        [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
        [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
        [255, 0, 255], [255, 0, 170], [255, 0, 85], [255,255,0], [255,255,85],
        [255,255,170], [255,255,255], [170,255,255], [85,255,255], [0,255,255]
    ]
    stick_width = 4  # half-thickness of each limb ellipse, in pixels

    # Draw each body limb as a rotated filled ellipse, alpha-blended in.
    for idx, (mean_x, mean_y, angle, length) in enumerate(limb_sticks):
        current_canvas = canvas.copy()
        polygon = cv2.ellipse2Poly(
            (int(mean_x), int(mean_y)),
            (int(length / 2), stick_width),
            int(angle), 0, 360, 1
        )
        cv2.fillConvexPoly(current_canvas, polygon, colors[idx])
        # 40/60 blend keeps underlying image visible through the limb.
        canvas = cv2.addWeighted(canvas, 0.4, current_canvas, 0.6, 0)

    # Draw a filled circle at every body keypoint.
    for idx, (x, y) in enumerate(keypoint_circles):
        cv2.circle(canvas, (int(x), int(y)), 4, colors[idx], thickness=-1)

    # Hand overlays are drawn with matplotlib on top of the cv2 canvas.
    fig = Figure(figsize=plt.figaspect(canvas))
    fig.subplots_adjust(0, 0, 1, 1)
    ax = fig.subplots()
    ax.axis('off')
    ax.imshow(canvas)

    # Hand keypoint connectivity; only its length is used here (for the
    # per-edge hue computation below).
    edges = [
        [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9],
        [9, 10], [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16],
        [0, 17], [17, 18], [18, 19], [19, 20]
    ]

    # Each hand edge gets a hue proportional to its index.
    for hand_edge_set in hand_edges:
        for (ie, (x1, y1), (x2, y2)) in hand_edge_set:
            ax.plot([x1, x2], [y1, y2],
                    color=matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0]))

    # Hand keypoints as red dots (labels in hand_peaks are ignored here).
    for hand_peak_set in hand_peaks:
        for (x, y, text) in hand_peak_set:
            ax.plot(x, y, 'r.')

    # Rasterize the figure via the Agg backend and recover an RGB array.
    bg = FigureCanvas(fig)
    bg.draw()

    width, height = fig.get_size_inches() * fig.get_dpi()
    buf = bg.buffer_rgba()
    canvas = np.frombuffer(buf, dtype=np.uint8).reshape(int(height), int(width), 4)
    canvas = canvas[:, :, :3]  # drop the alpha channel

    plt.close(fig)
    return cv2.resize(canvas, (math.ceil(width), math.ceil(height)))
|
|
|
|
|
|
|
|
|
def create_bar_plot_visualization(image, predictions, title, orig_img):
    """
    Stack a prediction bar plot (or a placeholder panel) below an image.

    Args:
        image: Image the plot is appended beneath.
        predictions: Mapping of class label -> probability; may be empty.
        title: Plot title.
        orig_img: Original image whose dimensions size the figure.

    Returns:
        Combined image with the rendered plot vertically stacked below.
    """
    # Figure width tracks the original image; height is half of it.
    panel_size = (orig_img.shape[1] / 100, orig_img.shape[0] / 200)
    fig, ax = plt.subplots(figsize=panel_size, dpi=100)
    ax.set_title(title)

    if predictions:
        # Bar chart of per-class probabilities.
        sns.barplot(x=list(predictions.keys()),
                    y=list(predictions.values()),
                    ax=ax)
    else:
        # Empty-panel fallback when there is nothing to plot.
        ax.text(0.5, 0.5, 'No Predictions Available',
                horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, fontsize=14)
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.set_xticks([])
        ax.set_yticks([])

    # Rasterize the figure and drop the alpha channel.
    fig.canvas.draw()
    plot_image = np.array(fig.canvas.renderer.buffer_rgba())[:, :, :3]
    plt.close(fig)

    # Match the plot width to the image before stacking.
    resized_plot = cv2.resize(plot_image, (image.shape[1], plot_image.shape[0]))
    return np.vstack((image, resized_plot))
|
|
|
|
|
|
|
|
|
def add_bottom_padding(image, pad_value, pad_height):
    """
    Append a solid-color strip of rows to the bottom of an image.

    Args:
        image: Input image of shape (H, W, C).
        pad_value: Color value for the padded strip (scalar or per-channel).
        pad_height: Number of rows to append.

    Returns:
        Image of shape (H + pad_height, W, C) with the strip appended.
    """
    rows, cols, depth = image.shape
    strip = np.empty((pad_height, cols, depth), dtype=image.dtype)
    strip[...] = pad_value  # broadcast fills scalar or per-channel values
    return np.concatenate((image, strip), axis=0)
|
|
|
|
|
|
|
|
|
def find_array_maximum(array):
    """
    Get the position of the maximum value in a 2D array.

    Args:
        array: 2D numpy array.

    Returns:
        tuple: (row_index, col_index) of the maximum value. Ties resolve
        to the first occurrence in row-major order, matching the previous
        manual per-row argmax/max scan.
    """
    # np.unravel_index over the flat argmax replaces the hand-rolled
    # per-row argmax/max bookkeeping with the idiomatic one-liner.
    i, j = np.unravel_index(array.argmax(), array.shape)
    return i, j