|
|
"""
|
|
|
ISL Sign Language Translation - TechMatrix Solvers Initiative
|
|
|
Utility functions for pose processing and visualization
|
|
|
Developed by: TechMatrix Solvers Team
|
|
|
"""
|
|
|
|
|
|
import numpy as np
|
|
|
import math
|
|
|
import cv2
|
|
|
import matplotlib
|
|
|
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
|
|
|
from matplotlib.figure import Figure
|
|
|
import matplotlib.pyplot as plt
|
|
|
import copy
|
|
|
import seaborn as sns
|
|
|
|
|
|
|
|
|
def pad_image_corner(img, stride, pad_value):
    """
    Pad an image on the bottom and right so both dimensions are divisible by stride.

    Only the bottom/right edges are padded; the top/left amounts stay 0 so the
    pixel coordinates of the original content are preserved.

    Args:
        img: Input image array of shape (H, W, C).
        stride: Stride value the padded dimensions must be divisible by.
        pad_value: Value to fill the padded regions with.

    Returns:
        tuple: (img_padded, pad) where pad is [up, left, down, right];
        up and left are always 0.
    """
    h, w = img.shape[:2]

    pad = [0, 0, 0, 0]
    pad[2] = 0 if (h % stride == 0) else stride - (h % stride)
    pad[3] = 0 if (w % stride == 0) else stride - (w % stride)

    img_padded = img

    # NOTE: the original code also carried top/left padding branches, but
    # pad[0] and pad[1] are always 0, so those branches were unreachable
    # dead code and have been removed.

    if pad[2] > 0:
        # A single existing row (zeroed, then offset by pad_value) serves as
        # a shape/dtype template so the fill follows numpy's promotion rules.
        pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + pad_value, (pad[2], 1, 1))
        img_padded = np.concatenate((img_padded, pad_down), axis=0)

    if pad[3] > 0:
        pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + pad_value, (1, pad[3], 1))
        img_padded = np.concatenate((img_padded, pad_right), axis=1)

    return img_padded, pad
|
|
|
|
|
|
|
|
|
def transfer_model_weights(model, model_weights):
    """
    Map caffe-style weight names onto a PyTorch model's state-dict keys.

    Args:
        model: PyTorch model whose state_dict() keys define the targets.
        model_weights: Dictionary of weights keyed by caffe-style names.

    Returns:
        dict: state-dict-compatible mapping of PyTorch key -> weight tensor.
    """
    def _caffe_key(pytorch_name):
        # Deeply nested keys (wrapped models) drop the first three
        # components; everything else drops only the leading module name.
        parts = pytorch_name.split('.')
        skip = 3 if len(parts) > 4 else 1
        return '.'.join(parts[skip:])

    return {
        name: model_weights[_caffe_key(name)]
        for name in model.state_dict().keys()
    }
|
|
|
|
|
|
|
|
|
def draw_body_pose_visualization(canvas, candidate, subset, model_type='body25'):
    """
    Draw body pose keypoints and connections on image.

    Args:
        canvas: Image (H, W, 3) to draw on; circles are drawn in place,
            limbs are alpha-blended onto successive copies.
        candidate: Array of detected keypoints; each row starts with
            (x, y, ...) and is indexed by the ids stored in ``subset``.
        subset: One row per detected person; entry ``i`` is the index into
            ``candidate`` for joint ``i``, or -1 if the joint was not found.
        model_type: Type of pose model ('body25' or 'coco').

    Returns:
        Canvas with keypoint circles and limb ellipses rendered.
    """
    stick_width = 4  # half-thickness of each limb ellipse, in pixels

    if model_type == 'body25':
        # Joint-index pairs defining each limb of the 25-keypoint model.
        limb_sequence = [
            [1,0],[1,2],[2,3],[3,4],[1,5],[5,6],[6,7],[1,8],[8,9],[9,10],
            [10,11],[8,12],[12,13],[13,14],[0,15],[0,16],[15,17],[16,18],
            [11,24],[11,22],[14,21],[14,19],[22,23],[19,20]
        ]
        num_joints = 25
    else:
        # COCO 18-keypoint limb pairs. The last two entries (ear-shoulder
        # links) are never reached by the limb loop below.
        limb_sequence = [
            [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
            [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
            [0, 15], [15, 17], [2, 16], [5, 17]
        ]
        num_joints = 18

    # One color per joint / limb index (25 entries, enough for body25).
    colors = [
        [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
        [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
        [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
        [255, 0, 255], [255, 0, 170], [255, 0, 85], [255,255,0], [255,255,85],
        [255,255,170], [255,255,255], [170,255,255], [85,255,255], [0,255,255]
    ]

    # Draw a filled circle at every detected joint of every person.
    for i in range(num_joints):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                # Joint not detected for this person.
                continue
            x, y = candidate[index][0:2]
            cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)

    # Draw each limb as a rotated filled ellipse, alpha-blended in.
    # NOTE(review): only num_joints - 1 limbs are iterated; for 'coco' this
    # skips the final two limb_sequence entries — presumably intentional
    # (matches reference OpenPose drawing code), confirm before changing.
    for i in range(num_joints - 1):
        for n in range(len(subset)):
            index = subset[n][np.array(limb_sequence[i])]
            if -1 in index:
                # One endpoint of the limb is missing; skip it.
                continue
            current_canvas = canvas.copy()
            # Column 0 of candidate is the image x coordinate and column 1
            # is y; the local names Y and X are swapped on purpose so the
            # ellipse center below is passed to OpenCV as (x, y).
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mean_x = np.mean(X)
            mean_y = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((int(mean_y), int(mean_x)),
                                       (int(length / 2), stick_width),
                                       int(angle), 0, 360, 1)
            cv2.fillConvexPoly(current_canvas, polygon, colors[i])
            # 40/60 blend keeps underlying image visible through the limb.
            canvas = cv2.addWeighted(canvas, 0.4, current_canvas, 0.6, 0)

    return canvas
|
|
|
|
|
|
|
|
|
def extract_body_pose_data(candidate, subset, model_type='body25'):
    """
    Collect body-pose keypoint positions and limb geometry without rendering.

    Args:
        candidate: Array of detected keypoints; each row starts with
            (x, y, ...) and is indexed by the ids stored in ``subset``.
        subset: One row per detected person; entry ``i`` is the index into
            ``candidate`` for joint ``i``, or -1 if the joint was missing.
        model_type: Pose model variant ('body25' or 'coco').

    Returns:
        tuple: (keypoint_circles, limb_sticks) where keypoint_circles is a
        list of (x, y) tuples and limb_sticks is a list of
        (center_x, center_y, angle_deg, length) tuples, matching the
        geometry used by draw_body_pose_visualization.
    """
    if model_type == 'body25':
        limb_sequence = [
            [1,0],[1,2],[2,3],[3,4],[1,5],[5,6],[6,7],[1,8],[8,9],[9,10],
            [10,11],[8,12],[12,13],[13,14],[0,15],[0,16],[15,17],[16,18],
            [11,24],[11,22],[14,21],[14,19],[22,23],[19,20]
        ]
        num_joints = 25
    else:
        limb_sequence = [
            [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
            [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
            [0, 15], [15, 17], [2, 16], [5, 17]
        ]
        num_joints = 18

    # (x, y) of every detected joint, ordered joint-major then person-major.
    keypoint_circles = [
        tuple(candidate[int(person[joint])][0:2])
        for joint in range(num_joints)
        for person in subset
        if int(person[joint]) != -1
    ]

    # Geometry of each fully-detected limb. Only the first num_joints - 1
    # limbs are considered, mirroring the drawing routine.
    limb_sticks = []
    for limb in limb_sequence[:num_joints - 1]:
        for person in subset:
            joint_ids = person[np.array(limb)]
            if -1 in joint_ids:
                continue
            # Column 0 holds the image x coordinate, column 1 holds y.
            xs = candidate[joint_ids.astype(int), 0]
            ys = candidate[joint_ids.astype(int), 1]
            span = ((ys[0] - ys[1]) ** 2 + (xs[0] - xs[1]) ** 2) ** 0.5
            tilt = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
            limb_sticks.append((np.mean(xs), np.mean(ys), tilt, span))

    return keypoint_circles, limb_sticks
|
|
|
|
|
|
|
|
|
def draw_hand_pose_visualization(canvas, all_hand_peaks, show_numbers=False):
    """
    Draw hand pose keypoints and connections.

    Args:
        canvas: Image used as the matplotlib background to draw over.
        all_hand_peaks: Iterable of (21, 2) keypoint arrays, one per hand;
            a keypoint of (0, 0) is treated as "not detected".
        show_numbers: Whether to annotate each keypoint with its index.

    Returns:
        RGB uint8 image with the hand skeleton rendered on top of canvas.
    """
    # Finger-bone connections between the 21 hand keypoints.
    edges = [
        [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
        [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]
    ]

    fig = Figure(figsize=plt.figaspect(canvas))
    fig.subplots_adjust(0, 0, 1, 1)
    bg = FigureCanvas(fig)
    ax = fig.subplots()
    ax.axis('off')
    ax.imshow(canvas)

    width, height = ax.figure.get_size_inches() * ax.figure.get_dpi()

    for peaks in all_hand_peaks:
        for ie, e in enumerate(edges):
            # Draw the bone only when neither endpoint is the (0, 0)
            # "not detected" sentinel.
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                ax.plot([x1, x2], [y1, y2],
                        color=matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0]))

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            ax.plot(x, y, 'r.')
            if show_numbers:
                ax.text(x, y, str(i))

    bg.draw()
    # np.fromstring and FigureCanvasAgg.tostring_rgb are deprecated/removed
    # in current numpy/matplotlib; read the RGBA buffer and drop the alpha
    # channel instead (same approach as render_stick_model). copy() makes
    # the result writable for downstream drawing.
    canvas = np.frombuffer(bg.buffer_rgba(), dtype=np.uint8).reshape(
        int(height), int(width), 4)[:, :, :3].copy()
    return canvas
|
|
|
|
|
|
|
|
|
def extract_hand_pose_data(all_hand_peaks, show_numbers=False):
    """
    Extract hand pose data without drawing.

    Args:
        all_hand_peaks: Iterable of (21, 2) keypoint arrays, one per hand;
            a keypoint of (0, 0) is treated as "not detected".
        show_numbers: Unused; kept for signature parity with
            draw_hand_pose_visualization.

    Returns:
        tuple: (hand_edges, hand_peaks) where hand_edges[i] is a list of
        (edge_index, (x1, y1), (x2, y2)) triples for hand i and
        hand_peaks[i] is a list of (x, y, label) triples for hand i.
        At least two slots are always returned for backward compatibility.
    """
    # Finger-bone connections between the 21 hand keypoints.
    edges = [
        [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
        [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]
    ]

    # Size the output to the actual number of hands (minimum two, matching
    # the original fixed two-slot layout). The previous hard-coded
    # [[], []] raised IndexError when more than two hands were supplied.
    num_slots = max(2, len(all_hand_peaks))
    export_edges = [[] for _ in range(num_slots)]
    export_peaks = [[] for _ in range(num_slots)]

    for idx, peaks in enumerate(all_hand_peaks):
        for ie, e in enumerate(edges):
            # Keep the edge only when neither endpoint is the (0, 0)
            # "not detected" sentinel.
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                export_edges[idx].append((ie, (x1, y1), (x2, y2)))

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            export_peaks[idx].append((x, y, str(i)))

    return export_edges, export_peaks
|
|
|
|
|
|
|
|
|
def detect_hand_regions(candidate, subset, original_image):
    """
    Locate square hand crop regions from body-pose arm keypoints.

    The hand center is extrapolated past the wrist along the elbow-to-wrist
    direction, and the crop size is derived from forearm / upper-arm length.

    Args:
        candidate: Body pose keypoint array; each row starts with (x, y).
        subset: One row per person; entry i indexes ``candidate`` for joint
            i, or -1 when the joint is missing.
        original_image: Original input image (used only for its size).

    Returns:
        List of detected hand regions [x, y, width, is_left_hand].
    """
    wrist_extension_ratio = 0.33
    img_h, img_w = original_image.shape[0:2]
    regions = []

    for person in subset.astype(int):
        arm_specs = []
        # Left arm: shoulder/elbow/wrist are joints 5/6/7.
        if not np.any(person[[5, 6, 7]] == -1):
            arm_specs.append((person[[5, 6, 7]], True))
        # Right arm: shoulder/elbow/wrist are joints 2/3/4.
        if not np.any(person[[2, 3, 4]] == -1):
            arm_specs.append((person[[2, 3, 4]], False))

        for joint_ids, is_left in arm_specs:
            (sx, sy), (ex, ey), (wx, wy) = (candidate[j][:2] for j in joint_ids)

            # Extrapolate beyond the wrist along the forearm direction to
            # land near the palm center.
            cx = wx + wrist_extension_ratio * (wx - ex)
            cy = wy + wrist_extension_ratio * (wy - ey)

            forearm = math.sqrt((wx - ex) ** 2 + (wy - ey) ** 2)
            upper_arm = math.sqrt((ex - sx) ** 2 + (ey - sy) ** 2)
            box = 1.5 * max(forearm, 0.9 * upper_arm)

            # Center the square on the palm, then clamp to the image.
            cx = max(0, cx - box / 2)
            cy = max(0, cy - box / 2)
            box = min(box if cx + box <= img_w else img_w - cx,
                      box if cy + box <= img_h else img_h - cy)

            # Reject crops too small to contain a usable hand image.
            if box >= 20:
                regions.append([int(cx), int(cy), int(box), is_left])

    return regions
|
|
|
|
|
|
|
|
|
def render_stick_model(original_img, keypoint_circles, limb_sticks, hand_edges, hand_peaks):
    """
    Render complete stick model with body and hand poses.

    Consumes the data produced by extract_body_pose_data and
    extract_hand_pose_data and draws it over a copy of the original image.

    Args:
        original_img: Original image (not modified; a deep copy is drawn on).
        keypoint_circles: List of (x, y) body keypoint coordinates.
        limb_sticks: List of (center_x, center_y, angle_deg, length) body
            limb stick data, as produced by extract_body_pose_data.
        hand_edges: Per-hand lists of (edge_index, (x1, y1), (x2, y2)).
        hand_peaks: Per-hand lists of (x, y, label) keypoints.

    Returns:
        RGB uint8 image with the rendered stick figure.
    """
    canvas = copy.deepcopy(original_img)

    # One color per stick / circle index (25 entries).
    # NOTE(review): colors is indexed by the flat enumeration index below,
    # so more than 25 sticks or circles (e.g. multiple people) would raise
    # IndexError — presumably single-person input is assumed; confirm.
    colors = [
        [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
        [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
        [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
        [255, 0, 255], [255, 0, 170], [255, 0, 85], [255,255,0], [255,255,85],
        [255,255,170], [255,255,255], [170,255,255], [85,255,255], [0,255,255]
    ]
    stick_width = 4  # half-thickness of each limb ellipse, in pixels

    # Draw each body limb as a rotated filled ellipse, alpha-blended in.
    for idx, (mean_x, mean_y, angle, length) in enumerate(limb_sticks):
        current_canvas = canvas.copy()
        polygon = cv2.ellipse2Poly(
            (int(mean_x), int(mean_y)),
            (int(length / 2), stick_width),
            int(angle), 0, 360, 1
        )
        cv2.fillConvexPoly(current_canvas, polygon, colors[idx])
        # 40/60 blend keeps underlying image visible through the limb.
        canvas = cv2.addWeighted(canvas, 0.4, current_canvas, 0.6, 0)

    # Draw a filled circle at every body keypoint.
    for idx, (x, y) in enumerate(keypoint_circles):
        cv2.circle(canvas, (int(x), int(y)), 4, colors[idx], thickness=-1)

    # Hand overlays are drawn with matplotlib on top of the cv2 canvas.
    fig = Figure(figsize=plt.figaspect(canvas))
    fig.subplots_adjust(0, 0, 1, 1)
    ax = fig.subplots()
    ax.axis('off')
    ax.imshow(canvas)

    # Hand keypoint connectivity; only its length is used here (for the
    # per-edge hue computation below).
    edges = [
        [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9],
        [9, 10], [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16],
        [0, 17], [17, 18], [18, 19], [19, 20]
    ]

    # Each hand edge gets a hue proportional to its index.
    for hand_edge_set in hand_edges:
        for (ie, (x1, y1), (x2, y2)) in hand_edge_set:
            ax.plot([x1, x2], [y1, y2],
                    color=matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0]))

    # Hand keypoints as red dots (labels in hand_peaks are ignored here).
    for hand_peak_set in hand_peaks:
        for (x, y, text) in hand_peak_set:
            ax.plot(x, y, 'r.')

    # Rasterize the figure via the Agg backend and recover an RGB array.
    bg = FigureCanvas(fig)
    bg.draw()

    width, height = fig.get_size_inches() * fig.get_dpi()
    buf = bg.buffer_rgba()
    canvas = np.frombuffer(buf, dtype=np.uint8).reshape(int(height), int(width), 4)
    canvas = canvas[:, :, :3]  # drop the alpha channel

    plt.close(fig)
    return cv2.resize(canvas, (math.ceil(width), math.ceil(height)))
|
|
|
|
|
|
|
|
|
def create_bar_plot_visualization(image, predictions, title, orig_img):
    """
    Stack a prediction bar plot (or a placeholder panel) below an image.

    Args:
        image: Image the plot is appended beneath.
        predictions: Mapping of class label -> probability; may be empty.
        title: Plot title.
        orig_img: Original image whose dimensions size the figure.

    Returns:
        Combined image with the rendered plot vertically stacked below.
    """
    # Figure width tracks the original image; height is half of it.
    panel_size = (orig_img.shape[1] / 100, orig_img.shape[0] / 200)
    fig, ax = plt.subplots(figsize=panel_size, dpi=100)
    ax.set_title(title)

    if predictions:
        # Bar chart of per-class probabilities.
        sns.barplot(x=list(predictions.keys()),
                    y=list(predictions.values()),
                    ax=ax)
    else:
        # Empty-panel fallback when there is nothing to plot.
        ax.text(0.5, 0.5, 'No Predictions Available',
                horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, fontsize=14)
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.set_xticks([])
        ax.set_yticks([])

    # Rasterize the figure and drop the alpha channel.
    fig.canvas.draw()
    plot_image = np.array(fig.canvas.renderer.buffer_rgba())[:, :, :3]
    plt.close(fig)

    # Match the plot width to the image before stacking.
    resized_plot = cv2.resize(plot_image, (image.shape[1], plot_image.shape[0]))
    return np.vstack((image, resized_plot))
|
|
|
|
|
|
|
|
|
def add_bottom_padding(image, pad_value, pad_height):
    """
    Append a solid-color strip of rows to the bottom of an image.

    Args:
        image: Input image of shape (H, W, C).
        pad_value: Color value for the padded strip (scalar or per-channel).
        pad_height: Number of rows to append.

    Returns:
        Image of shape (H + pad_height, W, C) with the strip appended.
    """
    rows, cols, depth = image.shape
    strip = np.empty((pad_height, cols, depth), dtype=image.dtype)
    strip[...] = pad_value  # broadcast fills scalar or per-channel values
    return np.concatenate((image, strip), axis=0)
|
|
|
|
|
|
|
|
|
def find_array_maximum(array):
    """
    Get the position of the maximum value in a 2D array.

    Args:
        array: 2D numpy array.

    Returns:
        tuple: (row_index, col_index) of the maximum value. Ties resolve
        to the first occurrence in row-major order, matching the previous
        manual per-row argmax/max scan.
    """
    # np.unravel_index over the flat argmax replaces the hand-rolled
    # per-row argmax/max bookkeeping with the idiomatic one-liner.
    i, j = np.unravel_index(array.argmax(), array.shape)
    return i, j