"""
ISL Sign Language Translation - TechMatrix Solvers Initiative
Utility functions for pose processing and visualization
Developed by: TechMatrix Solvers Team
"""
import numpy as np
import math
import cv2
import matplotlib
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
import copy
import seaborn as sns
def pad_image_corner(img, stride, pad_value):
    """
    Pad an image on the bottom/right so its dimensions are divisible by stride.

    Args:
        img: Input image array (H x W x C; extra trailing axes are preserved).
        stride: Stride the padded spatial dimensions must be divisible by.
        pad_value: Constant value used to fill the padded region.

    Returns:
        tuple: (img_padded, pad) where pad = [up, left, down, right].
        Up/left are always 0 — padding is only ever appended down/right.
    """
    h, w = img.shape[:2]
    pad_down = 0 if h % stride == 0 else stride - (h % stride)
    pad_right = 0 if w % stride == 0 else stride - (w % stride)
    pad = [0, 0, pad_down, pad_right]
    if pad_down == 0 and pad_right == 0:
        # Already aligned — return the input unchanged.
        return img, pad
    # np.pad with a constant fill replaces the previous tile-and-concatenate
    # code, which crashed on 1-pixel-tall/wide inputs (its [-2:-1] slice is
    # empty when h == 1 or w == 1) and carried dead up/left branches.
    pad_widths = [(0, pad_down), (0, pad_right)] + [(0, 0)] * (img.ndim - 2)
    img_padded = np.pad(img, pad_widths, mode='constant', constant_values=pad_value)
    return img_padded, pad
def transfer_model_weights(model, model_weights):
    """
    Map caffe-style weight names onto the PyTorch model's state_dict keys.

    Args:
        model: PyTorch model whose state_dict keys define the target names.
        model_weights: Dict of weight tensors keyed by caffe-style names.

    Returns:
        dict: Mapping suitable for model.load_state_dict.
    """
    def _source_key(target_name):
        # Deeply nested keys (> 4 dot-separated segments, the body25 layout)
        # drop the first three segments; everything else drops only the
        # leading module prefix.
        parts = target_name.split('.')
        start = 3 if len(parts) > 4 else 1
        return '.'.join(parts[start:])

    return {name: model_weights[_source_key(name)]
            for name in model.state_dict().keys()}
def draw_body_pose_visualization(canvas, candidate, subset, model_type='body25'):
    """
    Draw body pose keypoints and limb connections onto an image.

    Args:
        canvas: Image (H x W x 3) to draw on; circles are drawn in place,
            limbs are alpha-blended onto successive copies.
        candidate: Keypoint candidate array; columns 0/1 hold coordinates.
        subset: Per-person table of candidate indices (-1 marks a missing
            joint); one row per detected person.
        model_type: 'body25' (25 joints) or any other value for COCO (18).

    Returns:
        numpy.ndarray: The canvas with the pose rendered.
    """
    stick_width = 4
    if model_type == 'body25':
        limb_sequence = [
            [1,0],[1,2],[2,3],[3,4],[1,5],[5,6],[6,7],[1,8],[8,9],[9,10],
            [10,11],[8,12],[12,13],[13,14],[0,15],[0,16],[15,17],[16,18],
            [11,24],[11,22],[14,21],[14,19],[22,23],[19,20]
        ]
        num_joints = 25
    else:
        limb_sequence = [
            [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
            [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
            [0, 15], [15, 17], [2, 16], [5, 17]
        ]
        num_joints = 18
    # One color per joint index; the same palette is reused for the limb at
    # the matching position in limb_sequence.
    colors = [
        [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
        [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
        [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
        [255, 0, 255], [255, 0, 170], [255, 0, 85], [255,255,0], [255,255,85],
        [255,255,170], [255,255,255], [170,255,255], [85,255,255], [0,255,255]
    ]
    # Draw keypoints as filled circles for every detected person.
    for i in range(num_joints):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                continue
            x, y = candidate[index][0:2]
            cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
    # Draw limbs as rotated filled ellipses, alpha-blended onto the canvas.
    # NOTE(review): range(num_joints - 1) covers all 24 body25 limbs, but only
    # the first 17 of the 19 COCO limbs — the two ear-shoulder links are
    # skipped, as in the reference OpenPose drawing code; confirm intentional.
    for i in range(num_joints - 1):
        for n in range(len(subset)):
            index = subset[n][np.array(limb_sequence[i])]
            if -1 in index:
                continue
            current_canvas = canvas.copy()
            # OpenPose naming convention: column 0 is read into "Y" and
            # column 1 into "X" — the names are swapped relative to their
            # usual meaning, and the ellipse center below is (mean_y, mean_x).
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mean_x = np.mean(X)
            mean_y = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((int(mean_y), int(mean_x)),
                                       (int(length / 2), stick_width),
                                       int(angle), 0, 360, 1)
            cv2.fillConvexPoly(current_canvas, polygon, colors[i])
            # 40/60 blend keeps overlapping limbs semi-transparent.
            canvas = cv2.addWeighted(canvas, 0.4, current_canvas, 0.6, 0)
    return canvas
def extract_body_pose_data(candidate, subset, model_type='body25'):
    """
    Collect body-pose geometry (keypoints and limb sticks) without rendering.

    Args:
        candidate: Keypoint candidate array; columns 0/1 hold coordinates.
        subset: Per-person table of candidate indices (-1 marks a missing
            joint); one row per detected person.
        model_type: 'body25' (25 joints) or any other value for COCO (18).

    Returns:
        tuple: (keypoint_circles, limb_sticks). keypoint_circles is a list of
        (x, y) pairs; limb_sticks is a list of (center_col0, center_col1,
        angle_deg, length) tuples matching the drawing routine's geometry.
    """
    stick_width = 4
    if model_type == 'body25':
        limb_sequence = [
            [1,0],[1,2],[2,3],[3,4],[1,5],[5,6],[6,7],[1,8],[8,9],[9,10],
            [10,11],[8,12],[12,13],[13,14],[0,15],[0,16],[15,17],[16,18],
            [11,24],[11,22],[14,21],[14,19],[22,23],[19,20]
        ]
        num_joints = 25
    else:
        limb_sequence = [
            [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
            [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
            [0, 15], [15, 17], [2, 16], [5, 17]
        ]
        num_joints = 18
    # Keypoint coordinates for every person, skipping missing joints (-1).
    keypoint_circles = [
        (candidate[int(person[joint])][0], candidate[int(person[joint])][1])
        for joint in range(num_joints)
        for person in subset
        if int(person[joint]) != -1
    ]
    # Limb sticks: center, angle and length per connected joint pair.
    # The first num_joints - 1 entries of limb_sequence are used, matching
    # the drawing routine.
    limb_sticks = []
    for limb_id in range(num_joints - 1):
        pair = np.array(limb_sequence[limb_id])
        for person in subset:
            endpoints = person[pair]
            if -1 in endpoints:
                continue
            ys = candidate[endpoints.astype(int), 0]
            xs = candidate[endpoints.astype(int), 1]
            seg_len = ((xs[0] - xs[1]) ** 2 + (ys[0] - ys[1]) ** 2) ** 0.5
            seg_angle = math.degrees(math.atan2(xs[0] - xs[1], ys[0] - ys[1]))
            limb_sticks.append((np.mean(ys), np.mean(xs), seg_angle, seg_len))
    return keypoint_circles, limb_sticks
def draw_hand_pose_visualization(canvas, all_hand_peaks, show_numbers=False):
    """
    Draw hand pose keypoints and finger connections onto an image.

    Args:
        canvas: Image (H x W x 3) to draw on.
        all_hand_peaks: Iterable of (21, 2) keypoint arrays, one per hand;
            a zero coordinate marks a missing keypoint.
        show_numbers: Whether to annotate each keypoint with its index.

    Returns:
        numpy.ndarray: Rendered RGB image.
    """
    edges = [
        [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
        [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]
    ]
    fig = Figure(figsize=plt.figaspect(canvas))
    fig.subplots_adjust(0, 0, 1, 1)
    bg = FigureCanvas(fig)
    ax = fig.subplots()
    ax.axis('off')
    ax.imshow(canvas)
    width, height = ax.figure.get_size_inches() * ax.figure.get_dpi()
    for peaks in all_hand_peaks:
        for ie, e in enumerate(edges):
            # Draw the edge only when both endpoints have all-nonzero coords.
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                # Hue varies with the edge index so each finger segment gets
                # a distinct color.
                ax.plot([x1, x2], [y1, y2],
                        color=matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0]))
        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            ax.plot(x, y, 'r.')
            if show_numbers:
                ax.text(x, y, str(i))
    bg.draw()
    # buffer_rgba + np.frombuffer replace the removed tostring_rgb /
    # np.fromstring APIs (consistent with render_stick_model). frombuffer
    # returns a read-only view, so copy to keep the result writable.
    buf = np.frombuffer(bg.buffer_rgba(), dtype=np.uint8)
    canvas = buf.reshape(int(height), int(width), 4)[:, :, :3].copy()
    return canvas
def extract_hand_pose_data(all_hand_peaks, show_numbers=False):
    """
    Collect hand-pose geometry (finger edges and keypoints) without drawing.

    Args:
        all_hand_peaks: Sequence of up to two (21, 2) keypoint arrays, one
            per hand; a zero coordinate marks a missing keypoint.
        show_numbers: Unused here; kept for interface parity with
            draw_hand_pose_visualization.

    Returns:
        tuple: (hand_edges, hand_peaks). hand_edges[i] holds
        (edge_index, (x1, y1), (x2, y2)) tuples and hand_peaks[i] holds
        (x, y, label) tuples for hand i.
    """
    finger_edges = [
        [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
        [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]
    ]
    export_edges = [[], []]
    export_peaks = [[], []]
    for hand_idx, peaks in enumerate(all_hand_peaks):
        for edge_idx, (a, b) in enumerate(finger_edges):
            # Skip the edge when either endpoint has a zero coordinate
            # (missing detection).
            if np.sum(np.all(peaks[[a, b]], axis=1) == 0) != 0:
                continue
            start = (peaks[a][0], peaks[a][1])
            end = (peaks[b][0], peaks[b][1])
            export_edges[hand_idx].append((edge_idx, start, end))
        for point_idx, (px, py) in enumerate(peaks):
            export_peaks[hand_idx].append((px, py, str(point_idx)))
    return export_edges, export_peaks
def detect_hand_regions(candidate, subset, original_image):
    """
    Estimate square hand crop regions from body-pose arm keypoints.

    Args:
        candidate: Body pose keypoint candidates (coordinates in cols 0/1).
        subset: Per-person keypoint index table (-1 marks a missing joint).
        original_image: Original input image, used only for its bounds.

    Returns:
        list: [x, y, width, is_left_hand] entries — integer top-left corner
        and side length of each square hand region, plus a left/right flag.
        Regions narrower than 20 pixels are discarded.
    """
    ratio_wrist_elbow = 0.33
    image_height, image_width = original_image.shape[0:2]
    regions = []
    for person in subset.astype(int):
        # Left arm uses joints 5/6/7 (shoulder, elbow, wrist); right uses
        # 2/3/4. Either arm may be missing independently.
        arm_specs = []
        for joint_ids, is_left in (([5, 6, 7], True), ([2, 3, 4], False)):
            if np.any(person[joint_ids] == -1):
                continue
            shoulder, elbow, wrist = (candidate[j][:2] for j in person[joint_ids])
            arm_specs.append((shoulder, elbow, wrist, is_left))
        for (sx, sy), (ex, ey), (wx, wy), is_left in arm_specs:
            # Extrapolate past the wrist along the elbow->wrist direction to
            # center the crop roughly on the palm.
            cx = wx + ratio_wrist_elbow * (wx - ex)
            cy = wy + ratio_wrist_elbow * (wy - ey)
            wrist_elbow = math.sqrt((wx - ex) ** 2 + (wy - ey) ** 2)
            elbow_shoulder = math.sqrt((ex - sx) ** 2 + (ey - sy) ** 2)
            side = 1.5 * max(wrist_elbow, 0.9 * elbow_shoulder)
            # Shift from center to top-left corner, then clamp to the image.
            cx = max(0, cx - side / 2)
            cy = max(0, cy - side / 2)
            side = min(side if cx + side <= image_width else image_width - cx,
                       side if cy + side <= image_height else image_height - cy)
            if side >= 20:
                regions.append([int(cx), int(cy), int(side), is_left])
    return regions
def render_stick_model(original_img, keypoint_circles, limb_sticks, hand_edges, hand_peaks):
    """
    Render a complete stick-figure model (body limbs, body keypoints and hand
    poses) onto a copy of the original image.

    Args:
        original_img: Original image; deep-copied, so it is never modified.
        keypoint_circles: Body keypoint (x, y) pairs,
            as produced by extract_body_pose_data.
        limb_sticks: Body limb (center_x, center_y, angle, length) tuples,
            as produced by extract_body_pose_data.
        hand_edges: Per-hand lists of (edge_index, (x1, y1), (x2, y2)),
            as produced by extract_hand_pose_data.
        hand_peaks: Per-hand lists of (x, y, label) keypoints,
            as produced by extract_hand_pose_data.

    Returns:
        numpy.ndarray: Rendered RGB image.
    """
    canvas = copy.deepcopy(original_img)
    # One color per body joint/limb index (25 entries, body25 palette).
    colors = [
        [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
        [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
        [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
        [255, 0, 255], [255, 0, 170], [255, 0, 85], [255,255,0], [255,255,85],
        [255,255,170], [255,255,255], [170,255,255], [85,255,255], [0,255,255]
    ]
    stick_width = 4
    # Draw body limbs as rotated ellipses, alpha-blended onto the canvas.
    # NOTE(review): colors[idx] indexes by list position, so more than 25
    # sticks/circles (e.g. multiple people) would raise IndexError — confirm
    # callers only ever pass a single person's pose data.
    for idx, (mean_x, mean_y, angle, length) in enumerate(limb_sticks):
        current_canvas = canvas.copy()
        polygon = cv2.ellipse2Poly(
            (int(mean_x), int(mean_y)),
            (int(length / 2), stick_width),
            int(angle), 0, 360, 1
        )
        cv2.fillConvexPoly(current_canvas, polygon, colors[idx])
        # 40/60 blend keeps overlapping limbs semi-transparent.
        canvas = cv2.addWeighted(canvas, 0.4, current_canvas, 0.6, 0)
    # Draw body keypoints as filled circles.
    for idx, (x, y) in enumerate(keypoint_circles):
        cv2.circle(canvas, (int(x), int(y)), 4, colors[idx], thickness=-1)
    # Draw hand poses via a matplotlib figure layered over the canvas.
    fig = Figure(figsize=plt.figaspect(canvas))
    fig.subplots_adjust(0, 0, 1, 1)
    ax = fig.subplots()
    ax.axis('off')
    ax.imshow(canvas)
    # edges is only used for its length (20) in the hue computation below;
    # the edge endpoints come pre-extracted in hand_edges.
    edges = [
        [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9],
        [9, 10], [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16],
        [0, 17], [17, 18], [18, 19], [19, 20]
    ]
    for hand_edge_set in hand_edges:
        for (ie, (x1, y1), (x2, y2)) in hand_edge_set:
            # Hue varies with the edge index so each finger segment gets a
            # distinct color.
            ax.plot([x1, x2], [y1, y2],
                    color=matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0]))
    for hand_peak_set in hand_peaks:
        for (x, y, text) in hand_peak_set:
            ax.plot(x, y, 'r.')
    # Convert figure to numpy array (RGBA buffer, alpha dropped below).
    bg = FigureCanvas(fig)
    bg.draw()
    width, height = fig.get_size_inches() * fig.get_dpi()
    buf = bg.buffer_rgba()
    canvas = np.frombuffer(buf, dtype=np.uint8).reshape(int(height), int(width), 4)
    canvas = canvas[:, :, :3]  # Keep only RGB channels
    plt.close(fig)  # Clean up
    # The resize targets the figure's own pixel size, so it is effectively a
    # no-op unless ceil() rounds a fractional dimension up.
    return cv2.resize(canvas, (math.ceil(width), math.ceil(height)))
def create_bar_plot_visualization(image, predictions, title, orig_img):
    """
    Render a probability bar plot and stack it below the given image.

    Args:
        image: Image the plot is appended to (stacked vertically).
        predictions: Mapping of class label -> probability; may be empty.
        title: Plot title.
        orig_img: Original image, used only to size the plot figure
            (width/100 x height/200 inches at 100 dpi).

    Returns:
        numpy.ndarray: image with the rendered plot stacked underneath,
        with the plot resized to the image's width.
    """
    # Handle empty predictions case with a placeholder message panel.
    if not predictions or len(predictions) == 0:
        fig, ax = plt.subplots(figsize=(orig_img.shape[1]/100, orig_img.shape[0]/200), dpi=100)
        ax.text(0.5, 0.5, 'No Predictions Available',
                horizontalalignment='center', verticalalignment='center',
                transform=ax.transAxes, fontsize=14)
        ax.set_title(title)
        ax.set_xlim(0, 1)
        ax.set_ylim(0, 1)
        ax.set_xticks([])
        ax.set_yticks([])
    else:
        fig, ax = plt.subplots(figsize=(orig_img.shape[1]/100, orig_img.shape[0]/200), dpi=100)
        plt.title(title)
        # Create bar plot data from the prediction mapping.
        labels = list(predictions.keys())
        probabilities = list(predictions.values())
        # Create seaborn bar plot (one bar per predicted class).
        sns.barplot(x=labels, y=probabilities, ax=ax)
    # Rasterize the figure and drop the alpha channel.
    # NOTE(review): fig.canvas.renderer requires an Agg-style backend to
    # exist after draw() — confirm matplotlib is configured accordingly.
    fig.canvas.draw()
    plot_image = np.array(fig.canvas.renderer.buffer_rgba())[:, :, :3]  # Remove alpha
    plt.close(fig)  # Close to avoid memory leaks
    # Resize the plot to the image width and stack it under the image.
    combined_image = np.vstack((image, cv2.resize(plot_image, (image.shape[1], plot_image.shape[0]))))
    return combined_image
def add_bottom_padding(image, pad_value, pad_height):
    """
    Append a solid-color band of the given height to the bottom of an image.

    Args:
        image: Input image (H x W x C).
        pad_value: Fill color — a scalar or a per-channel sequence.
        pad_height: Number of rows to append.

    Returns:
        numpy.ndarray: Image of shape (H + pad_height, W, C), same dtype.
    """
    rows, cols, depth = image.shape
    band = np.full((pad_height, cols, depth), pad_value, dtype=image.dtype)
    return np.vstack((image, band))
def find_array_maximum(array):
    """
    Locate the position of the maximum value in a 2D array.

    Args:
        array: 2D numpy array.

    Returns:
        tuple: (row_index, col_index) of the first occurrence of the maximum
        value in row-major order.
    """
    # Global argmax over the flattened array, converted back to 2D indices;
    # ties resolve to the first occurrence, matching per-row argmax + row max.
    row, col = np.unravel_index(array.argmax(), array.shape)
    return row, col