""" ISL Sign Language Translation - TechMatrix Solvers Initiative Utility functions for pose processing and visualization Developed by: TechMatrix Solvers Team """ import numpy as np import math import cv2 import matplotlib from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas from matplotlib.figure import Figure import matplotlib.pyplot as plt import copy import seaborn as sns def pad_image_corner(img, stride, pad_value): """ Pad image to ensure dimensions are divisible by stride Args: img: Input image array stride: Stride value for padding calculation pad_value: Value to use for padding """ h, w = img.shape[:2] pad = [0, 0, 0, 0] # [up, left, down, right] pad[2] = 0 if (h % stride == 0) else stride - (h % stride) # down pad[3] = 0 if (w % stride == 0) else stride - (w % stride) # right img_padded = img # Add padding if pad[0] > 0: # up pad_up = np.tile(img_padded[0:1, :, :] * 0 + pad_value, (pad[0], 1, 1)) img_padded = np.concatenate((pad_up, img_padded), axis=0) if pad[1] > 0: # left pad_left = np.tile(img_padded[:, 0:1, :] * 0 + pad_value, (1, pad[1], 1)) img_padded = np.concatenate((pad_left, img_padded), axis=1) if pad[2] > 0: # down pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + pad_value, (pad[2], 1, 1)) img_padded = np.concatenate((img_padded, pad_down), axis=0) if pad[3] > 0: # right pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + pad_value, (1, pad[3], 1)) img_padded = np.concatenate((img_padded, pad_right), axis=1) return img_padded, pad def transfer_model_weights(model, model_weights): """ Transfer weights from caffe model to pytorch model format Args: model: PyTorch model model_weights: Dictionary of weights from caffe model """ transferred_weights = {} for weights_name in model.state_dict().keys(): if len(weights_name.split('.')) > 4: # body25 format transferred_weights[weights_name] = model_weights['.'.join( weights_name.split('.')[3:])] else: transferred_weights[weights_name] = model_weights['.'.join( weights_name.split('.')[1:])] return transferred_weights def draw_body_pose_visualization(canvas, candidate, subset, model_type='body25'): """ Draw body pose keypoints and connections on image Args: canvas: Image to draw on candidate: Detected keypoint candidates subset: Valid keypoint connections model_type: Type of pose model ('body25' or 'coco') """ stick_width = 4 if model_type == 'body25': limb_sequence = [ [1,0],[1,2],[2,3],[3,4],[1,5],[5,6],[6,7],[1,8],[8,9],[9,10], [10,11],[8,12],[12,13],[13,14],[0,15],[0,16],[15,17],[16,18], [11,24],[11,22],[14,21],[14,19],[22,23],[19,20] ] num_joints = 25 else: limb_sequence = [ [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16], [0, 15], [15, 17], [2, 16], [5, 17] ] num_joints = 18 # Color scheme for different joints colors = [ [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85], [255,255,0], [255,255,85], [255,255,170], [255,255,255], [170,255,255], [85,255,255], [0,255,255] ] # Draw keypoints for i in range(num_joints): for n in range(len(subset)): index = int(subset[n][i]) if index == -1: continue x, y = candidate[index][0:2] cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1) # Draw limbs for i in range(num_joints - 1): for n in range(len(subset)): index = subset[n][np.array(limb_sequence[i])] if -1 in index: continue current_canvas = canvas.copy() Y = candidate[index.astype(int), 0] X = candidate[index.astype(int), 1] mean_x = np.mean(X) mean_y = np.mean(Y) length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5 angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) polygon = cv2.ellipse2Poly((int(mean_y), int(mean_x)), (int(length / 2), stick_width), int(angle), 0, 360, 1) cv2.fillConvexPoly(current_canvas, polygon, colors[i]) canvas = cv2.addWeighted(canvas, 0.4, current_canvas, 0.6, 0) return canvas def extract_body_pose_data(candidate, subset, model_type='body25'): """ Extract body pose data without drawing Returns: tuple: (keypoint_circles, limb_sticks) data for further processing """ stick_width = 4 if model_type == 'body25': limb_sequence = [ [1,0],[1,2],[2,3],[3,4],[1,5],[5,6],[6,7],[1,8],[8,9],[9,10], [10,11],[8,12],[12,13],[13,14],[0,15],[0,16],[15,17],[16,18], [11,24],[11,22],[14,21],[14,19],[22,23],[19,20] ] num_joints = 25 else: limb_sequence = [ [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16], [0, 15], [15, 17], [2, 16], [5, 17] ] num_joints = 18 # Extract keypoint coordinates keypoint_circles = [] for i in range(num_joints): for n in range(len(subset)): index = int(subset[n][i]) if index == -1: continue x, y = candidate[index][0:2] keypoint_circles.append((x, y)) # Extract limb stick data limb_sticks = [] for i in range(num_joints - 1): for n in range(len(subset)): index = subset[n][np.array(limb_sequence[i])] if -1 in index: continue Y = candidate[index.astype(int), 0] X = candidate[index.astype(int), 1] mean_x = np.mean(X) mean_y = np.mean(Y) length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5 angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) limb_sticks.append((mean_y, mean_x, angle, length)) return keypoint_circles, limb_sticks def draw_hand_pose_visualization(canvas, all_hand_peaks, show_numbers=False): """ Draw hand pose keypoints and connections Args: canvas: Image to draw on all_hand_peaks: Detected hand keypoints for both hands show_numbers: Whether to show keypoint numbers """ edges = [ [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20] ] fig = Figure(figsize=plt.figaspect(canvas)) fig.subplots_adjust(0, 0, 1, 1) bg = FigureCanvas(fig) ax = fig.subplots() ax.axis('off') ax.imshow(canvas) width, height = ax.figure.get_size_inches() * ax.figure.get_dpi() for peaks in all_hand_peaks: for ie, e in enumerate(edges): if np.sum(np.all(peaks[e], axis=1) == 0) == 0: x1, y1 = peaks[e[0]] x2, y2 = peaks[e[1]] ax.plot([x1, x2], [y1, y2], color=matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0])) for i, keypoint in enumerate(peaks): x, y = keypoint ax.plot(x, y, 'r.') if show_numbers: ax.text(x, y, str(i)) bg.draw() canvas = np.fromstring(bg.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3) return canvas def extract_hand_pose_data(all_hand_peaks, show_numbers=False): """ Extract hand pose data without drawing Returns: tuple: (hand_edges, hand_peaks) data for further processing """ edges = [ [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20] ] export_edges = [[], []] export_peaks = [[], []] for idx, peaks in enumerate(all_hand_peaks): for ie, e in enumerate(edges): if np.sum(np.all(peaks[e], axis=1) == 0) == 0: x1, y1 = peaks[e[0]] x2, y2 = peaks[e[1]] export_edges[idx].append((ie, (x1, y1), (x2, y2))) for i, keypoint in enumerate(peaks): x, y = keypoint export_peaks[idx].append((x, y, str(i))) return export_edges, export_peaks def detect_hand_regions(candidate, subset, original_image): """ Detect hand regions based on body pose keypoints Args: candidate: Body pose candidates subset: Valid body pose connections original_image: Original input image Returns: List of detected hand regions [x, y, width, is_left_hand] """ ratio_wrist_elbow = 0.33 detection_results = [] image_height, image_width = original_image.shape[0:2] for person in subset.astype(int): # Check if left hand keypoints exist (shoulder, elbow, wrist) has_left_hand = np.sum(person[[5, 6, 7]] == -1) == 0 has_right_hand = np.sum(person[[2, 3, 4]] == -1) == 0 if not (has_left_hand or has_right_hand): continue hands = [] # Process left hand if has_left_hand: left_shoulder_idx, left_elbow_idx, left_wrist_idx = person[[5, 6, 7]] x1, y1 = candidate[left_shoulder_idx][:2] x2, y2 = candidate[left_elbow_idx][:2] x3, y3 = candidate[left_wrist_idx][:2] hands.append([x1, y1, x2, y2, x3, y3, True]) # Process right hand if has_right_hand: right_shoulder_idx, right_elbow_idx, right_wrist_idx = person[[2, 3, 4]] x1, y1 = candidate[right_shoulder_idx][:2] x2, y2 = candidate[right_elbow_idx][:2] x3, y3 = candidate[right_wrist_idx][:2] hands.append([x1, y1, x2, y2, x3, y3, False]) for x1, y1, x2, y2, x3, y3, is_left in hands: # Calculate hand region based on wrist and elbow positions x = x3 + ratio_wrist_elbow * (x3 - x2) y = y3 + ratio_wrist_elbow * (y3 - y2) distance_wrist_elbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2) distance_elbow_shoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2) width = 1.5 * max(distance_wrist_elbow, 0.9 * distance_elbow_shoulder) # Adjust to top-left corner x -= width / 2 y -= width / 2 # Ensure bounds are within image x = max(0, x) y = max(0, y) width1 = width if x + width <= image_width else image_width - x width2 = width if y + width <= image_height else image_height - y width = min(width1, width2) # Only include if region is large enough if width >= 20: detection_results.append([int(x), int(y), int(width), is_left]) return detection_results def render_stick_model(original_img, keypoint_circles, limb_sticks, hand_edges, hand_peaks): """ Render complete stick model with body and hand poses Args: original_img: Original image keypoint_circles: Body keypoint coordinates limb_sticks: Body limb stick data hand_edges: Hand connection data hand_peaks: Hand keypoint data """ canvas = copy.deepcopy(original_img) colors = [ [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85], [255,255,0], [255,255,85], [255,255,170], [255,255,255], [170,255,255], [85,255,255], [0,255,255] ] stick_width = 4 # Draw body limbs for idx, (mean_x, mean_y, angle, length) in enumerate(limb_sticks): current_canvas = canvas.copy() polygon = cv2.ellipse2Poly( (int(mean_x), int(mean_y)), (int(length / 2), stick_width), int(angle), 0, 360, 1 ) cv2.fillConvexPoly(current_canvas, polygon, colors[idx]) canvas = cv2.addWeighted(canvas, 0.4, current_canvas, 0.6, 0) # Draw body keypoints for idx, (x, y) in enumerate(keypoint_circles): cv2.circle(canvas, (int(x), int(y)), 4, colors[idx], thickness=-1) # Draw hand poses using matplotlib fig = Figure(figsize=plt.figaspect(canvas)) fig.subplots_adjust(0, 0, 1, 1) ax = fig.subplots() ax.axis('off') ax.imshow(canvas) edges = [ [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10], [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20] ] for hand_edge_set in hand_edges: for (ie, (x1, y1), (x2, y2)) in hand_edge_set: ax.plot([x1, x2], [y1, y2], color=matplotlib.colors.hsv_to_rgb([ie/float(len(edges)), 1.0, 1.0])) for hand_peak_set in hand_peaks: for (x, y, text) in hand_peak_set: ax.plot(x, y, 'r.') # Convert figure to numpy array bg = FigureCanvas(fig) bg.draw() width, height = fig.get_size_inches() * fig.get_dpi() buf = bg.buffer_rgba() canvas = np.frombuffer(buf, dtype=np.uint8).reshape(int(height), int(width), 4) canvas = canvas[:, :, :3] # Keep only RGB channels plt.close(fig) # Clean up return cv2.resize(canvas, (math.ceil(width), math.ceil(height))) def create_bar_plot_visualization(image, predictions, title, orig_img): """ Create bar plot visualization below the image Args: image: Input image predictions: Dictionary of prediction probabilities title: Plot title orig_img: Original image for sizing """ # Handle empty predictions case if not predictions or len(predictions) == 0: # Create a simple plot showing "No predictions available" fig, ax = plt.subplots(figsize=(orig_img.shape[1]/100, orig_img.shape[0]/200), dpi=100) ax.text(0.5, 0.5, 'No Predictions Available', horizontalalignment='center', verticalalignment='center', transform=ax.transAxes, fontsize=14) ax.set_title(title) ax.set_xlim(0, 1) ax.set_ylim(0, 1) ax.set_xticks([]) ax.set_yticks([]) else: fig, ax = plt.subplots(figsize=(orig_img.shape[1]/100, orig_img.shape[0]/200), dpi=100) plt.title(title) # Create bar plot data labels = list(predictions.keys()) probabilities = list(predictions.values()) # Create seaborn bar plot sns.barplot(x=labels, y=probabilities, ax=ax) fig.canvas.draw() # Convert plot to numpy array plot_image = np.array(fig.canvas.renderer.buffer_rgba())[:, :, :3] # Remove alpha plt.close(fig) # Close to avoid memory leaks # Combine image and plot vertically combined_image = np.vstack((image, cv2.resize(plot_image, (image.shape[1], plot_image.shape[0])))) return combined_image def add_bottom_padding(image, pad_value, pad_height): """ Add padding to the bottom of an image Args: image: Input image pad_value: Color value for padding (tuple or int) pad_height: Height of padding to add """ height, width, channels = image.shape padding = np.zeros((pad_height, width, channels), dtype=image.dtype) padding[:, :, :] = pad_value return np.vstack((image, padding)) def find_array_maximum(array): """ Get maximum index of 2D array Args: array: 2D numpy array Returns: tuple: (row_index, col_index) of maximum value """ array_index = array.argmax(1) array_value = array.max(1) i = array_value.argmax() j = array_index[i] return i, j