# Source: Hugging Face Space "Abs6187/ISL_Sign_Language_Translation" (Space status: Sleeping; file size: 17,275 bytes)

"""

ISL Sign Language Translation - TechMatrix Solvers Initiative

Utility functions for pose processing and visualization

Developed by: TechMatrix Solvers Team

"""

import numpy as np
import math
import cv2
import matplotlib
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure
import matplotlib.pyplot as plt
import copy
import seaborn as sns


def pad_image_corner(img, stride, pad_value):
    """
    Pad an image on its bottom and right edges up to a multiple of stride.

    Args:
        img: Input image array indexed as (height, width, channels).
        stride: Stride value; the padded height and width become multiples
            of it.
        pad_value: Value written into the added rows and columns.

    Returns:
        tuple: (img_padded, pad) where pad is [up, left, down, right] in
        pixels. Only the bottom and right edges are ever padded, so up and
        left are always 0. When no padding is needed, img itself is
        returned unchanged.
    """
    h, w = img.shape[:2]

    # Rows/columns still missing to reach the next multiple of stride.
    pad_down = (-h) % stride
    pad_right = (-w) % stride

    img_padded = img

    if pad_down > 0:
        # The last row serves as a shape/dtype template for the filler.
        # Slicing with -1: (rather than -2:-1) keeps the template non-empty
        # for a one-row image, which would otherwise get no padding at all.
        filler_row = img_padded[-1:, :, :] * 0 + pad_value
        img_padded = np.concatenate(
            (img_padded, np.tile(filler_row, (pad_down, 1, 1))), axis=0)

    if pad_right > 0:
        # Same template trick for columns; taken after the rows were added
        # so the filler already spans the padded height.
        filler_col = img_padded[:, -1:, :] * 0 + pad_value
        img_padded = np.concatenate(
            (img_padded, np.tile(filler_col, (1, pad_right, 1))), axis=1)

    return img_padded, [0, 0, pad_down, pad_right]


def transfer_model_weights(model, model_weights):
    """
    Build a state dict for model from a dictionary of source weights.

    Each parameter name of the model is mapped to a source key by dropping
    leading dotted components: the first three when the name has more than
    four components (body25 format), otherwise only the first.

    Args:
        model: PyTorch model; only model.state_dict().keys() is read.
        model_weights: Dictionary of weights from the caffe model, keyed by
            the shortened names.

    Returns:
        dict: Model parameter name -> matching entry of model_weights.

    Raises:
        KeyError: If a shortened name is missing from model_weights.
    """
    def source_key(target_name):
        parts = target_name.split('.')
        skip = 3 if len(parts) > 4 else 1  # body25 names carry a longer prefix
        return '.'.join(parts[skip:])

    return {
        target_name: model_weights[source_key(target_name)]
        for target_name in model.state_dict().keys()
    }


def draw_body_pose_visualization(canvas, candidate, subset, model_type='body25'):
    """
    Draw body keypoints and limbs for every detected person.

    Args:
        canvas: Image to draw on. Keypoint circles are drawn directly onto
            it; every drawn limb replaces the working image with a freshly
            blended copy.
        candidate: Array of detected keypoints; columns 0 and 1 are read as
            the x and y pixel coordinates.
        subset: One row per person. Entry i is the row of candidate used
            for joint i, or -1 when that joint is missing.
        model_type: 'body25' selects the 25-joint layout; any other value
            selects the 18-joint layout.

    Returns:
        The image with keypoints and limbs drawn. If no limb was drawn this
        is the canvas object that was passed in.
    """
    if model_type == 'body25':
        num_joints = 25
        limb_sequence = [
            [1, 0], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8],
            [8, 9], [9, 10], [10, 11], [8, 12], [12, 13], [13, 14], [0, 15],
            [0, 16], [15, 17], [16, 18], [11, 24], [11, 22], [14, 21],
            [14, 19], [22, 23], [19, 20],
        ]
    else:
        num_joints = 18
        limb_sequence = [
            [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
            [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
            [0, 15], [15, 17], [2, 16], [5, 17],
        ]

    # One colour per joint index; limb i reuses colour i.
    colors = [
        [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
        [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
        [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
        [255, 0, 255], [255, 0, 170], [255, 0, 85], [255, 255, 0], [255, 255, 85],
        [255, 255, 170], [255, 255, 255], [170, 255, 255], [85, 255, 255], [0, 255, 255],
    ]
    stick_width = 4

    # Keypoints: a filled circle for every joint found on every person.
    for joint, color in enumerate(colors[:num_joints]):
        for person in subset:
            ref = int(person[joint])
            if ref == -1:
                continue
            px, py = candidate[ref][0:2]
            cv2.circle(canvas, (int(px), int(py)), 4, color, thickness=-1)

    # Limbs: only the first num_joints - 1 entries of the limb table are drawn.
    for limb, color in zip(limb_sequence[:num_joints - 1], colors):
        endpoints = np.array(limb)
        for person in subset:
            refs = person[endpoints]
            if -1 in refs:
                continue  # at least one end of this limb was not detected
            refs = refs.astype(int)
            xs = candidate[refs, 0]
            ys = candidate[refs, 1]
            center = (int(np.mean(xs)), int(np.mean(ys)))
            length = ((ys[0] - ys[1]) ** 2 + (xs[0] - xs[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
            polygon = cv2.ellipse2Poly(
                center, (int(length / 2), stick_width), int(angle), 0, 360, 1)
            # Fill the limb on a copy, then blend so limbs look translucent.
            overlay = canvas.copy()
            cv2.fillConvexPoly(overlay, polygon, color)
            canvas = cv2.addWeighted(canvas, 0.4, overlay, 0.6, 0)

    return canvas


def extract_body_pose_data(candidate, subset, model_type='body25'):
    """
    Collect body keypoint and limb geometry without drawing anything.

    Args:
        candidate: Array of detected keypoints; columns 0 and 1 are read as
            the x and y coordinates.
        subset: One row per person. Entry i is the row of candidate used
            for joint i, or -1 when that joint is missing.
        model_type: 'body25' selects the 25-joint layout; any other value
            selects the 18-joint layout.

    Returns:
        tuple: (keypoint_circles, limb_sticks).
            keypoint_circles is a list of (x, y) pairs, ordered by joint
            index and then by person.
            limb_sticks is a list of (center_x, center_y, angle, length)
            tuples, ordered by limb index and then by person; angle is in
            degrees.
    """
    if model_type == 'body25':
        num_joints = 25
        limb_sequence = [
            [1, 0], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8],
            [8, 9], [9, 10], [10, 11], [8, 12], [12, 13], [13, 14], [0, 15],
            [0, 16], [15, 17], [16, 18], [11, 24], [11, 22], [14, 21],
            [14, 19], [22, 23], [19, 20],
        ]
    else:
        num_joints = 18
        limb_sequence = [
            [1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
            [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
            [0, 15], [15, 17], [2, 16], [5, 17],
        ]

    # Keypoints, skipping joints that were not detected.
    keypoint_circles = []
    for joint in range(num_joints):
        for person in subset:
            ref = int(person[joint])
            if ref != -1:
                px, py = candidate[ref][0:2]
                keypoint_circles.append((px, py))

    # Limbs: only the first num_joints - 1 entries of the limb table are used.
    limb_sticks = []
    for limb in limb_sequence[:num_joints - 1]:
        endpoints = np.array(limb)
        for person in subset:
            refs = person[endpoints]
            if -1 in refs:
                continue  # at least one end of this limb was not detected
            refs = refs.astype(int)
            xs = candidate[refs, 0]
            ys = candidate[refs, 1]
            length = ((ys[0] - ys[1]) ** 2 + (xs[0] - xs[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(ys[0] - ys[1], xs[0] - xs[1]))
            limb_sticks.append((np.mean(xs), np.mean(ys), angle, length))

    return keypoint_circles, limb_sticks


def draw_hand_pose_visualization(canvas, all_hand_peaks, show_numbers=False):
    """
    Draw hand keypoints and finger connections on top of an image.

    The image is rendered through an off-screen matplotlib Agg canvas, so
    the returned array is a new rendering rather than the input array.

    Args:
        canvas: Image to draw on (shown with imshow as the background).
        all_hand_peaks: Iterable with one keypoint array per hand. Each
            array is indexed by keypoint (0-20) and holds (x, y) pairs; a
            coordinate of 0 marks a keypoint as missing for edge drawing.
        show_numbers: Whether to label each keypoint with its index.

    Returns:
        RGB uint8 array of the rendered figure with shape (height, width, 3).
    """
    edges = [
        [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
        [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]
    ]

    fig = Figure(figsize=plt.figaspect(canvas))
    fig.subplots_adjust(0, 0, 1, 1)
    bg = FigureCanvas(fig)
    ax = fig.subplots()
    ax.axis('off')
    ax.imshow(canvas)

    for peaks in all_hand_peaks:
        for ie, e in enumerate(edges):
            # Draw an edge only when both endpoints have non-zero x and y.
            if np.all(peaks[e]):
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                ax.plot([x1, x2], [y1, y2],
                        color=matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]))

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            ax.plot(x, y, 'r.')
            if show_numbers:
                ax.text(x, y, str(i))

    bg.draw()
    # buffer_rgba() replaces tostring_rgb(), which newer Matplotlib releases
    # no longer provide, and np.asarray replaces the deprecated binary mode
    # of np.fromstring. The buffer already has shape (height, width, 4), so
    # no manual reshape from float figure dimensions is needed.
    rgba = np.asarray(bg.buffer_rgba())
    # Drop alpha and copy so the result does not alias the canvas buffer.
    return np.ascontiguousarray(rgba[:, :, :3])


def extract_hand_pose_data(all_hand_peaks, show_numbers=False):
    """
    Collect hand edge and keypoint data without drawing anything.

    Args:
        all_hand_peaks: Iterable with one keypoint array per hand. Each
            array is indexed by keypoint (0-20) and holds (x, y) pairs; a
            coordinate of 0 marks a keypoint as missing for edge export.
        show_numbers: Unused; kept so the signature matches
            draw_hand_pose_visualization.

    Returns:
        tuple: (hand_edges, hand_peaks). Both are lists with one inner list
        per hand, and always at least two inner lists so callers that index
        [0] and [1] keep working when fewer hands are given.
            hand_edges[k] holds (edge_index, (x1, y1), (x2, y2)) for every
            edge of hand k whose two endpoints are both present.
            hand_peaks[k] holds (x, y, label) for every keypoint of hand k,
            where label is the keypoint index as a string.
    """
    edges = [
        [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
        [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]
    ]

    hands = list(all_hand_peaks)
    # Previously fixed at two slots, which raised IndexError as soon as a
    # third hand was supplied (e.g. two people in frame).
    slots = max(2, len(hands))
    export_edges = [[] for _ in range(slots)]
    export_peaks = [[] for _ in range(slots)]

    for idx, peaks in enumerate(hands):
        for ie, e in enumerate(edges):
            # Export an edge only when both endpoints have non-zero x and y.
            if np.all(peaks[e]):
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                export_edges[idx].append((ie, (x1, y1), (x2, y2)))

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            export_peaks[idx].append((x, y, str(i)))

    return export_edges, export_peaks


def detect_hand_regions(candidate, subset, original_image):
    """
    Estimate square hand regions from body pose keypoints.

    For each arm whose shoulder, elbow and wrist were all detected, the
    hand centre is placed past the wrist along the elbow-to-wrist direction
    and a square is sized from the forearm and upper-arm lengths.

    Args:
        candidate: Body pose candidates; the first two columns of a row are
            read as the x and y coordinates.
        subset: Array with one row per person. Entries 5/6/7 (left) and
            2/3/4 (right) are the candidate rows of shoulder/elbow/wrist,
            or -1 when missing.
        original_image: Original input image; only its height and width
            are used, to clip regions to the image.

    Returns:
        List of detected hand regions [x, y, width, is_left_hand], where
        (x, y) is the top-left corner of the square. Regions narrower than
        20 pixels after clipping are dropped. For each person the left hand
        comes before the right hand.
    """
    ratio_wrist_elbow = 0.33
    image_height, image_width = original_image.shape[0:2]

    # (shoulder, elbow, wrist) slots in a subset row plus the left-hand flag.
    arm_layouts = (([5, 6, 7], True), ([2, 3, 4], False))

    detection_results = []
    for person in subset.astype(int):
        for joint_slots, is_left in arm_layouts:
            refs = person[joint_slots]
            if np.any(refs == -1):
                continue  # this arm is incomplete, no hand estimate

            shoulder_ref, elbow_ref, wrist_ref = refs
            x1, y1 = candidate[shoulder_ref][:2]
            x2, y2 = candidate[elbow_ref][:2]
            x3, y3 = candidate[wrist_ref][:2]

            # Hand centre: extend the forearm beyond the wrist.
            x = x3 + ratio_wrist_elbow * (x3 - x2)
            y = y3 + ratio_wrist_elbow * (y3 - y2)

            forearm = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
            upper_arm = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
            width = 1.5 * max(forearm, 0.9 * upper_arm)

            # Move from the centre to the top-left corner, clamped at 0.
            x = max(0, x - width / 2)
            y = max(0, y - width / 2)

            # Shrink the square so it stays inside the image on both axes.
            fit_x = width if x + width <= image_width else image_width - x
            fit_y = width if y + width <= image_height else image_height - y
            width = min(fit_x, fit_y)

            # Only include if region is large enough
            if width >= 20:
                detection_results.append([int(x), int(y), int(width), is_left])

    return detection_results


def render_stick_model(original_img, keypoint_circles, limb_sticks, hand_edges, hand_peaks):
    """
    Render a complete stick model with body and hand poses.

    Body limbs and keypoints are drawn with OpenCV on a copy of the input
    image; hand edges and keypoints are then plotted over it through an
    off-screen matplotlib Agg canvas.

    Args:
        original_img: Original image; it is deep-copied, not modified.
        keypoint_circles: Body keypoint coordinates as (x, y) pairs.
        limb_sticks: Body limb stick data as
            (center_x, center_y, angle, length) tuples, angle in degrees.
        hand_edges: Per-hand lists of (edge_index, (x1, y1), (x2, y2)).
        hand_peaks: Per-hand lists of (x, y, label); the label is not drawn.

    Returns:
        RGB uint8 image of the rendered figure.
    """
    canvas = copy.deepcopy(original_img)

    colors = [
        [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0],
        [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255],
        [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], [170, 0, 255],
        [255, 0, 255], [255, 0, 170], [255, 0, 85], [255, 255, 0], [255, 255, 85],
        [255, 255, 170], [255, 255, 255], [170, 255, 255], [85, 255, 255], [0, 255, 255]
    ]
    num_colors = len(colors)
    stick_width = 4

    # Draw body limbs. The palette is indexed by position in the flat list,
    # so it wraps around: with several people there can be more entries
    # than colours, which used to raise IndexError.
    for idx, (mean_x, mean_y, angle, length) in enumerate(limb_sticks):
        current_canvas = canvas.copy()
        polygon = cv2.ellipse2Poly(
            (int(mean_x), int(mean_y)),
            (int(length / 2), stick_width),
            int(angle), 0, 360, 1
        )
        cv2.fillConvexPoly(current_canvas, polygon, colors[idx % num_colors])
        # Blend the filled limb in so limbs look translucent.
        canvas = cv2.addWeighted(canvas, 0.4, current_canvas, 0.6, 0)

    # Draw body keypoints (same wrap-around palette indexing).
    for idx, (x, y) in enumerate(keypoint_circles):
        cv2.circle(canvas, (int(x), int(y)), 4, colors[idx % num_colors], thickness=-1)

    # Draw hand poses using matplotlib
    fig = Figure(figsize=plt.figaspect(canvas))
    fig.subplots_adjust(0, 0, 1, 1)
    ax = fig.subplots()
    ax.axis('off')
    ax.imshow(canvas)

    # Only the number of hand edges is needed here, to spread edge colours
    # over the hue circle.
    edges = [
        [0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9],
        [9, 10], [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16],
        [0, 17], [17, 18], [18, 19], [19, 20]
    ]

    for hand_edge_set in hand_edges:
        for (ie, (x1, y1), (x2, y2)) in hand_edge_set:
            ax.plot([x1, x2], [y1, y2],
                    color=matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]))

    for hand_peak_set in hand_peaks:
        for (x, y, _label) in hand_peak_set:
            ax.plot(x, y, 'r.')

    # Convert figure to numpy array
    bg = FigureCanvas(fig)
    bg.draw()

    width, height = fig.get_size_inches() * fig.get_dpi()
    # The RGBA buffer already has shape (rows, cols, 4); using it directly
    # avoids a reshape that depends on truncated float dimensions.
    rgba = np.asarray(bg.buffer_rgba())
    canvas = np.ascontiguousarray(rgba[:, :, :3])  # Keep only RGB channels

    plt.close(fig)  # Clean up
    return cv2.resize(canvas, (math.ceil(width), math.ceil(height)))


def create_bar_plot_visualization(image, predictions, title, orig_img):
    """
    Stack a bar plot of prediction probabilities underneath an image.

    Args:
        image: Input image; the plot is resized to this image's width and
            appended below it.
        predictions: Dictionary mapping label -> probability. When it is
            empty or None, a "No Predictions Available" placeholder is
            rendered instead of bars.
        title: Plot title.
        orig_img: Original image for sizing; the figure is created
            shape[1] pixels wide and shape[0] / 2 pixels high at 100 dpi.

    Returns:
        The image with the rendered plot stacked below it.
    """
    fig, ax = plt.subplots(
        figsize=(orig_img.shape[1] / 100, orig_img.shape[0] / 200), dpi=100)
    try:
        ax.set_title(title)

        if not predictions:
            # Placeholder panel without ticks when there is nothing to plot.
            ax.text(0.5, 0.5, 'No Predictions Available',
                    horizontalalignment='center', verticalalignment='center',
                    transform=ax.transAxes, fontsize=14)
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            ax.set_xticks([])
            ax.set_yticks([])
        else:
            labels = list(predictions.keys())
            probabilities = list(predictions.values())
            sns.barplot(x=labels, y=probabilities, ax=ax)

        fig.canvas.draw()

        # Convert plot to numpy array
        plot_image = np.array(fig.canvas.renderer.buffer_rgba())[:, :, :3]  # Remove alpha
    finally:
        # pyplot keeps a reference to every figure it creates, so close it
        # even when plotting or drawing raises.
        plt.close(fig)

    # Combine image and plot vertically; the plot keeps its own height.
    resized_plot = cv2.resize(plot_image, (image.shape[1], plot_image.shape[0]))
    return np.vstack((image, resized_plot))


def add_bottom_padding(image, pad_value, pad_height):
    """
    Append a constant-coloured strip to the bottom of an image.

    Args:
        image: Input image with shape (height, width, channels).
        pad_value: Color value for padding (tuple or int); it is broadcast
            over the strip and stored with the image's dtype.
        pad_height: Height of padding to add, in rows.

    Returns:
        New array of shape (height + pad_height, width, channels).
    """
    _, width, channels = image.shape

    # Every element is overwritten by the fill, so no zero-init is needed.
    strip = np.empty((pad_height, width, channels), dtype=image.dtype)
    strip[...] = pad_value

    return np.concatenate((image, strip), axis=0)


def find_array_maximum(array):
    """
    Locate the maximum value of a 2D array.

    Args:
        array: 2D numpy array.

    Returns:
        tuple: (row_index, col_index) of maximum value. On ties the first
        row containing the maximum wins, then the first column in that row.
    """
    # Column of the largest entry within each row.
    best_col_per_row = array.argmax(axis=1)
    # Row whose largest entry is the overall maximum.
    row = array.max(axis=1).argmax()
    return row, best_col_per_row[row]