Sunil Sarolkar committed on
Commit 9c63ccc · 0 Parent(s):

Fresh reset: clean Space without LFS history

Files changed (7)
  1. ISL_Model_parameter.py +662 -0
  2. app.py +653 -0
  3. expression_mapping.py +168 -0
  4. model.py +407 -0
  5. packages.txt +6 -0
  6. requirements.txt +22 -0
  7. util.py +463 -0
ISL_Model_parameter.py ADDED
@@ -0,0 +1,662 @@
+ import keras
+ from keras.layers import TorchModuleWrapper
+ import numpy as np
+ import cv2
+ import torch
+ from scipy.ndimage import gaussian_filter  # scipy.ndimage.filters was removed in SciPy 1.10
+ import math
+ import os
+ from skimage.measure import label
+ import util as util
+
+
+ class ISLSignPos(keras.Model):
+     def __init__(self, pt_body_model, pt_hand_model):
+         super().__init__()
+         # Wrap the pretrained PyTorch OpenPose nets so Keras (torch backend) can call them
+         self.pt_body = TorchModuleWrapper(pt_body_model)
+         self.pt_body.trainable = False
+         self.pt_hand = TorchModuleWrapper(pt_hand_model)
+         self.pt_hand.trainable = False
+         self.njoint_body = 26
+         self.npaf_body = 52
+
+     def call(self, oriImg):
+         candidate, subset = self.bodypos(oriImg.cpu().numpy())
+         hands_list = util.handDetect(candidate, subset, oriImg.cpu().numpy())
+         all_hand_peaks = []
+         for x, y, w, is_left in hands_list:
+             peaks = self.handpos(oriImg.cpu().numpy()[y:y+w, x:x+w, :])
+             # shift hand-crop coordinates back into full-image coordinates
+             peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
+             peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
+             all_hand_peaks.append(peaks)
+         return (candidate, subset, all_hand_peaks)
+
+     def bodypos(self, oriImg):
+         model_type = 'body25'
+         scale_search = [0.5]
+         boxsize = 368
+         stride = 8
+         padValue = 128
+         thre1 = 0.1
+         thre2 = 0.05
+         multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
+         heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], self.njoint_body))
+         paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], self.npaf_body))
+
+         for m in range(len(multiplier)):
+             scale = multiplier[m]
+             imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+             imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
+             im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+             im = np.ascontiguousarray(im)
+
+             data = torch.from_numpy(im).float()
+             if torch.cuda.is_available():
+                 data = data.cuda()
+             with torch.no_grad():
+                 Mconv7_stage6_L1, Mconv7_stage6_L2 = self.pt_body(data)
+             Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
+             Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()
+
+             heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
+             heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+             paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
+             paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+             # average across scales (the earlier `heatmap_avg += heatmap_avg + ...` double-counted the running sum)
+             heatmap_avg += heatmap / len(multiplier)
+             paf_avg += paf / len(multiplier)
+
+         all_peaks = []
+         peak_counter = 0
+
+         for part in range(self.njoint_body - 1):
+             map_ori = heatmap_avg[:, :, part]
+             one_heatmap = gaussian_filter(map_ori, sigma=3)
+
+             map_left = np.zeros(one_heatmap.shape)
+             map_left[1:, :] = one_heatmap[:-1, :]
+             map_right = np.zeros(one_heatmap.shape)
+             map_right[:-1, :] = one_heatmap[1:, :]
+             map_up = np.zeros(one_heatmap.shape)
+             map_up[:, 1:] = one_heatmap[:, :-1]
+             map_down = np.zeros(one_heatmap.shape)
+             map_down[:, :-1] = one_heatmap[:, 1:]
+
+             peaks_binary = np.logical_and.reduce(
+                 (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up,
+                  one_heatmap >= map_down, one_heatmap > thre1))
+             peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
+             peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
+             peak_id = range(peak_counter, peak_counter + len(peaks))
+             peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]
+
+             all_peaks.append(peaks_with_score_and_id)
+             peak_counter += len(peaks)
+
+         if model_type == 'body25':
+             # find connections in the specified sequence
+             limbSeq = [[1, 0], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],
+                        [10, 11], [8, 12], [12, 13], [13, 14], [0, 15], [0, 16], [15, 17], [16, 18],
+                        [11, 24], [11, 22], [14, 21], [14, 19], [22, 23], [19, 20]]
+             # the middle joints' heatmap correspondence
+             mapIdx = [[30, 31], [14, 15], [16, 17], [18, 19], [22, 23], [24, 25], [26, 27], [0, 1], [6, 7],
+                       [2, 3], [4, 5], [8, 9], [10, 11], [12, 13], [32, 33], [34, 35], [36, 37], [38, 39],
+                       [50, 51], [46, 47], [44, 45], [40, 41], [48, 49], [42, 43]]
+         else:
+             # find connections in the specified sequence, center 29 is in position 15
+             limbSeq = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
+                        [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
+                        [0, 15], [15, 17], [2, 16], [5, 17]]
+             # the middle joints' heatmap correspondence
+             mapIdx = [[12, 13], [20, 21], [14, 15], [16, 17], [22, 23], [24, 25], [0, 1], [2, 3],
+                       [4, 5], [6, 7], [8, 9], [10, 11], [28, 29], [30, 31], [34, 35], [32, 33],
+                       [36, 37], [18, 19], [26, 27]]
+
+         connection_all = []
+         special_k = []
+         mid_num = 10
+
+         for k in range(len(mapIdx)):
+             score_mid = paf_avg[:, :, mapIdx[k]]
+             candA = all_peaks[limbSeq[k][0]]
+             candB = all_peaks[limbSeq[k][1]]
+
+             nA = len(candA)
+             nB = len(candB)
+             indexA, indexB = limbSeq[k]
+             if nA != 0 and nB != 0:
+                 connection_candidate = []
+                 for i in range(nA):
+                     for j in range(nB):
+                         vec = np.subtract(candB[j][:2], candA[i][:2])
+                         norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
+                         norm = max(0.001, norm)
+                         vec = np.divide(vec, norm)
+
+                         startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
+                                             np.linspace(candA[i][1], candB[j][1], num=mid_num)))
+
+                         vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0]
+                                           for I in range(len(startend))])
+                         vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1]
+                                           for I in range(len(startend))])
+
+                         score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
+                         score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
+                             0.5 * oriImg.shape[0] / norm - 1, 0)
+                         criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
+                         criterion2 = score_with_dist_prior > 0
+                         if criterion1 and criterion2:
+                             connection_candidate.append(
+                                 [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])
+
+                 connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
+                 connection = np.zeros((0, 5))
+                 for c in range(len(connection_candidate)):
+                     i, j, s = connection_candidate[c][0:3]
+                     if i not in connection[:, 3] and j not in connection[:, 4]:
+                         connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
+                         if len(connection) >= min(nA, nB):
+                             break
+
+                 connection_all.append(connection)
+             else:
+                 special_k.append(k)
+                 connection_all.append([])
+
+         # last number in each row is the total parts number of that person
+         # the second-to-last number in each row is the score of the overall configuration
+         subset = -1 * np.ones((0, self.njoint_body + 1))
+         candidate = np.array([item for sublist in all_peaks for item in sublist])
+
+         for k in range(len(mapIdx)):
+             if k not in special_k:
+                 partAs = connection_all[k][:, 0]
+                 partBs = connection_all[k][:, 1]
+                 indexA, indexB = np.array(limbSeq[k])
+
+                 for i in range(len(connection_all[k])):
+                     found = 0
+                     subset_idx = [-1, -1]
+                     for j in range(len(subset)):
+                         if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
+                             subset_idx[found] = j
+                             found += 1
+
+                     if found == 1:
+                         j = subset_idx[0]
+                         if subset[j][indexB] != partBs[i]:
+                             subset[j][indexB] = partBs[i]
+                             subset[j][-1] += 1
+                             subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+                     elif found == 2:  # if found 2 and disjoint, merge them
+                         j1, j2 = subset_idx
+                         membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
+                         if len(np.nonzero(membership == 2)[0]) == 0:  # merge
+                             subset[j1][:-2] += (subset[j2][:-2] + 1)
+                             subset[j1][-2:] += subset[j2][-2:]
+                             subset[j1][-2] += connection_all[k][i][2]
+                             subset = np.delete(subset, j2, 0)
+                         else:  # same handling as found == 1
+                             subset[j1][indexB] = partBs[i]
+                             subset[j1][-1] += 1
+                             subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+
+                     # if no partA is found in the subset, create a new row
+                     elif not found and k < self.njoint_body - 2:
+                         row = -1 * np.ones(self.njoint_body + 1)
+                         row[indexA] = partAs[i]
+                         row[indexB] = partBs[i]
+                         row[-1] = 2
+                         row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
+                         subset = np.vstack([subset, row])
+         # delete rows of subset in which too few parts occur
+         deleteIdx = []
+         for i in range(len(subset)):
+             if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
+                 deleteIdx.append(i)
+         subset = np.delete(subset, deleteIdx, axis=0)
+
+         # subset: one row per person - candidate indices per joint, then total score, then part count
+         # candidate: x, y, score, id
+         return candidate, subset
+
+     def handpos(self, oriImg):
+         scale_search = [0.5, 1.0, 1.5, 2.0]
+         boxsize = 368
+         stride = 8
+         padValue = 128
+         thre = 0.05
+         multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
+         heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))
+
+         for m in range(len(multiplier)):
+             scale = multiplier[m]
+             imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+             imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
+             im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+             im = np.ascontiguousarray(im)
+
+             data = torch.from_numpy(im).float()
+             if torch.cuda.is_available():
+                 data = data.cuda()
+             with torch.no_grad():
+                 output = self.pt_hand(data).cpu().numpy()
+
+             # extract outputs, resize, and remove padding
+             heatmap = np.transpose(np.squeeze(output), (1, 2, 0))
+             heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+             heatmap_avg += heatmap / len(multiplier)
+
+         all_peaks = []
+         for part in range(21):
+             map_ori = heatmap_avg[:, :, part]
+             one_heatmap = gaussian_filter(map_ori, sigma=3)
+             binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
+             # all values below the threshold
+             if np.sum(binary) == 0:
+                 all_peaks.append([0, 0])
+                 continue
+             label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
+             max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
+             label_img[label_img != max_index] = 0
+             map_ori[label_img == 0] = 0
+
+             y, x = util.npmax(map_ori)
+             all_peaks.append([x, y])
+         return np.array(all_peaks)
+
+
+ class ISLSignPosTranslator(keras.Model):
+
+     def __init__(self, body_model, hand_model, translation_model):
+         super().__init__()
+         self.pt_body = TorchModuleWrapper(body_model)
+         self.pt_body.trainable = False
+         self.pt_hand = TorchModuleWrapper(hand_model)
+         self.pt_hand.trainable = False
+
+         self.njoint_body = 26
+         self.npaf_body = 52
+         self.model_type = 'body25'
+         self.translation_layer = translation_model
+
+     def call(self, window):
+         window_size = 20
+         window_features = []
+         blank_frame = np.zeros(156)  # zero-padding feature vector for short windows
+         for idx, frame in enumerate(window.cpu()):
+             candidate, subset = self.bodypos(frame.cpu().numpy())
+             hands_list = util.handDetect(candidate, subset, frame.cpu().numpy())
+             all_hand_peaks = []
+             for x, y, w, is_left in hands_list:
+                 peaks = self.handpos(frame.cpu().numpy()[y:y+w, x:x+w, :])
+                 peaks[:, 0] = np.where(peaks[:, 0] == 0, peaks[:, 0], peaks[:, 0] + x)
+                 peaks[:, 1] = np.where(peaks[:, 1] == 0, peaks[:, 1], peaks[:, 1] + y)
+                 all_hand_peaks.append(peaks)
+
+             (bodypose_circles, bodypose_sticks) = util.get_bodypose(candidate, subset, self.model_type)
+             (handpose_edges, handpose_peaks) = util.get_handpose(all_hand_peaks)
+
+             feature = self.populate_features(bodypose_circles, handpose_peaks)
+             window_features.append(feature)
+
+         # pad short windows with blank frames (window_features is a list, so use len())
+         if len(window_features) < window_size:
+             for _ in range(window_size - len(window_features)):
+                 window_features.append(blank_frame)
+
+         return self.translation_layer(np.array(window_features).reshape(1, window_size, 156))
+
+     def frame_to_window(self, frame):
+         """
+         Adds a single frame to the rolling window, dropping the oldest frame.
+
+         Args:
+             frame: A numpy array of features for one video frame.
+         """
+         # Shift the window elements by 1 (the oldest frame is dropped)
+         self.window[:-1] = self.window[1:]
+         # Add the new frame to the end of the window
+         self.window[-1] = frame
+
+     def populate_features(self, bodypose_circles, handpose_peaks):
+         # Feature layout (156 values): 15 body x + 15 body y, then per hand
+         # (2 hands): 21 x + 21 y + 21 peak scores. Missing joints/hands are zero.
+         feature = []
+         for idx in range(15):
+             if idx < len(bodypose_circles):
+                 feature.append(bodypose_circles[idx][0])
+             else:
+                 feature.append(0)
+
+         for idx in range(15):
+             if idx < len(bodypose_circles):
+                 feature.append(bodypose_circles[idx][1])
+             else:
+                 feature.append(0)
+
+         for hand_idx in range(2):
+             # guard against fewer than two detected hands
+             peaks = handpose_peaks[hand_idx] if hand_idx < len(handpose_peaks) else []
+             for idx in range(21):
+                 feature.append(float(peaks[idx][0]) if idx < len(peaks) else 0)
+             for idx in range(21):
+                 feature.append(float(peaks[idx][1]) if idx < len(peaks) else 0)
+             for idx in range(21):
+                 feature.append(float(peaks[idx][2]) if idx < len(peaks) else 0)
+
+         return np.array(feature)
+
+     def bodypos(self, oriImg):
+         model_type = 'body25'
+         scale_search = [0.5]
+         boxsize = 368
+         stride = 8
+         padValue = 128
+         thre1 = 0.1
+         thre2 = 0.05
+         multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
+         heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], self.njoint_body))
+         paf_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], self.npaf_body))
+
+         for m in range(len(multiplier)):
+             scale = multiplier[m]
+             imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+             imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
+             im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+             im = np.ascontiguousarray(im)
+
+             data = torch.from_numpy(im).float()
+             if torch.cuda.is_available():
+                 data = data.cuda()
+             with torch.no_grad():
+                 Mconv7_stage6_L1, Mconv7_stage6_L2 = self.pt_body(data)
+             Mconv7_stage6_L1 = Mconv7_stage6_L1.cpu().numpy()
+             Mconv7_stage6_L2 = Mconv7_stage6_L2.cpu().numpy()
+
+             heatmap = np.transpose(np.squeeze(Mconv7_stage6_L2), (1, 2, 0))  # output 1 is heatmaps
+             heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+             paf = np.transpose(np.squeeze(Mconv7_stage6_L1), (1, 2, 0))  # output 0 is PAFs
+             paf = cv2.resize(paf, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             paf = paf[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             paf = cv2.resize(paf, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+             # average across scales (the earlier `heatmap_avg += heatmap_avg + ...` double-counted the running sum)
+             heatmap_avg += heatmap / len(multiplier)
+             paf_avg += paf / len(multiplier)
+
+         all_peaks = []
+         peak_counter = 0
+
+         for part in range(self.njoint_body - 1):
+             map_ori = heatmap_avg[:, :, part]
+             one_heatmap = gaussian_filter(map_ori, sigma=3)
+
+             map_left = np.zeros(one_heatmap.shape)
+             map_left[1:, :] = one_heatmap[:-1, :]
+             map_right = np.zeros(one_heatmap.shape)
+             map_right[:-1, :] = one_heatmap[1:, :]
+             map_up = np.zeros(one_heatmap.shape)
+             map_up[:, 1:] = one_heatmap[:, :-1]
+             map_down = np.zeros(one_heatmap.shape)
+             map_down[:, :-1] = one_heatmap[:, 1:]
+
+             peaks_binary = np.logical_and.reduce(
+                 (one_heatmap >= map_left, one_heatmap >= map_right, one_heatmap >= map_up,
+                  one_heatmap >= map_down, one_heatmap > thre1))
+             peaks = list(zip(np.nonzero(peaks_binary)[1], np.nonzero(peaks_binary)[0]))  # note reverse
+             peaks_with_score = [x + (map_ori[x[1], x[0]],) for x in peaks]
+             peak_id = range(peak_counter, peak_counter + len(peaks))
+             peaks_with_score_and_id = [peaks_with_score[i] + (peak_id[i],) for i in range(len(peak_id))]
+
+             all_peaks.append(peaks_with_score_and_id)
+             peak_counter += len(peaks)
+
+         if model_type == 'body25':
+             # find connections in the specified sequence
+             limbSeq = [[1, 0], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],
+                        [10, 11], [8, 12], [12, 13], [13, 14], [0, 15], [0, 16], [15, 17], [16, 18],
+                        [11, 24], [11, 22], [14, 21], [14, 19], [22, 23], [19, 20]]
+             # the middle joints' heatmap correspondence
+             mapIdx = [[30, 31], [14, 15], [16, 17], [18, 19], [22, 23], [24, 25], [26, 27], [0, 1], [6, 7],
+                       [2, 3], [4, 5], [8, 9], [10, 11], [12, 13], [32, 33], [34, 35], [36, 37], [38, 39],
+                       [50, 51], [46, 47], [44, 45], [40, 41], [48, 49], [42, 43]]
+         else:
+             # find connections in the specified sequence, center 29 is in position 15
+             limbSeq = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
+                        [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
+                        [0, 15], [15, 17], [2, 16], [5, 17]]
+             # the middle joints' heatmap correspondence
+             mapIdx = [[12, 13], [20, 21], [14, 15], [16, 17], [22, 23], [24, 25], [0, 1], [2, 3],
+                       [4, 5], [6, 7], [8, 9], [10, 11], [28, 29], [30, 31], [34, 35], [32, 33],
+                       [36, 37], [18, 19], [26, 27]]
+
+         connection_all = []
+         special_k = []
+         mid_num = 10
+
+         for k in range(len(mapIdx)):
+             score_mid = paf_avg[:, :, mapIdx[k]]
+             candA = all_peaks[limbSeq[k][0]]
+             candB = all_peaks[limbSeq[k][1]]
+
+             nA = len(candA)
+             nB = len(candB)
+             indexA, indexB = limbSeq[k]
+             if nA != 0 and nB != 0:
+                 connection_candidate = []
+                 for i in range(nA):
+                     for j in range(nB):
+                         vec = np.subtract(candB[j][:2], candA[i][:2])
+                         norm = math.sqrt(vec[0] * vec[0] + vec[1] * vec[1])
+                         norm = max(0.001, norm)
+                         vec = np.divide(vec, norm)
+
+                         startend = list(zip(np.linspace(candA[i][0], candB[j][0], num=mid_num),
+                                             np.linspace(candA[i][1], candB[j][1], num=mid_num)))
+
+                         vec_x = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 0]
+                                           for I in range(len(startend))])
+                         vec_y = np.array([score_mid[int(round(startend[I][1])), int(round(startend[I][0])), 1]
+                                           for I in range(len(startend))])
+
+                         score_midpts = np.multiply(vec_x, vec[0]) + np.multiply(vec_y, vec[1])
+                         score_with_dist_prior = sum(score_midpts) / len(score_midpts) + min(
+                             0.5 * oriImg.shape[0] / norm - 1, 0)
+                         criterion1 = len(np.nonzero(score_midpts > thre2)[0]) > 0.8 * len(score_midpts)
+                         criterion2 = score_with_dist_prior > 0
+                         if criterion1 and criterion2:
+                             connection_candidate.append(
+                                 [i, j, score_with_dist_prior, score_with_dist_prior + candA[i][2] + candB[j][2]])
+
+                 connection_candidate = sorted(connection_candidate, key=lambda x: x[2], reverse=True)
+                 connection = np.zeros((0, 5))
+                 for c in range(len(connection_candidate)):
+                     i, j, s = connection_candidate[c][0:3]
+                     if i not in connection[:, 3] and j not in connection[:, 4]:
+                         connection = np.vstack([connection, [candA[i][3], candB[j][3], s, i, j]])
+                         if len(connection) >= min(nA, nB):
+                             break
+
+                 connection_all.append(connection)
+             else:
+                 special_k.append(k)
+                 connection_all.append([])
+
+         # last number in each row is the total parts number of that person
+         # the second-to-last number in each row is the score of the overall configuration
+         subset = -1 * np.ones((0, self.njoint_body + 1))
+         candidate = np.array([item for sublist in all_peaks for item in sublist])
+
+         for k in range(len(mapIdx)):
+             if k not in special_k:
+                 partAs = connection_all[k][:, 0]
+                 partBs = connection_all[k][:, 1]
+                 indexA, indexB = np.array(limbSeq[k])
+
+                 for i in range(len(connection_all[k])):
+                     found = 0
+                     subset_idx = [-1, -1]
+                     for j in range(len(subset)):
+                         if subset[j][indexA] == partAs[i] or subset[j][indexB] == partBs[i]:
+                             subset_idx[found] = j
+                             found += 1
+
+                     if found == 1:
+                         j = subset_idx[0]
+                         if subset[j][indexB] != partBs[i]:
+                             subset[j][indexB] = partBs[i]
+                             subset[j][-1] += 1
+                             subset[j][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+                     elif found == 2:  # if found 2 and disjoint, merge them
+                         j1, j2 = subset_idx
+                         membership = ((subset[j1] >= 0).astype(int) + (subset[j2] >= 0).astype(int))[:-2]
+                         if len(np.nonzero(membership == 2)[0]) == 0:  # merge
+                             subset[j1][:-2] += (subset[j2][:-2] + 1)
+                             subset[j1][-2:] += subset[j2][-2:]
+                             subset[j1][-2] += connection_all[k][i][2]
+                             subset = np.delete(subset, j2, 0)
+                         else:  # same handling as found == 1
+                             subset[j1][indexB] = partBs[i]
+                             subset[j1][-1] += 1
+                             subset[j1][-2] += candidate[partBs[i].astype(int), 2] + connection_all[k][i][2]
+
+                     # if no partA is found in the subset, create a new row
+                     elif not found and k < self.njoint_body - 2:
+                         row = -1 * np.ones(self.njoint_body + 1)
+                         row[indexA] = partAs[i]
+                         row[indexB] = partBs[i]
+                         row[-1] = 2
+                         row[-2] = sum(candidate[connection_all[k][i, :2].astype(int), 2]) + connection_all[k][i][2]
+                         subset = np.vstack([subset, row])
+         # delete rows of subset in which too few parts occur
+         deleteIdx = []
+         for i in range(len(subset)):
+             if subset[i][-1] < 4 or subset[i][-2] / subset[i][-1] < 0.4:
+                 deleteIdx.append(i)
+         subset = np.delete(subset, deleteIdx, axis=0)
+
+         # subset: one row per person - candidate indices per joint, then total score, then part count
+         # candidate: x, y, score, id
+         return candidate, subset
+
+     def handpos(self, oriImg):
+         scale_search = [0.5, 1.0, 1.5, 2.0]
+         boxsize = 368
+         stride = 8
+         padValue = 128
+         thre = 0.05
+         multiplier = [x * boxsize / oriImg.shape[0] for x in scale_search]
+         heatmap_avg = np.zeros((oriImg.shape[0], oriImg.shape[1], 22))
+
+         for m in range(len(multiplier)):
+             scale = multiplier[m]
+             imageToTest = cv2.resize(oriImg, (0, 0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
+             imageToTest_padded, pad = util.padRightDownCorner(imageToTest, stride, padValue)
+             im = np.transpose(np.float32(imageToTest_padded[:, :, :, np.newaxis]), (3, 2, 0, 1)) / 256 - 0.5
+             im = np.ascontiguousarray(im)
+
+             data = torch.from_numpy(im).float()
+             if torch.cuda.is_available():
+                 data = data.cuda()
+             with torch.no_grad():
+                 output = self.pt_hand(data).cpu().numpy()
+
+             # extract outputs, resize, and remove padding
+             heatmap = np.transpose(np.squeeze(output), (1, 2, 0))
+             heatmap = cv2.resize(heatmap, (0, 0), fx=stride, fy=stride, interpolation=cv2.INTER_CUBIC)
+             heatmap = heatmap[:imageToTest_padded.shape[0] - pad[2], :imageToTest_padded.shape[1] - pad[3], :]
+             heatmap = cv2.resize(heatmap, (oriImg.shape[1], oriImg.shape[0]), interpolation=cv2.INTER_CUBIC)
+
+             heatmap_avg += heatmap / len(multiplier)
+
+         all_peaks = []
+         for part in range(21):
+             map_ori = heatmap_avg[:, :, part]
+             one_heatmap = gaussian_filter(map_ori, sigma=3)
+             binary = np.ascontiguousarray(one_heatmap > thre, dtype=np.uint8)
+             # all values below the threshold
+             if np.sum(binary) == 0:
+                 all_peaks.append([0, 0])
+                 continue
+             label_img, label_numbers = label(binary, return_num=True, connectivity=binary.ndim)
+             max_index = np.argmax([np.sum(map_ori[label_img == i]) for i in range(1, label_numbers + 1)]) + 1
+             label_img[label_img != max_index] = 0
+             map_ori[label_img == 0] = 0
+
+             y, x = util.npmax(map_ori)
+             all_peaks.append([x, y])
+         return np.array(all_peaks)
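A minimal usage sketch (not part of the commit) of how these pieces fit together. It assumes `bodypose_25_model` and `handpose_model` from this Space's model.py (their pretrained OpenPose weights would still need to be loaded separately), rebuilds the LSTM head exactly as app.py's `get_translator_model()` does, and feeds a dummy 20-frame clip; the shapes and file name come from app.py.

```python
import os
os.environ["KERAS_BACKEND"] = "torch"  # TorchModuleWrapper requires the torch backend

import torch
from keras.models import Sequential
from keras.layers import (Input, Masking, BatchNormalization, Bidirectional,
                          LSTM, Dropout, Activation, Dense)

from model import bodypose_25_model, handpose_model      # defined in model.py
from expression_mapping import expression_mapping
from ISL_Model_parameter import ISLSignPosTranslator

# Same head as app.py's get_translator_model(); weights file name from app.py.
head = Sequential([
    Input(shape=(20, 156)),
    Masking(mask_value=0.),
    BatchNormalization(),
    Bidirectional(LSTM(32, recurrent_dropout=0.2, return_sequences=True)),
    Dropout(0.2),
    Bidirectional(LSTM(32, recurrent_dropout=0.2)),
    Activation('elu'),
    Dense(32, use_bias=False, kernel_initializer='he_normal'),
    BatchNormalization(),
    Dropout(0.2),
    Activation('elu'),
    Dense(32, kernel_initializer='he_normal', use_bias=False),
    BatchNormalization(),
    Activation('elu'),
    Dropout(0.2),
    Dense(len(expression_mapping), activation='softmax'),
])
head.load_weights('isl_model_final.keras')

translator = ISLSignPosTranslator(bodypose_25_model(), handpose_model(), head)
frames = torch.zeros((20, 1080, 1920, 3), dtype=torch.uint8)  # 20 BGR frames, 1920x1080 as in INCLUDE
probs = translator(frames)  # (1, len(expression_mapping)) softmax scores
```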
app.py ADDED
@@ -0,0 +1,653 @@
+ # Modified by Augmented Startups 2021
+ # Face Landmark User Interface with StreamLit
+ # Watch Computer Vision Tutorials at www.augmentedstartups.info/YouTube
+ import streamlit as st
+ st.write("Booting…")
+ import os
+ os.environ["KERAS_BACKEND"] = "torch"
+ import keras
+
+ import cv2
+ import numpy as np
+ import pandas as pd
+ import tempfile
+ import time
+ import pickle
+ import json
+ import shutil
+ import platform
+ import subprocess
+ from typing import NamedTuple
+ from PIL import Image
+ from keras.models import Sequential
+ from keras.layers import LSTM, Dense, Bidirectional, Dropout, Input, BatchNormalization
+ from model import handpose_model, bodypose_25_model
+ from expression_mapping import expression_mapping
+ from ISL_Model_parameter import ISLSignPosTranslator
+ import ffmpeg
+ import util
+ from huggingface_hub import hf_hub_download
+
+ st.write("Python:", platform.python_version())
+ st.write("FFmpeg exists:", shutil.which("ffmpeg"), "FFprobe:", shutil.which("ffprobe"))
+
+ try:
+     import cv2
+     st.write("OpenCV version:", cv2.__version__)
+ except Exception as e:
+     st.error(f"OpenCV import failed: {e}")
+
+ try:
+     import torch, keras
+     st.write("Torch:", torch.__version__, "Keras:", keras.__version__)
+ except Exception as e:
+     st.error(f"Torch/Keras import failed: {e}")
+
+
+ class FFProbeResult(NamedTuple):
+     return_code: int
+     json: str
+     error: str
+
+
+ def ffprobe(file_path) -> FFProbeResult:
+     command_array = ["ffprobe",
+                      "-v", "quiet",
+                      "-print_format", "json",
+                      "-show_format",
+                      "-show_streams",
+                      file_path]
+     result = subprocess.run(command_array, stdout=subprocess.PIPE, stderr=subprocess.PIPE, universal_newlines=True)
+     return FFProbeResult(return_code=result.returncode,
+                          json=result.stdout,
+                          error=result.stderr)
+
+
+ X_body_test = [f'bodypeaks_x_{i}' for i in range(15)] + [f'bodypeaks_y_{i}' for i in range(15)]
+ X_hand0_test = [f'hand0peaks_x_{i}' for i in range(21)] + [f'hand0peaks_y_{i}' for i in range(21)] + [f'hand0peaks_peaktxt{i}' for i in range(21)]
+ X_hand1_test = [f'hand1peaks_x_{i}' for i in range(21)] + [f'hand1peaks_y_{i}' for i in range(21)] + [f'hand1peaks_peaktxt{i}' for i in range(21)]
+
+ feature_columns_new = X_body_test + X_hand0_test + X_hand1_test
+ label_columns = ['Expression_encoded']
+
+ @st.cache_resource
+ def create_timeseries_data(isl_data, feature_columns, label_columns, window_size=20):
+     """
+     Creates timeseries windows from a DataFrame, zero-padding windows that
+     are shorter than window_size.
+
+     Args:
+         isl_data (pandas.DataFrame): The input DataFrame.
+         feature_columns (list): Names of the feature columns.
+         label_columns (list): Names of the label columns.
+         window_size (int, optional): The window size. Defaults to 20.
+
+     Returns:
+         tuple: (X, y) where X is a list of windows and y the matching labels.
+     """
+     # Handle an empty DataFrame
+     if isl_data.empty:
+         return [], []
+
+     X = []
+     y = []
+     for group, file_df in isl_data.groupby(['Type', 'Expression_encoded', 'FileName']):
+         expr_types, exprs, filepaths = group
+         first_frame = np.zeros((1, 156))
+         # Slide a window_size-frame window over the file, one frame at a time
+         for idx, x in enumerate([file_df[i:i + window_size] for i in range(0, file_df.shape[0], 1)]):
+             if x.shape[0] < window_size:
+                 # Pad short windows at the front with blank frames
+                 X.append(np.concatenate((np.repeat(first_frame, (window_size - x.shape[0]), axis=0),
+                                          x[feature_columns].values), axis=0))
+                 y.append(exprs)
+                 continue
+
+             X.append(x[feature_columns].values)
+             y.append(exprs)
+
+     return X, y
+
+
+ translation_model = None
+
+ @st.cache_resource
+ def get_translator_model():
+     translation_model = Sequential()
+     translation_model.add(Input(shape=(20, 156)))
+     translation_model.add(keras.layers.Masking(mask_value=0.))
+     translation_model.add(BatchNormalization())
+     translation_model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2, return_sequences=True)))
+
+     translation_model.add(Dropout(0.2))
+     translation_model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2)))
+
+     translation_model.add(keras.layers.Activation('elu'))
+     translation_model.add(Dense(32, use_bias=False, kernel_initializer='he_normal'))
+
+     translation_model.add(BatchNormalization())
+     translation_model.add(Dropout(0.2))
+     translation_model.add(keras.layers.Activation('elu'))
+     translation_model.add(Dense(32, kernel_initializer='he_normal', use_bias=False))
+
+     translation_model.add(BatchNormalization())
+     translation_model.add(keras.layers.Activation('elu'))
+     translation_model.add(Dropout(0.2))
+     translation_model.add(Dense(len(expression_mapping), activation='softmax'))
+     translation_model.load_weights('isl_model_final.keras')
+     return translation_model
+
+
+ testing_cleaned = hf_hub_download(
+     repo_id="sunilsarolkar/isl-test-data",
+     filename="testing_cleaned.csv",
+     repo_type="dataset"
+ )
+ testing_df = pd.read_csv(testing_cleaned)
+
+ test_files = hf_hub_download(
+     repo_id="sunilsarolkar/isl-test-data",
+     filename="test_files.csv",
+     repo_type="dataset"
+ )
+ test_files_df = pd.read_csv(test_files)
+
+
+ class Writer():
+     def __init__(self, output_file, input_fps, input_framesize, input_pix_fmt,
+                  input_vcodec):
+         # Stream raw BGR frames into an ffmpeg encoder process
+         self.ff_proc = (
+             ffmpeg
+             .input('pipe:',
+                    format='rawvideo',
+                    pix_fmt="bgr24",
+                    s='%sx%s' % (input_framesize[1], input_framesize[0]),
+                    r=input_fps)
+             .output(output_file, pix_fmt=input_pix_fmt, vcodec=input_vcodec)
+             .overwrite_output()
+             .run_async(pipe_stdin=True)
+         )
+
+     def __call__(self, frame):
+         self.ff_proc.stdin.write(frame.tobytes())
+
+     def close(self):
+         self.ff_proc.stdin.close()
+         self.ff_proc.wait()
+
+
+ st.title('ISL Indian Sign Language translation using LSTM')
+
+ st.markdown(
+     """
+     <style>
+     [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
+         width: 350px;
+     }
+     [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
+         width: 350px;
+         margin-left: -350px;
+     }
+     </style>
+     """,
+     unsafe_allow_html=True,
+ )
+
+ st.sidebar.title('ISL Sign Language Translation using Openpose')
+ st.sidebar.subheader('Parameters')
+ frame_wise_outputs = {}
+
+ def weighted_average(nums, weights):
+     if sum(weights) == 0:
+         return 0
+     return sum(x * y for x, y in zip(nums, weights)) / sum(weights)
+
+
+ @st.cache_data
+ def image_resize(image, width=None, height=None, inter=cv2.INTER_AREA):
+     # initialize the dimensions of the resized image and grab the image size
+     dim = None
+     (h, w) = image.shape[:2]
+
+     # if both width and height are None, return the original image
+     if width is None and height is None:
+         return image
+
+     if width is None:
+         # calculate the ratio of the height and construct the dimensions
+         r = height / float(h)
+         dim = (int(w * r), height)
+     else:
+         # calculate the ratio of the width and construct the dimensions
+         r = width / float(w)
+         dim = (width, int(h * r))
+
+     # resize the image and return it
+     return cv2.resize(image, dim, interpolation=inter)
+
+
+ app_mode = st.sidebar.selectbox('Choose the App mode',
+                                 ['About App', 'Run on Test Videos'])
+
+ if app_mode == 'About App':
+     st.markdown('In this application we demonstrate a model developed for translating Indian Sign Language (ISL) using an LSTM.')
+     st.markdown(
+         """
+         <style>
+         [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
+             width: 400px;
+         }
+         [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
+             width: 400px;
+             margin-left: -400px;
+         }
+         </style>
+         """,
+         unsafe_allow_html=True,
+     )
+
+     st.markdown('''
+ # Dataset Used \n
+ This model is trained using the [INCLUDE](https://zenodo.org/records/4010759) dataset. \n
+
+ ### Key statistics for the dataset are as follows:
+
+     +-----------------------+-----------------+
+     | Characteristic        | INCLUDE-DATASET |
+     +-----------------------+-----------------+
+     | Categories            | 15              |
+     | Words                 | 263             |
+     | Videos                | 4287            |
+     | Avg Videos per class  | 16.3            |
+     | Avg Video Length      | 2.57s           |
+     | Min Video Length      | 1.28s           |
+     | Max Video Length      | 6.16s           |
+     | Frame Rate            | 25fps           |
+     | Resolution            | 1920x1080       |
+     +-----------------------+-----------------+
+
+ #### Size of each category
+
+     +--------------------+-------------------+------------------+
+     | Category           | Number of Classes | Number of Videos |
+     +--------------------+-------------------+------------------+
+     | Adjectives         | 59                | 791              |
+     | Animals            | 8                 | 166              |
+     | Clothes            | 10                | 198              |
+     | Colours            | 11                | 222              |
+     | Days and Time      | 22                | 306              |
+     | Electronics        | 10                | 140              |
+     | Greetings          | 9                 | 185              |
+     | Means of Transport | 9                 | 186              |
+     | Objects at Home    | 27                | 379              |
+     | Occupations        | 16                | 225              |
+     | People             | 26                | 513              |
+     | Places             | 19                | 399              |
+     | Pronouns           | 8                 | 168              |
+     | Seasons            | 6                 | 85               |
+     | Society            | 23                | 324              |
+     | Total              | 263               | 4287             |
+     +--------------------+-------------------+------------------+
+
+ Below is the count of videos we were able to process (1986 of 4287); we processed a limited set of records due to time/compute constraints.
+ ''')
+
+     image = np.array(Image.open('eda/categories_processed.png'))
+     st.image(image)
+     st.markdown('''
+ #### Count of videos per label for each DataFrame
+ ''')
+     image = np.array(Image.open('eda/distribution_of_data.png'))
+     st.image(image)
+
+     st.markdown('''
+ ### Data Pipeline
+ ''')
+     image = np.array(Image.open('DataPipeline.png'))
+     st.image(image)
+     st.markdown('''
+ ### Model structure
+ ```
+ translation_model = Sequential()
+ translation_model.add(Input(shape=(20, 156)))
+ translation_model.add(keras.layers.Masking(mask_value=0.))
+ translation_model.add(BatchNormalization())
+ translation_model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2, return_sequences=True)))
+
+ translation_model.add(Dropout(0.2))
+ translation_model.add(Bidirectional(LSTM(32, recurrent_dropout=0.2)))
+
+ translation_model.add(keras.layers.Activation('elu'))
+ translation_model.add(Dense(32, use_bias=False, kernel_initializer='he_normal'))
+
+ translation_model.add(BatchNormalization())
+ translation_model.add(Dropout(0.2))
+ translation_model.add(keras.layers.Activation('elu'))
+ translation_model.add(Dense(32, kernel_initializer='he_normal', use_bias=False))
+
+ translation_model.add(BatchNormalization())
+ translation_model.add(keras.layers.Activation('elu'))
+ translation_model.add(Dropout(0.2))
+ translation_model.add(Dense(len(list(expression_mapping.keys())), activation='softmax'))
+ isl_translator = ISLSignPosTranslator(bodypose_25_model(), handpose_model(), translation_model)
+ ```
+
+ Total params: 82,679 (322.96 KB)
+ Trainable params: 82,239 (321.25 KB)
+ Non-trainable params: 440 (1.72 KB)
+ ''')
+     image = np.array(Image.open('model-graph.png'))
+     st.image(image)
+     st.markdown('''
+ # Training
+ [Tensorboard](https://huggingface.co/cdsteameight/ISL-SignLanguageTranslation/tensorboard)
+ ''')
+
+ elif app_mode == 'Run on Test Videos':
+     category = st.sidebar.selectbox('Choose Category',
+                                     np.sort(test_files_df['Category'].unique(), axis=-1, kind='mergesort'))
+     mask = (test_files_df['Category'] == category)
+     test_files_df_category = test_files_df[mask]
+     cls = st.sidebar.selectbox('Choose Class',
+                                np.sort(test_files_df_category['Class'].unique(), axis=-1, kind='mergesort'))
+     mask = (test_files_df['Class'] == cls)
+     filename = st.sidebar.selectbox('Choose File',
+                                     np.sort(test_files_df_category[mask]['Filename'].unique(), axis=-1, kind='mergesort'))
+
+     if st.sidebar.button("Start", type="primary"):
+         mask = (testing_df['FileName'] == filename) & (testing_df['Type'] == category) & (testing_df['Expression'] == cls)
+
+         window_size = 20
+         current_test_df = testing_df[mask]
+         X_test_filtered, y_test_filtered = create_timeseries_data(current_test_df, feature_columns_new, label_columns, window_size=window_size)
+         X_test_filtered = np.array(X_test_filtered)
+
+         st.sidebar.markdown('---')
+         st.markdown(
+             """
+             <style>
+             [data-testid="stSidebar"][aria-expanded="true"] > div:first-child {
+                 width: 400px;
+             }
+             [data-testid="stSidebar"][aria-expanded="false"] > div:first-child {
+                 width: 400px;
+                 margin-left: -400px;
+             }
+             </style>
+             """,
+             unsafe_allow_html=True,
+         )
+
+         st.sidebar.markdown('---')
+
+         st.markdown(' ## Output')
+
+         runtime_progress = st.empty()
+
+         with runtime_progress.container():
+             df1 = pd.DataFrame([['--', '--']], columns=['Frames Processed', 'Detected Class'])
+             my_table = st.table(df1)
+
+         view = st.empty()
+
+         st.markdown("<hr/>", unsafe_allow_html=True)
+         stframes = st.empty()
+
+         vid_file = hf_hub_download(
+             repo_id="sunilsarolkar/isl-test-data",
+             filename=f'test/{category}/{cls}/{filename}',
+             repo_type="dataset"
+         )
+
+         vid = cv2.VideoCapture(vid_file)
+
+         ffprobe_result = ffprobe(vid_file)
+         info = json.loads(ffprobe_result.json)
+         videoinfo = [i for i in info["streams"] if i["codec_type"] == "video"][0]
+         input_fps = videoinfo["avg_frame_rate"]
+         input_pix_fmt = videoinfo["pix_fmt"]
+         input_vcodec = videoinfo["codec_name"]
+         postfix = info["format"]["format_name"].split(",")[0]
+
+         width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
+         height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
+         fps_input = int(vid.get(cv2.CAP_PROP_FPS))
+
+         fps = 0
+         totalFrames = int(vid.get(cv2.CAP_PROP_FRAME_COUNT))
+         window = []
+         prevTime = 0
+
+         with tempfile.NamedTemporaryFile(suffix=f'.{postfix}', delete=False) as tfflie:
+             output_file = tfflie.name
+
+             writer = None
+             weighted_avg_dict = {}
+
+             idx = 0
+
+             for _, row in current_test_df.iterrows():
+                 if vid.isOpened():
+                     ret, frame = vid.read()
+
+                     if len(window) < window_size:
+                         # Not enough frames for a prediction yet: draw the pose only.
+                         # Note: eval() trusts the precomputed pose columns in the CSV.
+                         canvas = util.drawStickmodel(frame, eval(row['bodypose_circles']), eval(row['bodypose_sticks']), eval(row['handpose_edges']), eval(row['handpose_peaks']))
+                         canvas_with_plot = util.draw_bar_plot_below_image(canvas, {}, f'Prediction bar plot - Frame number {idx+1} [** no predictions]', canvas)
+                         canvas_with_plot = util.draw_bar_plot_below_image(canvas_with_plot, weighted_avg_dict, f'Weighted avg - Frame number {idx+1} [** no predictions]', canvas)
+                         canvas_with_plot = util.add_padding_to_bottom(canvas_with_plot, (255, 255, 255), 100)  # adds padding at the bottom
+
+                         if writer is None:
+                             input_framesize = canvas_with_plot.shape[:2]
+                             writer = Writer(output_file, input_fps, input_framesize, input_pix_fmt,
+                                             input_vcodec)
+
+                         writer(canvas_with_plot)
+                         with runtime_progress.container():
+                             df1 = pd.DataFrame([[f'{idx+1}/{current_test_df.shape[0]}', '<model will output after 20 frames>']], columns=['Frames Processed', 'Detected Class'])
+                             my_table = st.table(df1)
+                         window.append(frame)
+                         with view.container():
+                             st.image(canvas_with_plot, channels='BGR', use_column_width=True)
+                     else:
+                         # Slide the 20-frame window forward by one frame
+                         window[:-1] = window[1:]
+                         window[-1] = frame
+                         translation_model = get_translator_model()
+
+                         encoded_translation = translation_model(X_test_filtered[idx-20].reshape(1, X_test_filtered[idx-20].shape[0], X_test_filtered[idx-20].shape[1]))
+                         encoded_translation = encoded_translation[0].cpu().detach().numpy()
+                         sorted_index = np.argsort(encoded_translation)[::-1]
+                         maxindex = np.argmax(encoded_translation)
+
+                         top_3_probs = encoded_translation.argsort()[-3:][::-1]  # indices of the top 3 probabilities (descending)
+                         top_3_categories = [expression_mapping[i] for i in top_3_probs]  # convert indices to category names
+                         top_3_values = encoded_translation[top_3_probs]  # corresponding probabilities
+                         for category_name, prob in zip(top_3_categories, top_3_values):
+                             if category_name not in frame_wise_outputs:
+                                 frame_wise_outputs[category_name] = []
+                             frame_wise_outputs[category_name].append(prob)
+
+                         current_prob = {}
+                         for category_name, prob in zip(top_3_categories, top_3_values):
+                             current_prob[category_name] = prob
+
+                         for key in frame_wise_outputs:
+                             # equal weights per class, so this is effectively a running mean
+                             weighted_avg_dict[key] = weighted_average(frame_wise_outputs[key], [len(frame_wise_outputs[key]) for i in range(len(frame_wise_outputs[key]))])
+
+                         sorted_dict = dict(sorted(weighted_avg_dict.items(), key=lambda item: item[1], reverse=True))
+                         canvas = util.drawStickmodel(frame, eval(row['bodypose_circles']), eval(row['bodypose_sticks']), eval(row['handpose_edges']), eval(row['handpose_peaks']))
+                         canvas_with_plot = util.draw_bar_plot_below_image(canvas, current_prob, f'Prediction at frame window({idx-20+1}-{idx+1})', canvas)
+                         canvas_with_plot = util.draw_bar_plot_below_image(canvas_with_plot, weighted_avg_dict, f'Weighted avg till window {idx+1}', canvas)
+                         canvas_with_plot = util.add_padding_to_bottom(canvas_with_plot, (255, 255, 255), 100)
+                         writer(canvas_with_plot)
+
+                         currTime = time.time()
+                         fps = 1 / (currTime - prevTime)
+                         prevTime = currTime
+
+                         # Dashboard: report the class with the highest running average
+                         max_prob = float('-inf')
+                         max_key = None
+                         for exp, prob in weighted_avg_dict.items():
+                             if prob > max_prob:
+                                 max_prob = prob
+                                 max_key = exp
+                         with runtime_progress.container():
+                             df1 = pd.DataFrame([[f'{idx+1}/{current_test_df.shape[0]}', f'{max_key} ({max_prob*100:.2f}%)']], columns=['Frames Processed', 'Detected Class'])
+                             my_table = st.table(df1)
+
+                         with view.container():
+                             st.image(canvas_with_plot, channels='BGR', use_column_width=True)
+
+                     idx = idx + 1
+
+             with view.container():
+                 writer.close()
+                 output_video = open(output_file, 'rb')
+                 out_bytes = output_video.read()
+                 st.video(out_bytes)
+
+             print(f'Output file - {output_file}')
+             cv2.destroyAllWindows()
+             vid.release()
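For reference, a small self-contained sketch of the aggregation the loop above performs: per 20-frame window it keeps the top-3 class probabilities, and `weighted_average` with equal weights reduces to a running mean per class. The class names and sample numbers below are illustrative only.

```python
from collections import defaultdict

def weighted_average(nums, weights):
    if sum(weights) == 0:
        return 0
    return sum(x * y for x, y in zip(nums, weights)) / sum(weights)

frame_wise_outputs = defaultdict(list)  # class name -> probabilities seen so far

def update(top3):
    """top3: [(class_name, probability), ...] for one 20-frame window."""
    for name, prob in top3:
        frame_wise_outputs[name].append(prob)
    # equal weights per class, so this is effectively a running mean
    return {name: weighted_average(probs, [len(probs)] * len(probs))
            for name, probs in frame_wise_outputs.items()}

avg = update([("Hello", 0.61), ("I", 0.22), ("you", 0.09)])
avg = update([("Hello", 0.70), ("you", 0.12), ("we", 0.05)])
print(max(avg, key=avg.get))  # 'Hello' -> the "Detected Class" shown in the table
```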
expression_mapping.py ADDED
@@ -0,0 +1,168 @@
1
+ expression_mapping={107: "alive",
2
+ 58: "Nice",
3
+ 8: "Beautiful",
4
+ 115: "dead",
5
+ 120: "famous",
6
+ 122: "female",
7
+ 51: "Mean",
8
+ 21: "Deaf",
9
+ 111: "clean",
10
+ 117: "dirty",
11
+ 123: "flat",
12
+ 110: "cheap",
13
+ 119: "expensive",
14
+ 116: "deep",
15
+ 99: "Ugly",
16
+ 114: "curved",
17
+ 12: "Blind",
18
+ 142: "poor",
19
+ 138: "male",
20
+ 126: "hard",
21
+ 133: "light",
22
+ 137: "low",
23
+ 113: "cool",
24
+ 144: "rich",
25
+ 109: "big large",
26
+ 108: "bad",
27
+ 112: "cold",
28
+ 135: "loose",
29
+ 121: "fast",
30
+ 141: "old",
31
+ 130: "high",
32
+ 118: "dry",
33
+ 145: "sad",
34
+ 131: "hot",
35
+ 125: "happy",
36
+ 129: "heavy",
37
+ 128: "healthy",
38
+ 124: "good",
39
+ 146: "shallow",
40
+ 153: "strong",
41
+ 161: "weak",
42
+ 157: "thin",
43
+ 158: "tight",
44
+ 136: "loud",
45
+ 139: "narrow",
46
+ 134: "long",
47
+ 156: "thick",
48
+ 148: "short",
49
+ 152: "soft",
50
+ 150: "slow",
51
+ 151: "small little",
52
+ 149: "sick",
53
+ 154: "tall",
54
+ 140: "new",
55
+ 143: "quiet",
56
+ 95: "Today",
57
+ 163: "wide",
58
+ 159: "warm",
59
+ 96: "Tomorrow",
60
+ 162: "wet",
61
+ 1: "Afternoon",
62
+ 27: "Evening",
63
+ 56: "Morning",
64
+ 59: "Night",
65
+ 166: "young",
66
+ 53: "Minute",
67
+ 38: "Hour",
68
+ 88: "Sunday",
69
+ 55: "Month",
70
+ 94: "Time",
71
+ 70: "Pleased",
72
+ 63: "Paper",
73
+ 105: "Year",
74
+ 80: "Second",
75
+ 32: "Gift",
76
+ 102: "Week",
77
+ 43: "Key",
78
+ 48: "Lock",
79
+ 4: "Bag",
80
+ 106: "Yesterday",
81
+ 7: "Bathroom",
82
+ 15: "Card",
83
+ 66: "Pen",
84
+ 45: "Letter",
85
+ 9: "Bed",
86
+ 2: "Alright",
87
+ 67: "Pencil",
88
+ 24: "Dream",
89
+ 13: "Book",
90
+ 44: "Kitchen",
91
+ 92: "Telephone",
92
+ 23: "Door",
93
+ 36: "Hello",
94
+ 61: "Page",
95
+ 40: "How are you",
96
+ 16: "Chair",
97
+ 89: "Table",
98
+ 97: "Tool",
99
+ 68: "Photograph",
100
+ 10: "Bedroom",
101
+ 103: "Window",
102
+ 62: "Paint",
103
+ 14: "Box",
104
+ 76: "Ring",
105
+ 82: "Soap",
106
+ 20: "Crowd",
107
+ 75: "Restaurant",
108
+ 98: "Train Station",
109
+ 31: "Friend",
110
+ 17: "Child",
111
+ 0: "Adult",
112
+ 46: "Library",
113
+ 39: "House",
114
+ 42: "India",
115
+ 86: "Street or Road",
116
+ 72: "Queen",
117
+ 85: "Store or Shop",
118
+ 64: "Park",
119
+ 77: "School",
120
+ 18: "City",
121
+ 49: "Market",
122
+ 60: "Office",
123
+ 132: "it",
124
+ 41: "I",
125
+ 6: "Bank",
126
+ 69: "Player",
127
+ 147: "she",
128
+ 19: "Court",
129
+ 155: "they",
130
+ 104: "Winter",
131
+ 93: "Temple",
132
+ 33: "God",
133
+ 50: "Marriage",
134
+ 29: "Exercise",
135
+ 37: "Hospital",
136
+ 34: "Ground",
137
+ 25: "Election",
138
+ 73: "Race (ethnicity)",
139
+ 11: "Bill",
140
+ 87: "Summer",
141
+ 160: "we",
142
+ 127: "he",
143
+ 22: "Death",
144
+ 84: "Spring",
145
+ 47: "Location",
146
+ 26: "Energy",
147
+ 54: "Money",
148
+ 28: "Ex. Monsoon",
149
+ 165: "you (plural)",
150
+ 65: "Peace",
151
+ 5: "Ball",
152
+ 71: "Price",
153
+ 35: "Gun",
154
+ 30: "Fall",
155
+ 164: "you",
156
+ 81: "Sign",
157
+ 100: "University",
158
+ 83: "Sport",
159
+ 74: "Religion",
160
+ 101: "War",
161
+ 57: "Newspaper",
162
+ 3: "Attack",
163
+ 90: "Team",
164
+ 78: "Science",
165
+ 79: "Season",
166
+ 52: "Medicine",
167
+ 91: "Technology",
168
+ }
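# Usage sketch (illustrative, not part of the original file):
#     expression_mapping[36]            -> "Hello"
#     expression_mapping.get(200, '?')  -> '?' for indices outside the mapping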
model.py ADDED
@@ -0,0 +1,407 @@
import torch
import torch.nn as nn
from collections import OrderedDict


def make_layers(block, no_relu_layers, prelu_layers=()):
    layers = []
    for layer_name, v in block.items():
        if 'pool' in layer_name:
            # v = [kernel_size, stride, padding]
            layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1],
                                 padding=v[2])
            layers.append((layer_name, layer))
        else:
            # v = [in_channels, out_channels, kernel_size, stride, padding]
            conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
                               kernel_size=v[2], stride=v[3],
                               padding=v[4])
            layers.append((layer_name, conv2d))
            if layer_name not in no_relu_layers:
                if layer_name not in prelu_layers:
                    layers.append(('relu_' + layer_name, nn.ReLU(inplace=True)))
                else:
                    layers.append(('prelu' + layer_name[4:], nn.PReLU(v[1])))

    return nn.Sequential(OrderedDict(layers))


def make_layers_Mconv(block, no_relu_layers):
    modules = []
    for layer_name, v in block.items():
        layers = []
        if 'pool' in layer_name:
            layer = nn.MaxPool2d(kernel_size=v[0], stride=v[1],
                                 padding=v[2])
            layers.append((layer_name, layer))
        else:
            conv2d = nn.Conv2d(in_channels=v[0], out_channels=v[1],
                               kernel_size=v[2], stride=v[3],
                               padding=v[4])
            layers.append((layer_name, conv2d))
            if layer_name not in no_relu_layers:
                layers.append(('Mprelu' + layer_name[5:], nn.PReLU(v[1])))
        # each entry becomes its own Sequential so the forward pass can
        # concatenate the intermediate outputs (dense Mconv blocks)
        modules.append(nn.Sequential(OrderedDict(layers)))
    return nn.ModuleList(modules)


class bodypose_25_model(nn.Module):
    def __init__(self):
        super(bodypose_25_model, self).__init__()
        # these layers have no relu layer
        no_relu_layers = ['Mconv7_stage0_L1', 'Mconv7_stage0_L2',
                          'Mconv7_stage1_L1', 'Mconv7_stage1_L2',
                          'Mconv7_stage2_L2', 'Mconv7_stage3_L2']
        prelu_layers = ['conv4_2', 'conv4_3_CPM', 'conv4_4_CPM']
        blocks = {}
        block0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
            ('conv4_4_CPM', [256, 128, 3, 1, 1])
        ])
        self.model0 = make_layers(block0, no_relu_layers, prelu_layers)

        # L2
        # stage 0
        blocks['Mconv1_stage0_L2'] = OrderedDict([
            ('Mconv1_stage0_L2_0', [128, 96, 3, 1, 1]),
            ('Mconv1_stage0_L2_1', [96, 96, 3, 1, 1]),
            ('Mconv1_stage0_L2_2', [96, 96, 3, 1, 1])
        ])
        for i in range(2, 6):
            blocks['Mconv%d_stage0_L2' % i] = OrderedDict([
                ('Mconv%d_stage0_L2_0' % i, [288, 96, 3, 1, 1]),
                ('Mconv%d_stage0_L2_1' % i, [96, 96, 3, 1, 1]),
                ('Mconv%d_stage0_L2_2' % i, [96, 96, 3, 1, 1])
            ])
        blocks['Mconv6_7_stage0_L2'] = OrderedDict([
            ('Mconv6_stage0_L2', [288, 256, 1, 1, 0]),
            ('Mconv7_stage0_L2', [256, 52, 1, 1, 0])
        ])
        # stages 1-3
        for s in range(1, 4):
            blocks['Mconv1_stage%d_L2' % s] = OrderedDict([
                ('Mconv1_stage%d_L2_0' % s, [180, 128, 3, 1, 1]),
                ('Mconv1_stage%d_L2_1' % s, [128, 128, 3, 1, 1]),
                ('Mconv1_stage%d_L2_2' % s, [128, 128, 3, 1, 1])
            ])
            for i in range(2, 6):
                blocks['Mconv%d_stage%d_L2' % (i, s)] = OrderedDict([
                    ('Mconv%d_stage%d_L2_0' % (i, s), [384, 128, 3, 1, 1]),
                    ('Mconv%d_stage%d_L2_1' % (i, s), [128, 128, 3, 1, 1]),
                    ('Mconv%d_stage%d_L2_2' % (i, s), [128, 128, 3, 1, 1])
                ])
            blocks['Mconv6_7_stage%d_L2' % s] = OrderedDict([
                ('Mconv6_stage%d_L2' % s, [384, 512, 1, 1, 0]),
                ('Mconv7_stage%d_L2' % s, [512, 52, 1, 1, 0])
            ])

        # L1
        # stage 0
        blocks['Mconv1_stage0_L1'] = OrderedDict([
            ('Mconv1_stage0_L1_0', [180, 96, 3, 1, 1]),
            ('Mconv1_stage0_L1_1', [96, 96, 3, 1, 1]),
            ('Mconv1_stage0_L1_2', [96, 96, 3, 1, 1])
        ])
        for i in range(2, 6):
            blocks['Mconv%d_stage0_L1' % i] = OrderedDict([
                ('Mconv%d_stage0_L1_0' % i, [288, 96, 3, 1, 1]),
                ('Mconv%d_stage0_L1_1' % i, [96, 96, 3, 1, 1]),
                ('Mconv%d_stage0_L1_2' % i, [96, 96, 3, 1, 1])
            ])
        blocks['Mconv6_7_stage0_L1'] = OrderedDict([
            ('Mconv6_stage0_L1', [288, 256, 1, 1, 0]),
            ('Mconv7_stage0_L1', [256, 26, 1, 1, 0])
        ])
        # stage 1
        blocks['Mconv1_stage1_L1'] = OrderedDict([
            ('Mconv1_stage1_L1_0', [206, 128, 3, 1, 1]),
            ('Mconv1_stage1_L1_1', [128, 128, 3, 1, 1]),
            ('Mconv1_stage1_L1_2', [128, 128, 3, 1, 1])
        ])
        for i in range(2, 6):
            blocks['Mconv%d_stage1_L1' % i] = OrderedDict([
                ('Mconv%d_stage1_L1_0' % i, [384, 128, 3, 1, 1]),
                ('Mconv%d_stage1_L1_1' % i, [128, 128, 3, 1, 1]),
                ('Mconv%d_stage1_L1_2' % i, [128, 128, 3, 1, 1])
            ])
        blocks['Mconv6_7_stage1_L1'] = OrderedDict([
            ('Mconv6_stage1_L1', [384, 512, 1, 1, 0]),
            ('Mconv7_stage1_L1', [512, 26, 1, 1, 0])
        ])

        for k in blocks.keys():
            blocks[k] = make_layers_Mconv(blocks[k], no_relu_layers)
        self.models = nn.ModuleDict(blocks)
        for param in self.parameters():
            param.requires_grad = False

    def _Mconv_forward(self, x, models):
        outs = []
        out = x
        for m in models:
            out = m(out)
            outs.append(out)
        return torch.cat(outs, 1)

    def forward(self, x):
        out0 = self.model0(x)
        # L2
        tout = out0
        for s in range(4):
            tout = self._Mconv_forward(tout, self.models['Mconv1_stage%d_L2' % s])
            for v in range(2, 6):
                tout = self._Mconv_forward(tout, self.models['Mconv%d_stage%d_L2' % (v, s)])
            tout = self.models['Mconv6_7_stage%d_L2' % s][0](tout)
            tout = self.models['Mconv6_7_stage%d_L2' % s][1](tout)
            outL2 = tout
            tout = torch.cat([out0, tout], 1)
        # L1 stage 0
        tout = self._Mconv_forward(tout, self.models['Mconv1_stage0_L1'])
        for v in range(2, 6):
            tout = self._Mconv_forward(tout, self.models['Mconv%d_stage0_L1' % v])
        tout = self.models['Mconv6_7_stage0_L1'][0](tout)
        tout = self.models['Mconv6_7_stage0_L1'][1](tout)
        outS0L1 = tout
        tout = torch.cat([out0, outS0L1, outL2], 1)
        # L1 stage 1
        tout = self._Mconv_forward(tout, self.models['Mconv1_stage1_L1'])
        for v in range(2, 6):
            tout = self._Mconv_forward(tout, self.models['Mconv%d_stage1_L1' % v])
        tout = self.models['Mconv6_7_stage1_L1'][0](tout)
        outS1L1 = self.models['Mconv6_7_stage1_L1'][1](tout)

        return outL2, outS1L1


class bodypose_model(nn.Module):
    def __init__(self):
        super(bodypose_model, self).__init__()

        # these layers have no relu layer
        no_relu_layers = ['conv5_5_CPM_L1', 'conv5_5_CPM_L2', 'Mconv7_stage2_L1',
                          'Mconv7_stage2_L2', 'Mconv7_stage3_L1', 'Mconv7_stage3_L2',
                          'Mconv7_stage4_L1', 'Mconv7_stage4_L2', 'Mconv7_stage5_L1',
                          'Mconv7_stage5_L2', 'Mconv7_stage6_L1', 'Mconv7_stage6_L2']
        blocks = {}
        block0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3_CPM', [512, 256, 3, 1, 1]),
            ('conv4_4_CPM', [256, 128, 3, 1, 1])
        ])

        # Stage 1
        block1_1 = OrderedDict([
            ('conv5_1_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L1', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L1', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L1', [512, 38, 1, 1, 0])
        ])

        block1_2 = OrderedDict([
            ('conv5_1_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_2_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_3_CPM_L2', [128, 128, 3, 1, 1]),
            ('conv5_4_CPM_L2', [128, 512, 1, 1, 0]),
            ('conv5_5_CPM_L2', [512, 19, 1, 1, 0])
        ])
        blocks['block1_1'] = block1_1
        blocks['block1_2'] = block1_2

        self.model0 = make_layers(block0, no_relu_layers)

        # Stages 2 - 6
        for i in range(2, 7):
            blocks['block%d_1' % i] = OrderedDict([
                ('Mconv1_stage%d_L1' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L1' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L1' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L1' % i, [128, 38, 1, 1, 0])
            ])

            blocks['block%d_2' % i] = OrderedDict([
                ('Mconv1_stage%d_L2' % i, [185, 128, 7, 1, 3]),
                ('Mconv2_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d_L2' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d_L2' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d_L2' % i, [128, 19, 1, 1, 0])
            ])

        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        self.model1_1 = blocks['block1_1']
        self.model2_1 = blocks['block2_1']
        self.model3_1 = blocks['block3_1']
        self.model4_1 = blocks['block4_1']
        self.model5_1 = blocks['block5_1']
        self.model6_1 = blocks['block6_1']

        self.model1_2 = blocks['block1_2']
        self.model2_2 = blocks['block2_2']
        self.model3_2 = blocks['block3_2']
        self.model4_2 = blocks['block4_2']
        self.model5_2 = blocks['block5_2']
        self.model6_2 = blocks['block6_2']
        for param in self.parameters():
            param.requires_grad = False

    def forward(self, x):

        out1 = self.model0(x)

        out1_1 = self.model1_1(out1)
        out1_2 = self.model1_2(out1)
        out2 = torch.cat([out1_1, out1_2, out1], 1)

        out2_1 = self.model2_1(out2)
        out2_2 = self.model2_2(out2)
        out3 = torch.cat([out2_1, out2_2, out1], 1)

        out3_1 = self.model3_1(out3)
        out3_2 = self.model3_2(out3)
        out4 = torch.cat([out3_1, out3_2, out1], 1)

        out4_1 = self.model4_1(out4)
        out4_2 = self.model4_2(out4)
        out5 = torch.cat([out4_1, out4_2, out1], 1)

        out5_1 = self.model5_1(out5)
        out5_2 = self.model5_2(out5)
        out6 = torch.cat([out5_1, out5_2, out1], 1)

        out6_1 = self.model6_1(out6)
        out6_2 = self.model6_2(out6)

        return out6_1, out6_2


class handpose_model(nn.Module):
    def __init__(self):
        super(handpose_model, self).__init__()

        # these layers have no relu layer
        no_relu_layers = ['conv6_2_CPM', 'Mconv7_stage2', 'Mconv7_stage3',
                          'Mconv7_stage4', 'Mconv7_stage5', 'Mconv7_stage6']
        # stage 1
        block1_0 = OrderedDict([
            ('conv1_1', [3, 64, 3, 1, 1]),
            ('conv1_2', [64, 64, 3, 1, 1]),
            ('pool1_stage1', [2, 2, 0]),
            ('conv2_1', [64, 128, 3, 1, 1]),
            ('conv2_2', [128, 128, 3, 1, 1]),
            ('pool2_stage1', [2, 2, 0]),
            ('conv3_1', [128, 256, 3, 1, 1]),
            ('conv3_2', [256, 256, 3, 1, 1]),
            ('conv3_3', [256, 256, 3, 1, 1]),
            ('conv3_4', [256, 256, 3, 1, 1]),
            ('pool3_stage1', [2, 2, 0]),
            ('conv4_1', [256, 512, 3, 1, 1]),
            ('conv4_2', [512, 512, 3, 1, 1]),
            ('conv4_3', [512, 512, 3, 1, 1]),
            ('conv4_4', [512, 512, 3, 1, 1]),
            ('conv5_1', [512, 512, 3, 1, 1]),
            ('conv5_2', [512, 512, 3, 1, 1]),
            ('conv5_3_CPM', [512, 128, 3, 1, 1])
        ])

        block1_1 = OrderedDict([
            ('conv6_1_CPM', [128, 512, 1, 1, 0]),
            ('conv6_2_CPM', [512, 22, 1, 1, 0])
        ])

        blocks = {}
        blocks['block1_0'] = block1_0
        blocks['block1_1'] = block1_1

        # stages 2 - 6
        for i in range(2, 7):
            blocks['block%d' % i] = OrderedDict([
                ('Mconv1_stage%d' % i, [150, 128, 7, 1, 3]),
                ('Mconv2_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv3_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv4_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv5_stage%d' % i, [128, 128, 7, 1, 3]),
                ('Mconv6_stage%d' % i, [128, 128, 1, 1, 0]),
                ('Mconv7_stage%d' % i, [128, 22, 1, 1, 0])
            ])

        for k in blocks.keys():
            blocks[k] = make_layers(blocks[k], no_relu_layers)

        self.model1_0 = blocks['block1_0']
        self.model1_1 = blocks['block1_1']
        self.model2 = blocks['block2']
        self.model3 = blocks['block3']
        self.model4 = blocks['block4']
        self.model5 = blocks['block5']
        self.model6 = blocks['block6']
        for param in self.parameters():
            param.requires_grad = False

    def forward(self, x):
        out1_0 = self.model1_0(x)
        out1_1 = self.model1_1(out1_0)
        concat_stage2 = torch.cat([out1_1, out1_0], 1)
        out_stage2 = self.model2(concat_stage2)
        concat_stage3 = torch.cat([out_stage2, out1_0], 1)
        out_stage3 = self.model3(concat_stage3)
        concat_stage4 = torch.cat([out_stage3, out1_0], 1)
        out_stage4 = self.model4(concat_stage4)
        concat_stage5 = torch.cat([out_stage4, out1_0], 1)
        out_stage5 = self.model5(concat_stage5)
        concat_stage6 = torch.cat([out_stage5, out1_0], 1)
        out_stage6 = self.model6(concat_stage6)
        return out_stage6
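# A minimal shape-check sketch (not part of the original file): even without
# pretrained weights loaded, bodypose_25_model maps a (1, 3, 368, 368) image
# tensor to a 52-channel PAF map and a 26-channel keypoint heatmap at stride 8,
# i.e. 46 x 46 feature maps.
if __name__ == '__main__':
    model = bodypose_25_model()
    with torch.no_grad():
        pafs, heatmaps = model(torch.zeros(1, 3, 368, 368))
    print(pafs.shape)      # torch.Size([1, 52, 46, 46])
    print(heatmaps.shape)  # torch.Size([1, 26, 46, 46])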
packages.txt ADDED
@@ -0,0 +1,6 @@
ffmpeg
libgl1
libglib2.0-0
libsm6
libxrender1
libxext6
requirements.txt ADDED
@@ -0,0 +1,22 @@
opencv_python_headless
streamlit
numpy
Pillow
matplotlib
scipy
scikit-image
tqdm
pandas
torch
torchaudio
torchvision
torchtext
torchdata
av
keras
ffmpeg
ffmpeg-python
seaborn[stats]
huggingface_hub
util.py ADDED
@@ -0,0 +1,463 @@
import copy
import math

import cv2
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
from matplotlib.figure import Figure


def padRightDownCorner(img, stride, padValue):
    h = img.shape[0]
    w = img.shape[1]

    pad = 4 * [None]
    pad[0] = 0  # up
    pad[1] = 0  # left
    pad[2] = 0 if (h % stride == 0) else stride - (h % stride)  # down
    pad[3] = 0 if (w % stride == 0) else stride - (w % stride)  # right

    img_padded = img
    pad_up = np.tile(img_padded[0:1, :, :] * 0 + padValue, (pad[0], 1, 1))
    img_padded = np.concatenate((pad_up, img_padded), axis=0)
    pad_left = np.tile(img_padded[:, 0:1, :] * 0 + padValue, (1, pad[1], 1))
    img_padded = np.concatenate((pad_left, img_padded), axis=1)
    pad_down = np.tile(img_padded[-2:-1, :, :] * 0 + padValue, (pad[2], 1, 1))
    img_padded = np.concatenate((img_padded, pad_down), axis=0)
    pad_right = np.tile(img_padded[:, -2:-1, :] * 0 + padValue, (1, pad[3], 1))
    img_padded = np.concatenate((img_padded, pad_right), axis=1)

    return img_padded, pad


# transfer a caffe model to pytorch, matching weights by layer name
def transfer(model, model_weights):
    transferred_model_weights = {}
    for weights_name in model.state_dict().keys():
        if len(weights_name.split('.')) > 4:  # body25
            transferred_model_weights[weights_name] = model_weights['.'.join(
                weights_name.split('.')[3:])]
        else:
            transferred_model_weights[weights_name] = model_weights['.'.join(
                weights_name.split('.')[1:])]
    return transferred_model_weights


# draw the body keypoints and limbs
def draw_bodypose(canvas, candidate, subset, model_type='body25'):
    stickwidth = 4
    if model_type == 'body25':
        limbSeq = [[1, 0], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],
                   [10, 11], [8, 12], [12, 13], [13, 14], [0, 15], [0, 16], [15, 17], [16, 18],
                   [11, 24], [11, 22], [14, 21], [14, 19], [22, 23], [19, 20]]
        njoint = 25
    else:
        limbSeq = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
                   [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
                   [0, 15], [15, 17], [2, 16], [5, 17]]
        njoint = 18

    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85], [255, 255, 0], [255, 255, 85], [255, 255, 170],
              [255, 255, 255], [170, 255, 255], [85, 255, 255], [0, 255, 255]]

    for i in range(njoint):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                continue
            x, y = candidate[index][0:2]
            cv2.circle(canvas, (int(x), int(y)), 4, colors[i], thickness=-1)
    for i in range(njoint - 1):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i])]
            if -1 in index:
                continue
            cur_canvas = canvas.copy()
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            polygon = cv2.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
            cv2.fillConvexPoly(cur_canvas, polygon, colors[i])
            canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)
    return canvas


# sample inputs, for reference:
# subsets [[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, -1.0, 11.0, 12.0, -1.0, 13.0, 14.0, 15.0, 16.0, -1.0, -1.0, -1.0, -1.0, -1.0, -1.0, 26.650803712300775, 17.0]]
# candidates [[983.0, 172.0, 0.8991263508796692, 0.0], [980.0, 352.0, 0.930037796497345, 1.0], [848.0, 342.0, 0.8652207255363464, 2.0], [811.0, 598.0, 0.8107873797416687, 3.0], [806.0, 817.0, 0.7464589476585388, 4.0], [1120.0, 361.0, 0.8538270592689514, 5.0], [1148.0, 601.0, 0.6797391176223755, 6.0], [1149.0, 834.0, 0.5189468264579773, 7.0], [968.0, 757.0, 0.6468111276626587, 8.0], [876.0, 756.0, 0.6387956142425537, 9.0], [854.0, 1072.0, 0.4211728572845459, 10.0], [1057.0, 759.0, 0.6311940550804138, 11.0], [1038.0, 1072.0, 0.38531172275543213, 12.0], [955.0, 146.0, 0.925083339214325, 13.0], [1016.0, 151.0, 0.9023998379707336, 14.0], [909.0, 167.0, 0.9096773862838745, 15.0], [1057.0, 173.0, 0.8605436086654663, 16.0]]
def get_bodypose(candidate, subset, model_type='coco'):
    if model_type == 'body25':
        limbSeq = [[1, 0], [1, 2], [2, 3], [3, 4], [1, 5], [5, 6], [6, 7], [1, 8], [8, 9], [9, 10],
                   [10, 11], [8, 12], [12, 13], [13, 14], [0, 15], [0, 16], [15, 17], [16, 18],
                   [11, 24], [11, 22], [14, 21], [14, 19], [22, 23], [19, 20]]
        njoint = 25
    else:
        limbSeq = [[1, 2], [1, 5], [2, 3], [3, 4], [5, 6], [6, 7], [1, 8], [8, 9],
                   [9, 10], [1, 11], [11, 12], [12, 13], [1, 0], [0, 14], [14, 16],
                   [0, 15], [15, 17], [2, 16], [5, 17]]
        njoint = 18

    x_y_circles = []
    for i in range(njoint):
        for n in range(len(subset)):
            index = int(subset[n][i])
            if index == -1:
                continue
            x, y = candidate[index][0:2]  # e.g. 983.0, 172.0
            x_y_circles.append((x, y))

    x_y_sticks = []
    for i in range(njoint - 1):
        for n in range(len(subset)):
            index = subset[n][np.array(limbSeq[i])]
            if -1 in index:
                continue
            Y = candidate[index.astype(int), 0]
            X = candidate[index.astype(int), 1]
            mX = np.mean(X)
            mY = np.mean(Y)
            length = ((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) ** 0.5
            angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1]))
            # stored as (mY, mX, ...): the order cv2.ellipse2Poly expects as centre
            x_y_sticks.append((mY, mX, angle, length))
    return (x_y_circles, x_y_sticks)


# sample all_hand_peaks: [[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [1100, 858], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]], [[0, 0], [858, 859], [868, 894], [873, 938], [0, 0], [802, 920], [807, 961], [821, 977], [836, 992], [0, 0], [781, 955], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]]
def draw_handpose(canvas, all_hand_peaks, show_number=False):
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
    fig = Figure(figsize=plt.figaspect(canvas))

    fig.subplots_adjust(0, 0, 1, 1)
    fig.subplots_adjust(bottom=0, top=1, left=0, right=1)
    bg = FigureCanvas(fig)
    ax = fig.subplots()
    ax.axis('off')
    ax.imshow(canvas)

    width, height = ax.figure.get_size_inches() * ax.figure.get_dpi()

    for peaks in all_hand_peaks:
        for ie, e in enumerate(edges):
            # draw an edge only when both of its endpoints were detected
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                ax.plot([x1, x2], [y1, y2], color=matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]))

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            ax.plot(x, y, 'r.')
            if show_number:
                ax.text(x, y, str(i))
    bg.draw()
    canvas = np.frombuffer(bg.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)
    return canvas


def get_handpose(all_hand_peaks, show_number=False):
    # show_number is kept for signature parity with draw_handpose
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
    export_edges = [[], []]
    export_peaks = [[], []]
    for idx, peaks in enumerate(all_hand_peaks):
        for ie, e in enumerate(edges):
            if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
                x1, y1 = peaks[e[0]]
                x2, y2 = peaks[e[1]]
                export_edges[idx].append((ie, (x1, y1), (x2, y2)))

        for i, keypoint in enumerate(peaks):
            x, y = keypoint
            export_peaks[idx].append((x, y, str(i)))
    return (export_edges, export_peaks)


# an image drawn directly by opencv is lower quality; kept for reference
def draw_handpose_by_opencv(canvas, peaks, show_number=False):
    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]
    for ie, e in enumerate(edges):
        if np.sum(np.all(peaks[e], axis=1) == 0) == 0:
            x1, y1 = peaks[e[0]]
            x2, y2 = peaks[e[1]]
            cv2.line(canvas, (x1, y1), (x2, y2), matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]) * 255, thickness=2)

    for i, keypoint in enumerate(peaks):
        x, y = keypoint
        cv2.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1)
        if show_number:
            cv2.putText(canvas, str(i), (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.3, (0, 0, 0), lineType=cv2.LINE_AA)
    return canvas


# detect hands from body pose keypoints
# please refer to https://github.com/CMU-Perceptual-Computing-Lab/openpose/blob/master/src/openpose/hand/handDetector.cpp
def handDetect(candidate, subset, oriImg):
    # right hand: wrist 4, elbow 3, shoulder 2
    # left hand: wrist 7, elbow 6, shoulder 5
    ratioWristElbow = 0.33
    detect_result = []

    image_height, image_width = oriImg.shape[0:2]
    for person in subset.astype(int):
        # skip a side if any of its wrist/elbow/shoulder was not detected
        has_left = np.sum(person[[5, 6, 7]] == -1) == 0
        has_right = np.sum(person[[2, 3, 4]] == -1) == 0
        if not (has_left or has_right):
            continue
        hands = []
        # left hand
        if has_left:
            left_shoulder_index, left_elbow_index, left_wrist_index = person[[5, 6, 7]]
            x1, y1 = candidate[left_shoulder_index][:2]
            x2, y2 = candidate[left_elbow_index][:2]
            x3, y3 = candidate[left_wrist_index][:2]
            hands.append([x1, y1, x2, y2, x3, y3, True])
        # right hand
        if has_right:
            right_shoulder_index, right_elbow_index, right_wrist_index = person[[2, 3, 4]]
            x1, y1 = candidate[right_shoulder_index][:2]
            x2, y2 = candidate[right_elbow_index][:2]
            x3, y3 = candidate[right_wrist_index][:2]
            hands.append([x1, y1, x2, y2, x3, y3, False])

        for x1, y1, x2, y2, x3, y3, is_left in hands:
            # pos_hand = pos_wrist + ratio * (pos_wrist - pos_elbow)
            # from the OpenPose C++ handDetector:
            # handRectangle.x = posePtr[wrist*3] + ratioWristElbow * (posePtr[wrist*3] - posePtr[elbow*3]);
            # handRectangle.y = posePtr[wrist*3+1] + ratioWristElbow * (posePtr[wrist*3+1] - posePtr[elbow*3+1]);
            # handRectangle.width = 1.5f * fastMax(distanceWristElbow, 0.9f * distanceElbowShoulder);
            x = x3 + ratioWristElbow * (x3 - x2)
            y = y3 + ratioWristElbow * (y3 - y2)
            distanceWristElbow = math.sqrt((x3 - x2) ** 2 + (y3 - y2) ** 2)
            distanceElbowShoulder = math.sqrt((x2 - x1) ** 2 + (y2 - y1) ** 2)
            width = 1.5 * max(distanceWristElbow, 0.9 * distanceElbowShoulder)
            # x, y refer to the centre --> offset to the top-left corner
            x -= width / 2
            y -= width / 2  # width = height
            # clip the box to the image
            if x < 0: x = 0
            if y < 0: y = 0
            width1 = width
            width2 = width
            if x + width > image_width: width1 = image_width - x
            if y + width > image_height: width2 = image_height - y
            width = min(width1, width2)
            # discard hand boxes smaller than 20 pixels
            if width >= 20:
                detect_result.append([int(x), int(y), int(width), is_left])

    '''
    return value: [[x, y, w, True if left hand else False]].
    width = height since the network requires square input.
    x, y is the coordinate of the top-left corner.
    '''
    return detect_result
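# Worked example for the box geometry above (illustrative numbers, not from the
# original source): with elbow (x2, y2) = (100, 100) and wrist (x3, y3) = (130, 100),
# the box centre is pushed past the wrist to x = 130 + 0.33 * (130 - 100) = 139.9;
# with distanceWristElbow = 30 and distanceElbowShoulder = 40, the square box
# side is width = 1.5 * max(30, 0.9 * 40) = 54 pixels.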
def drawStickmodel(oriImg, x_ytupple, x_y_sticks, export_edges, export_peaks):
    canvas = copy.deepcopy(oriImg)

    colors = [[255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], [85, 255, 0], [0, 255, 0],
              [0, 255, 85], [0, 255, 170], [0, 255, 255], [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255],
              [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85], [255, 255, 0], [255, 255, 85], [255, 255, 170],
              [255, 255, 255], [170, 255, 255], [85, 255, 255], [0, 255, 255]]
    stickwidth = 4

    # x_y_sticks stores (mY, mX, angle, length) from get_bodypose, so the first
    # two values unpack here in the order cv2.ellipse2Poly expects as the centre
    for idx, (mX, mY, angle, length) in enumerate(x_y_sticks):
        cur_canvas = canvas.copy()
        polygon = cv2.ellipse2Poly((int(mX), int(mY)), (int(length / 2), stickwidth), int(angle), 0, 360, 1)
        cv2.fillConvexPoly(cur_canvas, polygon, colors[idx])
        canvas = cv2.addWeighted(canvas, 0.4, cur_canvas, 0.6, 0)

    for idx, (x, y) in enumerate(x_ytupple):
        cv2.circle(canvas, (int(x), int(y)), 4, colors[idx], thickness=-1)

    ## Handpose
    fig = Figure(figsize=plt.figaspect(canvas))
    fig.subplots_adjust(0, 0, 1, 1)
    fig.subplots_adjust(bottom=0, top=1, left=0, right=1)
    bg = FigureCanvas(fig)
    ax = fig.subplots()
    ax.axis('off')
    ax.imshow(canvas)

    edges = [[0, 1], [1, 2], [2, 3], [3, 4], [0, 5], [5, 6], [6, 7], [7, 8], [0, 9], [9, 10],
             [10, 11], [11, 12], [0, 13], [13, 14], [14, 15], [15, 16], [0, 17], [17, 18], [18, 19], [19, 20]]

    for both_hand_edges in export_edges:
        for (ie, (x1, y1), (x2, y2)) in both_hand_edges:
            ax.plot([x1, x2], [y1, y2], color=matplotlib.colors.hsv_to_rgb([ie / float(len(edges)), 1.0, 1.0]))

    width, height = ax.figure.get_size_inches() * ax.figure.get_dpi()

    for both_hand_peaks in export_peaks:
        for (x, y, text) in both_hand_peaks:
            ax.plot(x, y, 'r.')

    bg.draw()

    canvas = np.frombuffer(bg.tostring_rgb(), dtype='uint8').reshape(int(height), int(width), 3)

    return cv2.resize(canvas, (math.ceil(width), math.ceil(height)))


def draw_bar_plot_below_image(image, predictions, title, origImg):
    """
    Draws a bar plot of class probabilities below an image.

    Args:
        image (numpy.ndarray): The image to display.
        predictions (dict): Mapping of class label -> predicted probability.
        title (str): Title shown above the bar plot.
        origImg (numpy.ndarray): Reference image used only to size the figure.
    """
    fig, ax = plt.subplots(figsize=(origImg.shape[1] / 100, origImg.shape[0] / 200), dpi=100)
    plt.title(title)
    labels = list(predictions.keys())
    probabilities = list(predictions.values())

    # Create a Seaborn bar plot (default colour palette)
    sns.barplot(x=labels, y=probabilities, ax=ax)
    fig.canvas.draw()
    # Convert the plot to a NumPy array, dropping the alpha channel
    plot_image = np.array(fig.canvas.renderer.buffer_rgba())[:, :, :3]
    plt.close(fig)  # close the figure to avoid leaking memory

    # Stack the image and the plot vertically, matching widths
    combined_image = np.vstack((image, cv2.resize(plot_image, (image.shape[1], plot_image.shape[0]))))

    return combined_image


def add_padding_to_bottom(image, pad_value, pad_height):
    """
    Adds padding of a given colour to the bottom of an image.

    Args:
        image (numpy.ndarray): The input image.
        pad_value (tuple or int): The colour value to fill the padding area.
        pad_height (int): The height of the padding to add at the bottom.

    Returns:
        numpy.ndarray: The image with padding added.
    """
    height, width, channels = image.shape
    padding = np.zeros((pad_height, width, channels), dtype=image.dtype)
    padding[:, :, :] = pad_value
    return np.vstack((image, padding))


def crop_to_drawing(image):
    """
    Crops an image to the tight bounding rectangle of non-zero pixels,
    taken as the union of the per-channel bounding rectangles.

    Args:
        image: A NumPy array of shape (H, W, C).

    Returns:
        A cropped image (NumPy array) containing only the drawing area.
    """
    x_min, y_min = image.shape[1], image.shape[0]
    x_max, y_max = 0, 0
    for channel in range(image.shape[2]):
        x, y, w, h = cv2.boundingRect(image[:, :, channel])
        x_min, y_min = min(x_min, x), min(y_min, y)
        x_max, y_max = max(x_max, x + w), max(y_max, y + h)
    return image[y_min:y_max, x_min:x_max, :]


# get the (row, column) index of the maximum of a 2-D array
def npmax(array):
    arrayindex = array.argmax(1)
    arrayvalue = array.max(1)
    i = arrayvalue.argmax()
    j = arrayindex[i]
    return i, j
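# Example (illustrative, not part of the original file):
#     >>> npmax(np.array([[1, 5], [9, 2]]))
#     (1, 0)   # the global maximum, 9, sits at row 1, column 0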